summary refs log tree commit diff
path: root/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'main.py')
-rw-r--r-- main.py 38
1 files changed, 37 insertions, 1 deletions
diff --git a/main.py b/main.py
index 463b5df..80dcce7 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,9 @@
+import asyncio
import csv
import sys
+import time
+
+import aiohttp
def get_urls(filename: str, limit: int | None = None) -> list[str]:
@@ -29,6 +33,32 @@ def get_urls(filename: str, limit: int | None = None) -> list[str]:
return urls
+async def ping(urls: list[str], max_concurrency=None) -> None:
+    """Make a GET request to each member of URLS, one at a time.
+
+    Print the status code and Content-Type of every URL as it is browsed;
+    report non-OK responses and failed requests, then skip to the next URL.
+
+    MAX_CONCURRENCY is accepted but not used yet: requests are awaited
+    sequentially inside a single shared aiohttp.ClientSession.
+
+    """
+
+    async with aiohttp.ClientSession(max_field_size=8190 * 2) as session:
+        for url in urls:
+            try:
+                async with session.get(url) as response:
+                    print(f"Status: {response.status}")
+
+                    if not response.ok:
+                        print(f"Got code {response.status} from {url}; skipping")
+                        continue
+                    # .get(): a response without Content-Type must not raise KeyError.
+                    print(f"Content-Type: {response.headers.get('content-type')}")
+            # A request timeout raises TimeoutError, which is not a ClientError.
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                print(f"Something bad happened with {url}: {e}; skipping")
+
+
def main():
limit: int | None = None
@@ -36,7 +66,13 @@ def main():
limit = int(sys.argv[1])
urls = get_urls("majestic_million.csv", limit)
- print(urls)
+
+ start_time = time.perf_counter()
+ asyncio.run(ping(urls))
+ end_time = time.perf_counter()
+
+ elapsed_time = end_time - start_time
+ print(f"Execution time: {elapsed_time:.6f} seconds")
if __name__ == "__main__":