diff options
| author | Brandon C. Irizarry <brandon.irizarry@gmail.com> | 2026-04-16 16:34:00 -0400 |
|---|---|---|
| committer | Brandon C. Irizarry <brandon.irizarry@gmail.com> | 2026-04-16 16:34:00 -0400 |
| commit | 172bccd05779240d6d6cd087a1c19216f0a5e225 (patch) | |
| tree | f463225598d8ecee3d5d474e7d21cb39e97db9e2 /main.py | |
| parent | aa8c44a35a28d665a7bc85869a59e77f9ee9a7f9 (diff) | |
feat: ping sites sequentially and measure how long it takes
Diffstat (limited to 'main.py')
| -rw-r--r-- | main.py | 38 |
1 file changed, 37 insertions, 1 deletion
@@ -1,5 +1,9 @@ +import asyncio import csv import sys +import time + +import aiohttp def get_urls(filename: str, limit: int | None = None) -> list[str]: @@ -29,6 +33,32 @@ def get_urls(filename: str, limit: int | None = None) -> list[str]: return urls +async def ping(urls: list[str], max_concurrency=None) -> None: + """Make a GET request to members of URLS. + + If MAX_CONCURRENCY is None, browse every site at once. + + Else, only browse MAX_CONCURRENCY number of sites at a time. + + Print the sites as they get browsed; don't return anything. + + """ + + async with aiohttp.ClientSession(max_field_size=8190 * 2) as session: + for url in urls: + try: + async with session.get(url) as response: + print(f"Status: {response.status}") + + if not response.ok: + print(f"Got code {response.status} from URL; skipping") + continue + + print(f"Content-Type: {response.headers['content-type']}") + except aiohttp.ClientError as e: + print(f"Something bad happened with URL: {e}; skipping") + + def main(): limit: int | None = None @@ -36,7 +66,13 @@ def main(): limit = int(sys.argv[1]) urls = get_urls("majestic_million.csv", limit) - print(urls) + + start_time = time.perf_counter() + asyncio.run(ping(urls)) + end_time = time.perf_counter() + + elapsed_time = end_time - start_time + print(f"Execution time: {elapsed_time:.6f} seconds") if __name__ == "__main__": |
