From 3bba6f4b6eb048e21b0197ef1029b12235a61ba6 Mon Sep 17 00:00:00 2001 From: demo Date: Sun, 24 May 2026 10:28:53 -0400 Subject: refactor: move main logic into separate function --- main.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index 74e3144..3f29a26 100644 --- a/main.go +++ b/main.go @@ -38,14 +38,19 @@ func main() { log.Fatal(err) } + crawler(*startURL, *maxConcurrency, *maxURLs) + +} + +func crawler(startURL url.URL, maxConcurrency, maxURLs int) { /* Main logic starts here. */ worklist := make(chan []url.URL) go func() { - worklist <- []url.URL{*startURL} + worklist <- []url.URL{startURL} }() - sem := make(chan struct{}, *maxConcurrency) + sem := make(chan struct{}, maxConcurrency) ctx, cancel := context.WithCancel(context.Background()) seen := make(map[url.URL]bool) @@ -56,8 +61,7 @@ func main() { loop: for list := range worklist { for _, u := range list { - if *maxURLs > 0 && len(seen) == *maxURLs { - cancel() + if maxURLs > 0 && len(seen) == maxURLs { break loop } @@ -70,7 +74,7 @@ loop: sem <- struct{}{} defer func() { <-sem }() - more := crawl(ctx, u) + more := getMoreURLs(ctx, u) if len(more) > 0 { worklist <- more } @@ -79,6 +83,10 @@ loop: } } + // We broke the range loop, meaning there should be no more + // pending getMoreURLs jobs anyway. + cancel() + // For now, print out some diagnostics that prove that there // are still pending sends on the worklist channel. for batch := range worklist { @@ -90,7 +98,7 @@ loop: } } -func crawl(ctx context.Context, u url.URL) []url.URL { +func getMoreURLs(ctx context.Context, u url.URL) []url.URL { select { case <-ctx.Done(): return nil -- cgit v1.2.3