diff options
| author | demo <demo@antix1> | 2026-05-24 10:28:53 -0400 |
|---|---|---|
| committer | demo <demo@antix1> | 2026-05-24 10:28:53 -0400 |
| commit | 3bba6f4b6eb048e21b0197ef1029b12235a61ba6 (patch) | |
| tree | d550c6fa361da624feac6b1904e1aab49949da3f | |
| parent | edce95c4a8ce11f81ef112e1e2fa183c6b3206d2 (diff) | |
refactor: move main logic into separate function
| -rw-r--r-- | main.go | 20 |
1 file changed, 14 insertions, 6 deletions
@@ -38,14 +38,19 @@ func main() { log.Fatal(err) } + crawler(*startURL, *maxConcurrency, *maxURLs) + +} + +func crawler(startURL url.URL, maxConcurrency, maxURLs int) { /* Main logic starts here. */ worklist := make(chan []url.URL) go func() { - worklist <- []url.URL{*startURL} + worklist <- []url.URL{startURL} }() - sem := make(chan struct{}, *maxConcurrency) + sem := make(chan struct{}, maxConcurrency) ctx, cancel := context.WithCancel(context.Background()) seen := make(map[url.URL]bool) @@ -56,8 +61,7 @@ func main() { loop: for list := range worklist { for _, u := range list { - if *maxURLs > 0 && len(seen) == *maxURLs { - cancel() + if maxURLs > 0 && len(seen) == maxURLs { break loop } @@ -70,7 +74,7 @@ loop: sem <- struct{}{} defer func() { <-sem }() - more := crawl(ctx, u) + more := getMoreURLs(ctx, u) if len(more) > 0 { worklist <- more } @@ -79,6 +83,10 @@ loop: } } + // We broke the range loop, meaning there should be no more + // pending getMoreURLs jobs anyway. + cancel() + // For now, print out some diagnostics that prove that there // are still pending sends on the worklist channel. for batch := range worklist { @@ -90,7 +98,7 @@ loop: } } -func crawl(ctx context.Context, u url.URL) []url.URL { +func getMoreURLs(ctx context.Context, u url.URL) []url.URL { select { case <-ctx.Done(): return nil |
