summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author demo <demo@antix1> 2026-05-24 10:28:53 -0400
committer demo <demo@antix1> 2026-05-24 10:28:53 -0400
commit 3bba6f4b6eb048e21b0197ef1029b12235a61ba6 (patch)
tree d550c6fa361da624feac6b1904e1aab49949da3f
parent edce95c4a8ce11f81ef112e1e2fa183c6b3206d2 (diff)
refactor: move main logic into separate function
-rw-r--r-- main.go 20
1 files changed, 14 insertions, 6 deletions
diff --git a/main.go b/main.go
index 74e3144..3f29a26 100644
--- a/main.go
+++ b/main.go
@@ -38,14 +38,19 @@ func main() {
log.Fatal(err)
}
+ crawler(*startURL, *maxConcurrency, *maxURLs)
+
+}
+
+func crawler(startURL url.URL, maxConcurrency, maxURLs int) {
/* Main logic starts here. */
worklist := make(chan []url.URL)
go func() {
- worklist <- []url.URL{*startURL}
+ worklist <- []url.URL{startURL}
}()
- sem := make(chan struct{}, *maxConcurrency)
+ sem := make(chan struct{}, maxConcurrency)
ctx, cancel := context.WithCancel(context.Background())
seen := make(map[url.URL]bool)
@@ -56,8 +61,7 @@ func main() {
loop:
for list := range worklist {
for _, u := range list {
- if *maxURLs > 0 && len(seen) == *maxURLs {
- cancel()
+ if maxURLs > 0 && len(seen) == maxURLs {
break loop
}
@@ -70,7 +74,7 @@ loop:
sem <- struct{}{}
defer func() { <-sem }()
- more := crawl(ctx, u)
+ more := getMoreURLs(ctx, u)
if len(more) > 0 {
worklist <- more
}
@@ -79,6 +83,10 @@ loop:
}
}
+ // We broke the range loop, meaning there should be no more
+ // pending getMoreURLs jobs anyway.
+ cancel()
+
// For now, print out some diagnostics that prove that there
// are still pending sends on the worklist channel.
for batch := range worklist {
@@ -90,7 +98,7 @@ loop:
}
}
-func crawl(ctx context.Context, u url.URL) []url.URL {
+func getMoreURLs(ctx context.Context, u url.URL) []url.URL {
select {
case <-ctx.Done():
return nil