package main

import (
	"context"
	"fmt"
	"net/url"
	"sync"
)

// workers crawls outward from startURL with a fixed pool of worker goroutines
// and prints every distinct URL it discovers to standard output, numbered in
// discovery order ("1. <url>", "2. <url>", ...).
//
// The calling goroutine owns the urls channel and the seen set: it receives
// batches of links from worklist, drops the ones it has already seen, prints
// the rest and fans them out to the pool. Each worker passes the URL it
// receives to getBatch (defined elsewhere) and forwards the returned batch
// back on worklist.
//
// maxConcurrency is the number of worker goroutines; values below 1 are
// treated as 1 so that something always receives from urls. maxURLs caps the
// number of distinct URLs printed; the URL that reaches the cap is printed but
// not crawled. A maxURLs <= 0 never matches the cap check, so the crawl then
// runs until no unvisited links remain.
//
// workers returns only after every goroutine it started has exited.
func workers(startURL url.URL, maxConcurrency, maxURLs int) {
	// With no workers the first send on urls would block forever.
	maxConcurrency = max(maxConcurrency, 1)

	worklist := make(chan []url.URL) // Batches of candidate links, possibly already seen.
	urls := make(chan url.URL)       // Unseen URLs handed to the workers.

	ctx, cancel := context.WithCancel(context.Background())
	var wg sync.WaitGroup

	// forward delivers one batch to the main loop from its own goroutine, so
	// that a worker never blocks on worklist while the main loop is itself
	// blocked sending on urls. The goroutine is tracked by wg and gives up as
	// soon as the crawl is cancelled, so it cannot outlive workers.
	forward := func(batch []url.URL) {
		wg.Go(func() {
			select {
			case worklist <- batch:
			case <-ctx.Done():
			}
		})
	}

	// Seed the crawl with the start URL.
	forward([]url.URL{startURL})

	// Start the worker pool that consumes the urls channel.
	for range maxConcurrency {
		wg.Go(func() {
			for u := range urls {
				batch := getBatch(u)
				// The crawl may have been cancelled while getBatch was
				// running; nobody is left to receive the result.
				if ctx.Err() != nil {
					return
				}
				forward(batch)
			}
		})
	}

	// The main goroutine deduplicates worklist items and sends unseen ones to
	// the workers.
	seen := make(map[url.URL]bool)

	// pending counts the batches that will still arrive on worklist: the seed
	// batch plus exactly one per URL handed to a worker. When it reaches zero
	// the frontier is exhausted and the loop can stop instead of blocking on
	// a channel nobody will ever send to again.
	pending := 1
crawl:
	for pending > 0 {
		batch := <-worklist
		pending--
		for _, u := range batch {
			if seen[u] {
				continue
			}
			seen[u] = true
			// *url.URL implements fmt.Stringer, hence the address-of.
			fmt.Printf("%d. %s\n", len(seen), &u)
			if len(seen) == maxURLs {
				break crawl
			}
			urls <- u
			pending++
		}
	}

	cancel()    // Release forwarders still waiting to deliver a batch.
	close(urls) // Let idle workers fall out of their range loop.
	wg.Wait()
}