package main import ( "context" "fmt" "log" "net/url" "sync" ) func classic(startURL url.URL, maxConcurrency, maxURLs, maxDepth int) { worklist := make(chan []packet) var numPendingSends int numPendingSends++ go func() { startPacket := packet{startURL, 0} worklist <- []packet{startPacket} }() // Crawl the web concurrently. Map URLs to their depth (i.e // how many links we have to work through to find the URL.) seen := make(map[url.URL]int) count := 1 ctx, cancel := context.WithCancel(context.Background()) var wg sync.WaitGroup sema := make(chan struct{}, maxConcurrency) loop: for ; numPendingSends > 0; numPendingSends-- { batch := <-worklist for _, p := range batch { if _, ok := seen[p.url]; !ok { fmt.Printf("%d. %s\n", count, &p) count++ seen[p.url] = p.depth if len(seen) == maxURLs { break loop } // Track maxDepth here. A maxDepth // greater than zero means a finite // maxDepth value. If the packets // we're seeing reach that depth, // don't use their URLs to spawn new // fetches. if maxDepth > 0 && p.depth == maxDepth { continue } numPendingSends++ wg.Go(func() { sema <- struct{}{} defer func() { <-sema }() select { case <-ctx.Done(): return default: batch := getBatch(p.url) ps := convertToPackets(batch, p.depth+1) worklist <- ps } }) } } } cancel() wg.Wait() } func getBatch(u url.URL) []url.URL { doc, err := fetch(u) if err != nil { log.Print(err) } batch := findURLs(u, doc) return batch }