diff options
| -rw-r--r-- | classic.go | 23 | ||||
| -rw-r--r-- | main.go | 2 |
2 files changed, 15 insertions, 10 deletions
@@ -9,16 +9,18 @@ import ( ) func classic(startURL url.URL, maxConcurrency, maxURLs int) { - worklist := make(chan []url.URL) + worklist := make(chan []packet) var numPendingSends int numPendingSends++ go func() { - worklist <- []url.URL{startURL} + startPacket := packet{startURL, 0} + worklist <- []packet{startPacket} }() - // Crawl the web concurrently. - seen := make(map[url.URL]bool) + // Crawl the web concurrently. Map URLs to their depth (i.e + // how many links we have to work through to find the URL.) + seen := make(map[url.URL]int) count := 1 ctx, cancel := context.WithCancel(context.Background()) @@ -28,12 +30,12 @@ func classic(startURL url.URL, maxConcurrency, maxURLs int) { loop: for ; numPendingSends > 0; numPendingSends-- { batch := <-worklist - for _, u := range batch { - if !seen[u] { - fmt.Printf("%d. %s\n", count, &u) + for _, p := range batch { + if _, ok := seen[p.url]; !ok { + fmt.Printf("%d. %s\n", count, &p) count++ - seen[u] = true + seen[p.url] = p.depth if len(seen) == maxURLs { break loop } @@ -46,7 +48,10 @@ loop: select { case <-ctx.Done(): return - case worklist <- getBatch(u): + default: + batch := getBatch(p.url) + ps := convertToPackets(batch, p.depth+1) + worklist <- ps } }) } @@ -41,7 +41,7 @@ func main() { } getLeakProfile(func() { - workers(*startURL, *maxConcurrency, *maxURLs) + classic(*startURL, *maxConcurrency, *maxURLs) }) } |
