summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- classic.go 23
-rw-r--r-- main.go 2
2 files changed, 15 insertions, 10 deletions
diff --git a/classic.go b/classic.go
index 0b2bfa8..bd689fc 100644
--- a/classic.go
+++ b/classic.go
@@ -9,16 +9,18 @@ import (
)
func classic(startURL url.URL, maxConcurrency, maxURLs int) {
- worklist := make(chan []url.URL)
+ worklist := make(chan []packet)
var numPendingSends int
numPendingSends++
go func() {
- worklist <- []url.URL{startURL}
+ startPacket := packet{startURL, 0}
+ worklist <- []packet{startPacket}
}()
- // Crawl the web concurrently.
- seen := make(map[url.URL]bool)
+ // Crawl the web concurrently. Map each URL to its depth (i.e.,
+ // how many links we have to follow to reach that URL).
+ seen := make(map[url.URL]int)
count := 1
ctx, cancel := context.WithCancel(context.Background())
@@ -28,12 +30,12 @@ func classic(startURL url.URL, maxConcurrency, maxURLs int) {
loop:
for ; numPendingSends > 0; numPendingSends-- {
batch := <-worklist
- for _, u := range batch {
- if !seen[u] {
- fmt.Printf("%d. %s\n", count, &u)
+ for _, p := range batch {
+ if _, ok := seen[p.url]; !ok {
+ fmt.Printf("%d. %s\n", count, &p)
count++
- seen[u] = true
+ seen[p.url] = p.depth
if len(seen) == maxURLs {
break loop
}
@@ -46,7 +48,10 @@ loop:
select {
case <-ctx.Done():
return
- case worklist <- getBatch(u):
+ default:
+ batch := getBatch(p.url)
+ ps := convertToPackets(batch, p.depth+1)
+ worklist <- ps
}
})
}
diff --git a/main.go b/main.go
index 900cb1f..bd30a4c 100644
--- a/main.go
+++ b/main.go
@@ -41,7 +41,7 @@ func main() {
}
getLeakProfile(func() {
- workers(*startURL, *maxConcurrency, *maxURLs)
+ classic(*startURL, *maxConcurrency, *maxURLs)
})
}