diff options
| author | demo <demo@antix1> | 2026-05-26 18:07:06 -0400 |
|---|---|---|
| committer | demo <demo@antix1> | 2026-05-26 18:07:06 -0400 |
| commit | bd95fa6b7b9862a014bfaf55e98b6849f6122806 (patch) | |
| tree | 89f374ffa592fd19c213dfe9ab88084294cb3e48 /classic.go | |
| parent | dfc111552917ceeabb0852cc6f1ba8a46fc21595 (diff) | |
feat: hit 'em with the classic web crawler
Diffstat (limited to 'classic.go')
| -rw-r--r-- | classic.go | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/classic.go b/classic.go new file mode 100644 index 0000000..9adb5bf --- /dev/null +++ b/classic.go @@ -0,0 +1,49 @@ +package main + +import ( + "fmt" + "log" + "net/url" +) + +func classic(startURL url.URL, maxConcurrency, maxURLs int) { + worklist := make(chan []url.URL) + var numPendingSends int + + numPendingSends++ + go func() { + worklist <- []url.URL{startURL} + }() + + // Crawl the web concurrently. + seen := make(map[url.URL]bool) + count := 1 + + for ; numPendingSends > 0; numPendingSends-- { + batch := <-worklist + for _, u := range batch { + if !seen[u] { + fmt.Printf("%d. %s\n", count, &u) + count++ + + seen[u] = true + + numPendingSends++ + go func() { + worklist <- getBatch(u) + }() + } + } + } +} + +func getBatch(u url.URL) []url.URL { + doc, err := fetch(u) + if err != nil { + log.Print(err) + } + + batch := findURLs(u, doc) + + return batch +} |
