summary | refs | log | tree | commit | diff
path: root/classic.go
diff options
context:
space:
mode:
author demo <demo@antix1> 2026-05-26 18:07:06 -0400
committer demo <demo@antix1> 2026-05-26 18:07:06 -0400
commit bd95fa6b7b9862a014bfaf55e98b6849f6122806 (patch)
tree 89f374ffa592fd19c213dfe9ab88084294cb3e48 /classic.go
parent dfc111552917ceeabb0852cc6f1ba8a46fc21595 (diff)
feat: hit 'em with the classic web crawler
Diffstat (limited to 'classic.go')
-rw-r--r-- classic.go 49
1 files changed, 49 insertions, 0 deletions
diff --git a/classic.go b/classic.go
new file mode 100644
index 0000000..9adb5bf
--- /dev/null
+++ b/classic.go
@@ -0,0 +1,49 @@
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/url"
+)
+
+// classic crawls the web starting at startURL and prints every newly
+// discovered URL to standard output as "<n>. <url>", numbering from 1.
+//
+// Each URL's links are fetched by getBatch in its own goroutine and handed
+// back over an unbuffered channel. maxConcurrency caps how many getBatch
+// calls run at the same time and maxURLs caps how many URLs are printed and
+// fetched; a non-positive value leaves the corresponding limit off.
+func classic(startURL url.URL, maxConcurrency, maxURLs int) {
+	worklist := make(chan []url.URL)
+
+	// numPendingSends counts goroutines that have yet to send on worklist.
+	// The main loop receives exactly that many batches, so it returns only
+	// once every goroutine has delivered its result and exited.
+	numPendingSends := 1
+	go func() {
+		worklist <- []url.URL{startURL}
+	}()
+
+	// tokens is a counting semaphore bounding concurrent getBatch calls.
+	// It stays nil (and unused) when no concurrency limit was requested.
+	var tokens chan struct{}
+	if maxConcurrency > 0 {
+		tokens = make(chan struct{}, maxConcurrency)
+	}
+
+	seen := make(map[url.URL]bool)
+	count := 1
+
+	for ; numPendingSends > 0; numPendingSends-- {
+		batch := <-worklist
+		for _, u := range batch {
+			if maxURLs > 0 && count > maxURLs {
+				// Limit reached: stop scheduling, but keep receiving so
+				// in-flight goroutines are not left blocked on their send.
+				break
+			}
+			if seen[u] {
+				continue
+			}
+			seen[u] = true
+
+			fmt.Printf("%d. %s\n", count, u.String())
+			count++
+
+			numPendingSends++
+			// Pass u as an argument so each goroutine owns its copy even
+			// under pre-Go 1.22 loop-variable semantics.
+			go func(u url.URL) {
+				if tokens != nil {
+					tokens <- struct{}{}
+				}
+				batch := getBatch(u)
+				if tokens != nil {
+					// Release before the blocking send so a slot is not
+					// held while waiting for the receiver.
+					<-tokens
+				}
+				worklist <- batch
+			}(u)
+		}
+	}
+}
+
+// getBatch fetches u and returns the URLs that findURLs extracts from the
+// fetched document. If fetch fails, the error is logged and nil is returned
+// so that findURLs is never handed the result of a failed fetch.
+func getBatch(u url.URL) []url.URL {
+	doc, err := fetch(u)
+	if err != nil {
+		log.Print(err)
+		return nil
+	}
+
+	return findURLs(u, doc)
+}