From bd95fa6b7b9862a014bfaf55e98b6849f6122806 Mon Sep 17 00:00:00 2001 From: demo Date: Tue, 26 May 2026 18:07:06 -0400 Subject: feat: hit 'em with the classic web crawler --- classic.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 classic.go (limited to 'classic.go') diff --git a/classic.go b/classic.go new file mode 100644 index 0000000..9adb5bf --- /dev/null +++ b/classic.go @@ -0,0 +1,49 @@ +package main + +import ( + "fmt" + "log" + "net/url" +) + +func classic(startURL url.URL, maxConcurrency, maxURLs int) { + worklist := make(chan []url.URL) + var numPendingSends int + + numPendingSends++ + go func() { + worklist <- []url.URL{startURL} + }() + + // Crawl the web concurrently. + seen := make(map[url.URL]bool) + count := 1 + + for ; numPendingSends > 0; numPendingSends-- { + batch := <-worklist + for _, u := range batch { + if !seen[u] { + fmt.Printf("%d. %s\n", count, &u) + count++ + + seen[u] = true + + numPendingSends++ + go func() { + worklist <- getBatch(u) + }() + } + } + } +} + +func getBatch(u url.URL) []url.URL { + doc, err := fetch(u) + if err != nil { + log.Print(err) + } + + batch := findURLs(u, doc) + + return batch +} -- cgit v1.2.3