From 26f5b43a82955c77ea4bc1d7a710895e4b36209a Mon Sep 17 00:00:00 2001 From: demo Date: Tue, 26 May 2026 22:32:58 -0400 Subject: docs: add extensive comments --- workers.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/workers.go b/workers.go index 845e947..535e50c 100644 --- a/workers.go +++ b/workers.go @@ -8,21 +8,29 @@ import ( ) /* - + CHANNEL/GOROUTINE NOTES main goroutine: - manages urls channel. */ +// Packet accrues data as it passes through our concurrent +// pipeline. Formerly the web crawler only transmitted [url.URL] values, +// but using a compound data type allows us to add URL +// depth-tracking. type Packet struct { url url.URL depth int } +// String implements the [fmt.Stringer] interface. We need this mainly +// because [url.URL]'s String method has a pointer receiver, so fmt +// only calls it when given a *url.URL. func (p Packet) String() string { return fmt.Sprintf("[%d] %s", p.depth, &p.url) } +// workers launches a worker queue for crawling a given Web domain. func workers(startURL url.URL, maxConcurrency, maxURLs int) { worklist := make(chan []Packet) @@ -43,6 +51,9 @@ func workers(startURL url.URL, maxConcurrency, maxURLs int) { wg.Go(func() { for p := range packets { batch := getBatch(p.url) + + // Convert URLs to Packets. In the + // process, bump up the depth by 1. var ps []Packet for _, u := range batch { @@ -66,11 +77,16 @@ func workers(startURL url.URL, maxConcurrency, maxURLs int) { // The main goroutine deduplicates worklist items and sends // unseen ones to the crawlers in a fan-out fashion. seen := make(map[url.URL]int) + + // Used to prettify the running URL listing. count := 1 loop: for batch := range worklist { for _, p := range batch { + // We're tracking _depth_ with the seen-map + // now, so any unseen URL doesn't have any + // depth-entry registered yet. if _, ok := seen[p.url]; !ok { fmt.Printf("%d. %s\n", count, p) count++ @@ -86,8 +102,11 @@ loop: } } + // We're done writing to the packets channel, so close it. 
close(packets) + // There are some in-flight workers as of this point, so + // signal a cancel to them. cancel() wg.Wait() } -- cgit v1.2.3