summaryrefslogtreecommitdiff
path: root/main.go
diff options
context:
space:
mode:
author demo <demo@antix1> 2026-05-23 10:12:47 -0400
committer demo <demo@antix1> 2026-05-23 10:12:47 -0400
commit 8921d502c306fa939fba904479fdbd659a39f6fc (patch)
tree 2ca77b0a31c5c4c81d6fb9c8025c857f82d8de04 /main.go
parent ee3b576ba30177f89bcb558fb588b3380caf1f95 (diff)
feat: implement simple BFS webcrawler
Diffstat (limited to 'main.go')
-rw-r--r-- main.go 52
1 files changed, 52 insertions, 0 deletions
diff --git a/main.go b/main.go
index 7905807..f4b9a91 100644
--- a/main.go
+++ b/main.go
@@ -1,5 +1,57 @@
package main
+import (
+ "flag"
+ "fmt"
+ "log"
+ "net/url"
+)
+
func main() {
+ maxConcurrency := flag.Int("c", 0, "Maximum number of concurrent queue pushes")
+ startRawURL := flag.String("url", "", "Entry-point URL")
+ flag.Parse()
+
+ if *maxConcurrency == 0 {
+ log.Fatal("Missing -c argument")
+ }
+
+ if *startRawURL == "" {
+ log.Fatal("Missing -url argument")
+ }
+
+ startURL, err := url.Parse(*startRawURL)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ worklist := make(chan []url.URL)
+ go func() {
+ worklist <- []url.URL{*startURL}
+ }()
+
+ seen := make(map[url.URL]bool)
+ for list := range worklist {
+ for _, u := range list {
+ if !seen[u] {
+ fmt.Printf("%s\n", &u)
+ seen[u] = true
+
+ go func() {
+ more := crawl(u)
+ worklist <- more
+ }()
+ }
+ }
+ }
+}
+
+func crawl(u url.URL) []url.URL {
+ doc, err := fetch(u)
+ if err != nil {
+ log.Print(err)
+ return nil
+ }
+ return findURLs(u, doc)
}