feat: implement href-scanning

Scanning of anchor text is not implemented yet; for now findLinks only fills in each Link's Href.
author: demo <demo@antix1> 2026-05-07 22:24:26 -0400
committer: demo <demo@antix1> 2026-05-07 22:24:26 -0400
commit: 6aa92cede4f4c70333293cfdac00d9d08db66636 (patch)
tree: 7ea6a07c553e182f0b13592809109759509b7421
parent: 00a2f4555f81257c8043c74d9a6a0428a049339a (diff)
1 files changed, 31 insertions, 4 deletions
diff --git a/main.go b/main.go
index 20f5f36..5286e63 100644
--- a/main.go
+++ b/main.go
@@ -2,10 +2,14 @@ package main
 
 import (
 	"flag"
+	"fmt"
 	"io"
 	"log"
 	"net/http"
 	"time"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
 )
 
 type Link struct {
@@ -20,6 +24,7 @@ func main() {
 	// CLI flag configuration.
 	rawURL := flag.String("url", "", "Web address of target HTML")
 	timeoutSecs := flag.Int("timeout", 2, "Number of seconds after which to time out")
+	flag.Parse()
 
 	if *rawURL == "" {
 		log.Fatal("Missing -url")
@@ -48,12 +53,34 @@ func main() {
 		log.Fatal(err)
 	}
 
-	_ = links
+	fmt.Println(links)
 }
 
-// findLinks consumes the given reader, scraping it of anchor
+// findLinks consumes the given [io.Reader], scraping it of anchor
 // tags. Each anchor tag is "unmarshalled" into a [Link]. The
 // resulting slice of Links is returned, along with an error.
-func findLinks(_ io.Reader) ([]Link, error) {
-	return nil, nil
+func findLinks(r io.Reader) ([]Link, error) {
+	doc, err := html.Parse(r)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse html reader: %w", err)
+	}
+
+	var links []Link
+	for n := range doc.Descendants() {
+		if n.Type == html.ElementNode && n.DataAtom == atom.A {
+			var link Link
+
+			// Scan the href.
+			for _, a := range n.Attr {
+				if a.Key == "href" {
+					link.Href = a.Val
+				}
+			}
+
+			// FIXME: for now, only scan for hrefs.
+			links = append(links, link)
+		}
+	}
+
+	return links, nil
 }
author	demo <demo@antix1>	2026-05-07 22:24:26 -0400
committer	demo <demo@antix1>	2026-05-07 22:24:26 -0400
commit	6aa92cede4f4c70333293cfdac00d9d08db66636 (patch)
tree	7ea6a07c553e182f0b13592809109759509b7421
parent	00a2f4555f81257c8043c74d9a6a0428a049339a (diff)