summary | refs | log | tree | commit | diff
path: root/main.go
diff options
context:
space:
mode:
author: demo <demo@antix1> 2026-05-07 22:24:26 -0400
committer: demo <demo@antix1> 2026-05-07 22:24:26 -0400
commit: 6aa92cede4f4c70333293cfdac00d9d08db66636 (patch)
tree: 7ea6a07c553e182f0b13592809109759509b7421 /main.go
parent: 00a2f4555f81257c8043c74d9a6a0428a049339a (diff)
feat: implement href-scanning
We still need to implement text scanning.
Diffstat (limited to 'main.go')
-rw-r--r-- main.go 35
1 files changed, 31 insertions, 4 deletions
diff --git a/main.go b/main.go
index 20f5f36..5286e63 100644
--- a/main.go
+++ b/main.go
@@ -2,10 +2,14 @@ package main
import (
"flag"
+ "fmt"
"io"
"log"
"net/http"
"time"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
)
type Link struct {
@@ -20,6 +24,7 @@ func main() {
// CLI flag configuration.
rawURL := flag.String("url", "", "Web address of target HTML")
timeoutSecs := flag.Int("timeout", 2, "Number of seconds after which to time out")
+ flag.Parse()
if *rawURL == "" {
log.Fatal("Missing -url")
@@ -48,12 +53,34 @@ func main() {
log.Fatal(err)
}
- _ = links
+ fmt.Println(links)
}
-// findLinks consumes the given reader, scraping it of anchor
+// findLinks consumes the given [io.Reader], scraping it of anchor
// tags. Each anchor tag is "unmarshalled" into a [Link]. The
// resulting slice of Links is returned, along with an error.
-func findLinks(_ io.Reader) ([]Link, error) {
- return nil, nil
+func findLinks(r io.Reader) ([]Link, error) {
+ doc, err := html.Parse(r)
+ if err != nil {
+ return nil, fmt.Errorf("can't parse html reader: %w", err)
+ }
+
+ var links []Link
+ for n := range doc.Descendants() {
+ if n.Type == html.ElementNode && n.DataAtom == atom.A {
+ var link Link
+
+ // Scan the href.
+ for _, a := range n.Attr {
+ if a.Key == "href" {
+ link.Href = a.Val
+ }
+ }
+
+ // FIXME: for now, only scan for hrefs.
+ links = append(links, link)
+ }
+ }
+
+ return links, nil
}