diff options
| author | demo <demo@antix1> | 2026-05-08 10:19:23 -0400 |
|---|---|---|
| committer | demo <demo@antix1> | 2026-05-08 10:19:23 -0400 |
| commit | 8a3554d4f32d631bd1c7cc6254ab11b14b541c67 (patch) | |
| tree | dc42a61ab56acf62542b3f2565ac23e557f88f28 /internal/findlinks/doc.go | |
| parent | 6aa92cede4f4c70333293cfdac00d9d08db66636 (diff) | |
feat: separate into remote and local commands
I'd like to be able to read HTML locally as well.
Diffstat (limited to 'internal/findlinks/doc.go')
| -rw-r--r-- | internal/findlinks/doc.go | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/internal/findlinks/doc.go b/internal/findlinks/doc.go new file mode 100644 index 0000000..28573f8 --- /dev/null +++ b/internal/findlinks/doc.go @@ -0,0 +1,47 @@ +// Package findlinks iterates over an HTML tree and extracts relevant +// data from it. +package findlinks + +import ( + "fmt" + "io" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +// A Link encapsulates the data harvested from a link; FindLinks currently fills in Href only. +type Link struct { + Href string + Text string +} + +// FindLinks consumes the given [io.Reader], scraping it of anchor +// tags. Each anchor tag is "unmarshalled" into a [Link]; only Href is filled in, and it stays empty when the tag has no href. The +// resulting slice of Links is returned, along with a wrapped error (and a nil slice) if parsing fails. +func FindLinks(r io.Reader) ([]Link, error) { + doc, err := html.Parse(r) + if err != nil { + return nil, fmt.Errorf("can't parse html reader: %w", err) + } + + var links []Link + + for n := range doc.Descendants() { + if n.Type == html.ElementNode && n.DataAtom == atom.A { + var link Link + + // Scan the href; there is no break, so a later href entry in n.Attr overwrites an earlier one. + for _, a := range n.Attr { + if a.Key == "href" { + link.Href = a.Val + } + } + + // FIXME: for now, only scan for hrefs; link.Text stays empty. + links = append(links, link) + } + } + + return links, nil +} |
