feat: implement "v2"

This is based on the Gophercises solution.
author: demo <demo@antix1> 2026-05-09 11:43:48 -0400
committer: demo <demo@antix1> 2026-05-09 11:43:48 -0400
commit: 52bb422959147384291dcfbfe5a6142d363862ab (patch)
tree: 90a17881bf865d43a0431beefded70e3f227e2e6
parent: 7fe62a3f676d810c8df46fa24a7314a2209a9dd2 (diff)
2 files changed, 119 insertions, 12 deletions
diff --git a/internal/findlinks/findlinks_v2.go b/internal/findlinks/findlinks_v2.go
new file mode 100644
index 0000000..e56a961
--- /dev/null
+++ b/internal/findlinks/findlinks_v2.go
@@ -0,0 +1,97 @@
+package findlinks
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// Parse reads an HTML document from r and returns one [Link] for each
+// anchor element found in it. If [html.Parse] fails, Parse returns a
+// nil slice and an error wrapping the cause.
+func Parse(r io.Reader) ([]Link, error) {
+	doc, err := html.Parse(r)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse html reader: %w", err)
+	}
+
+	return parseLinks(doc), nil
+}
+
+// parseLinks builds one [Link] per anchor element found under doc, in the
+// order harvestLinkNodes returns them. The result is nil if there are none.
+func parseLinks(doc *html.Node) []Link {
+	linkNodes := harvestLinkNodes(doc)
+
+	var links []Link
+	for _, linkNode := range linkNodes {
+		var link Link
+
+		// Text is whatever harvestText extracts from the anchor's subtree.
+		link.Text = harvestText(linkNode)
+
+		// Href is the first href attribute's value; it stays empty if the anchor has none.
+		for _, a := range linkNode.Attr {
+			if a.Key == "href" {
+				link.Href = a.Val
+				break
+			}
+		}
+
+		links = append(links, link)
+	}
+
+	return links
+}
+
+// harvestText returns the text contained inside n.
+//
+// Note that the text could be under many layers of HTML
+// nesting. Hence the [html.ElementNode] case calls harvestText recursively.
+//
+// For the current project, harvestText's argument is always an
+// anchor-tag element.
+func harvestText(n *html.Node) string {
+	switch n.Type {
+	// The text of an [html.TextNode] is its [html.Node.Data]
+	// field, returned as is.
+	case html.TextNode:
+		return n.Data
+
+	// The text of an [html.ElementNode] is the text of its children, with
+	// every whitespace run collapsed to one space and the ends trimmed.
+	case html.ElementNode:
+		var builder strings.Builder
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			fmt.Fprintf(&builder, "%s ", harvestText(c))
+		}
+
+		rawResult := builder.String()
+		fields := strings.Fields(rawResult)
+		return strings.Join(fields, " ")
+
+	// Any other kind of node (e.g. [html.CommentNode]) doesn't
+	// have text.
+	default:
+		return ""
+	}
+}
+
+// harvestLinkNodes returns every anchor element (<a>) among the descendants of node.
+//
+// For the current project, harvestLinkNodes' argument is always the
+// top-level document node.
+func harvestLinkNodes(node *html.Node) []*html.Node {
+	var links []*html.Node
+
+	for child := range node.Descendants() {
+		if child.Type == html.ElementNode && child.DataAtom == atom.A {
+			links = append(links, child)
+		}
+	}
+
+	return links
+}
diff --git a/internal/test/findlinks_test.go b/internal/test/findlinks_test.go
index 1bc2f20..37c6152 100644
--- a/internal/test/findlinks_test.go
+++ b/internal/test/findlinks_test.go
@@ -1,6 +1,8 @@
 package test
 
 import (
+	"fmt"
+	"io"
 	"os"
 	"testing"
 
@@ -8,14 +10,16 @@ import (
 	"github.com/google/go-cmp/cmp"
 )
 
-func findLinksFile(filename string) ([]findlinks.Link, error) {
+type parserFn func(io.Reader) ([]findlinks.Link, error)
+
+func findLinksFile(filename string, parser parserFn) ([]findlinks.Link, error) {
 	f, err := os.Open(filename)
 	if err != nil {
 		panic("can't open test file")
 	}
 	defer f.Close()
 
-	return findlinks.FindLinks(f)
+	return parser(f)
 }
 
 func TestFindlinks(t *testing.T) {
@@ -37,16 +41,22 @@ func TestFindlinks(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		t.Run(test.filename, func(t *testing.T) {
-			links, err := findLinksFile(test.filename)
-			if err != nil {
-				t.Error(err)
-			}
-
-			if !cmp.Equal(links, test.links) {
-				t.Errorf("got %v, want %v", links, test.links)
-			}
-		})
+		parsers := []parserFn{findlinks.FindLinks, findlinks.Parse}
+
+		for i, p := range parsers {
+			testName := fmt.Sprintf("Parser %d %s", i+1, test.filename)
+
+			t.Run(testName, func(t *testing.T) {
+				links, err := findLinksFile(test.filename, p)
+				if err != nil {
+					t.Error(err)
+				}
+
+				if !cmp.Equal(links, test.links) {
+					t.Errorf("got %v, want %v", links, test.links)
+				}
+			})
+		}
 	}
 
 }
author	demo <demo@antix1>	2026-05-09 11:43:48 -0400
committer	demo <demo@antix1>	2026-05-09 11:43:48 -0400
commit	52bb422959147384291dcfbfe5a6142d363862ab (patch)
tree	90a17881bf865d43a0431beefded70e3f227e2e6
parent	7fe62a3f676d810c8df46fa24a7314a2209a9dd2 (diff)