1 files changed, 44 insertions, 20 deletions
diff --git a/internal/findlinks/findlinks.go b/internal/findlinks/findlinks.go
index 619d8a4..00635ec 100644
--- a/internal/findlinks/findlinks.go
+++ b/internal/findlinks/findlinks.go
@@ -24,33 +24,57 @@ func FindLinks(r io.Reader) ([]Link, error) {
 		return nil, fmt.Errorf("can't parse html reader: %w", err)
 	}
 
-	var links []Link
+	links := iterHTML(doc, nil)
 
-	for n := range doc.Descendants() {
-		trimmedData := strings.TrimSpace(n.Data)
+	return links, nil
+}
+
+func iterHTML(n *html.Node, buffer []Link) []Link {
+	if n.Type == html.ElementNode && n.DataAtom == atom.A {
+		var link Link
+
+		// Href: value of the anchor's first href attribute, "" when it has none.
+		link.Href = extractHref(n)
 
-		if n.Type == html.TextNode && trimmedData != "" {
-			fmt.Printf("Data: %s\n", trimmedData)
-			for anc := range n.Ancestors() {
-				fmt.Printf("\tAncestor: %v\n", anc.Data)
-			}
-			continue
+		// Text: untrimmed text nodes nested in the anchor, joined by single spaces.
+		chunks := extractText(n, nil)
+		link.Text = strings.Join(chunks, " ")
+
+		buffer = append(buffer, link)
+	} else {
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			buffer = iterHTML(c, buffer)
 		}
+	}
 
-		if n.Type == html.ElementNode && n.DataAtom == atom.A {
-			var link Link
+	return buffer
+}
 
-			// Scan the href.
-			for _, a := range n.Attr {
-				if a.Key == "href" {
-					link.Href = a.Val
-				}
-			}
+// extractHref returns the value of anchor's first href attribute, or "" if none.
+func extractHref(anchor *html.Node) string {
+	var href string
 
-			// FIXME: for now, only scan for hrefs.
-			links = append(links, link)
+	for _, a := range anchor.Attr {
+		if a.Key == atom.Href.String() {
+			href = a.Val
+			break
 		}
 	}
 
-	return links, nil
+	return href
+}
+
+// extractText appends the untrimmed Data of every text node nested under anchor
+// to buffer, in document order; non-text, non-element nodes are ignored.
+func extractText(anchor *html.Node, buffer []string) []string {
+	for c := anchor.FirstChild; c != nil; c = c.NextSibling {
+		switch c.Type {
+		case html.TextNode:
+			buffer = append(buffer, c.Data)
+		case html.ElementNode:
+			buffer = extractText(c, buffer)
+		}
+	}
+
+	return buffer
 }