summary | refs | log | tree | commit | diff
path: root/internal/links/find.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/links/find.go')
-rw-r--r-- internal/links/find.go | 18
1 files changed, 9 insertions, 9 deletions
diff --git a/internal/links/find.go b/internal/links/find.go
index 17b6d8f..6e1ab63 100644
--- a/internal/links/find.go
+++ b/internal/links/find.go
@@ -10,7 +10,7 @@ import (
"golang.org/x/net/html/atom"
)
-func Parse(htmlInput io.Reader, baseURL *url.URL) ([]string, error) {
+func Parse(htmlInput io.Reader, refURL *url.URL) ([]string, error) {
var err error
hrefs, err := parse(htmlInput)
@@ -18,7 +18,7 @@ func Parse(htmlInput io.Reader, baseURL *url.URL) ([]string, error) {
return nil, fmt.Errorf("can't parse: %w", err)
}
- hrefs = filterByBaseURL(baseURL, hrefs)
+ hrefs = filterByBaseURL(refURL, hrefs)
return hrefs, nil
}
@@ -58,19 +58,19 @@ func findHrefs(doc *html.Node) []string {
}
// filterByBaseURL returns the slice of all web addresses in hrefs
-// that are under baseURL. In passing, it also resolves these with
-// respect to baseURL.
+// that are under refURL. In passing, it also resolves these with
+// respect to refURL.
//
// The motivation is that, when crawling pages
// to build a sitemap, the crawl should never leave the top-level Web
// domain those pages belong to.
-func filterByBaseURL(baseURL *url.URL, hrefs []string) []string {
+func filterByBaseURL(refURL *url.URL, hrefs []string) []string {
var neighbors []string
for _, href := range hrefs {
// Check that href parses as a URL, and at the same
- // time resolve it with respec to baseURL.
- u, err := baseURL.Parse(href)
+ // time resolve it with respect to refURL.
+ u, err := refURL.Parse(href)
if err != nil {
log.Printf("%s: %v", u, err)
continue
@@ -78,8 +78,8 @@ func filterByBaseURL(baseURL *url.URL, hrefs []string) []string {
// If href is a valid absolute URL, it will parse
// successfully, so we need to check hostnames.
- if u.Hostname() != baseURL.Hostname() {
- log.Printf("different hostnames: %s %s", u, baseURL)
+ if u.Hostname() != refURL.Hostname() {
+ log.Printf("different hostnames: %s %s", u, refURL)
continue
}