diff options
| author | demo <demo@antix1> | 2026-05-28 11:45:10 -0400 |
|---|---|---|
| committer | demo <demo@antix1> | 2026-05-28 11:45:10 -0400 |
| commit | 2e4b6abc07036df7a001b1b8305edc55f27dda9f (patch) | |
| tree | ae2b70f925ee1da38eaac63c4c6d93843ddaf387 | |
| parent | 6867410a6b30ec4a3d96f2438b202add8519c959 (diff) | |
refactor: move html document creation to getBatch
Also, if there are errors, I log them and simply return a nil slice.
| -rw-r--r-- | classic.go | 14 |
| -rw-r--r-- | fetch.go | 11 |
2 files changed, 17 insertions, 8 deletions
@@ -1,11 +1,14 @@ package main import ( + "bytes" "context" "fmt" "log" "net/url" "sync" + + "golang.org/x/net/html" ) func classic(startURL url.URL, maxConcurrency, maxURLs, maxDepth int) { @@ -73,12 +76,19 @@ loop: } func getBatch(u url.URL) []url.URL { - doc, err := fetch(u) + htmlBytes, err := fetch(u) + if err != nil { + log.Print(err) + return nil + } + + htmlDoc, err := html.Parse(bytes.NewReader(htmlBytes)) if err != nil { log.Print(err) + return nil } - batch := findURLs(u, doc) + batch := findURLs(u, htmlDoc) return batch } @@ -2,10 +2,9 @@ package main import ( "fmt" + "io" "net/http" "net/url" - - "golang.org/x/net/html" ) // fetch makes a GET request to refURL, returning the HTML contents of @@ -14,7 +13,7 @@ import ( // A [url.URL] type is used for refURL to simplify recursive or else // repeated use of this function when crawling webpages to, say, build // a sitemap. -func fetch(refURL url.URL) (*html.Node, error) { +func fetch(refURL url.URL) ([]byte, error) { rawURL := refURL.String() // For now we leave the client unconfigured. @@ -35,10 +34,10 @@ func fetch(refURL url.URL) (*html.Node, error) { return nil, fmt.Errorf("status for %s for %s: %s", http.MethodGet, rawURL, resp.Status) } - htmlDoc, err := html.Parse(resp.Body) + htmlBytes, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("can't parse response body: %w", err) + return nil, fmt.Errorf("can't read reponse body into byte buffer") } - return htmlDoc, nil + return htmlBytes, nil } |
