diff options
Diffstat (limited to 'fetch.go')
| -rw-r--r-- | fetch.go | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/fetch.go b/fetch.go new file mode 100644 index 0000000..f81f327 --- /dev/null +++ b/fetch.go @@ -0,0 +1,44 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + + "golang.org/x/net/html" +) + +// fetch makes a GET request to refURL, returning the HTML contents of +// that webpage. An error is also returned. +// +// A [url.URL] type is used for refURL to simplify recursive or else +// repeated use of this function when crawling webpages to, say, build +// a sitemap. +func fetch(refURL url.URL) (*html.Node, error) { + rawURL := refURL.String() + + // For now we leave the client unconfigured. + client := http.Client{} + + req, err := http.NewRequest(http.MethodGet, rawURL, nil) + if err != nil { + return nil, fmt.Errorf("can't create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("client failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("status for %s for %s: %s", http.MethodGet, rawURL, resp.Status) + } + + htmlDoc, err := html.Parse(resp.Body) + if err != nil { + return nil, fmt.Errorf("can't parse response body: %w", err) + } + + return htmlDoc, nil +} |
