summaryrefslogtreecommitdiff
path: root/fetch.go
diff options
context:
space:
mode:
Diffstat (limited to 'fetch.go')
-rw-r--r--fetch.go44
1 files changed, 44 insertions, 0 deletions
diff --git a/fetch.go b/fetch.go
new file mode 100644
index 0000000..f81f327
--- /dev/null
+++ b/fetch.go
@@ -0,0 +1,44 @@
+package main
+
+import (
+ "fmt"
+ "net/http"
+ "net/url"
+ "time"
+
+ "golang.org/x/net/html"
+)
+
+// fetch sends a GET request to refURL and returns the HTML parse tree
+// of the response body. The error is non-nil if the request cannot be
+// built or sent (including on timeout), if the status is not 200 OK,
+// or if parsing the body fails; the returned node is then nil.
+// refURL is a [url.URL], not a string, to simplify recursive or
+// repeated calls when crawling webpages to, say, build a sitemap.
+func fetch(refURL url.URL) (*html.Node, error) {
+	rawURL := refURL.String()
+
+	// Bound the whole exchange so a stalled server can't block the caller.
+	client := http.Client{Timeout: 30 * time.Second}
+
+	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("can't create request: %w", err)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("client failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("status for %s for %s: %s", http.MethodGet, rawURL, resp.Status)
+	}
+
+	htmlDoc, err := html.Parse(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse response body: %w", err)
+	}
+	return htmlDoc, nil
+}