diff options
| -rw-r--r-- | cmd/httpclient/main.go | 50 | ||||
| -rw-r--r-- | cmd/localclient/main.go | 14 | ||||
| -rw-r--r-- | internal/findlinks/doc.go | 47 | ||||
| -rw-r--r-- | main.go | 86 |
4 files changed, 111 insertions, 86 deletions
diff --git a/cmd/httpclient/main.go b/cmd/httpclient/main.go
new file mode 100644
index 0000000..945ee36
--- /dev/null
+++ b/cmd/httpclient/main.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"net/http"
+	"time"
+
+	"git.brandonirizarry.xyz/links/internal/findlinks"
+)
+
+func main() {
+	// Logging configuration.
+	log.SetFlags(log.LstdFlags | log.Lshortfile)
+
+	// CLI flag configuration.
+	rawURL := flag.String("url", "", "Web address of target HTML")
+	timeoutSecs := flag.Int("timeout", 2, "Number of seconds after which to time out")
+	flag.Parse()
+
+	if *rawURL == "" {
+		log.Fatal("Missing -url")
+	}
+
+	// Configure the request.
+	timeout := time.Duration(*timeoutSecs) * time.Second
+	client := http.Client{
+		Timeout: timeout,
+	}
+
+	req, err := http.NewRequest(http.MethodGet, *rawURL, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Perform the request.
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	links, err := findlinks.FindLinks(resp.Body)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Println(links)
+}
diff --git a/cmd/localclient/main.go b/cmd/localclient/main.go
new file mode 100644
index 0000000..21d405d
--- /dev/null
+++ b/cmd/localclient/main.go
@@ -0,0 +1,14 @@
+package main
+
+import (
+	"flag"
+	"log"
+)
+
+func main() {
+	filename := flag.String("file", "", "Local HTML file")
+	flag.Parse() // without this, *filename is always "" and the check below always fails
+	if *filename == "" {
+		log.Fatal("Missing -file argument")
+	}
+}
diff --git a/internal/findlinks/doc.go b/internal/findlinks/doc.go
new file mode 100644
index 0000000..28573f8
--- /dev/null
+++ b/internal/findlinks/doc.go
@@ -0,0 +1,47 @@
+// Package findlinks iterates over an HTML tree and extracts relevant
+// data from it.
+package findlinks
+
+import (
+	"fmt"
+	"io"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// A Link encapsulates the data harvested from a link.
+type Link struct {
+	Href string
+	Text string
+}
+
+// FindLinks parses r as HTML and turns every anchor element into a
+// [Link]. Only Href is set for now, and an anchor with no href still
+// yields a Link. It returns the Links, or an error if parsing fails.
+func FindLinks(r io.Reader) ([]Link, error) {
+	doc, err := html.Parse(r)
+	if err != nil {
+		return nil, fmt.Errorf("can't parse html reader: %w", err)
+	}
+
+	var links []Link
+
+	for n := range doc.Descendants() {
+		if n.Type == html.ElementNode && n.DataAtom == atom.A {
+			var link Link
+
+			// Scan the href.
+			for _, a := range n.Attr {
+				if a.Key == "href" {
+					link.Href = a.Val
+				}
+			}
+
+			// FIXME: for now, only scan for hrefs.
+			links = append(links, link)
+		}
+	}
+
+	return links, nil
+}
diff --git a/main.go b/main.go
deleted file mode 100644
index 5286e63..0000000
--- a/main.go
+++ /dev/null
@@ -1,86 +0,0 @@
-package main
-
-import (
-	"flag"
-	"fmt"
-	"io"
-	"log"
-	"net/http"
-	"time"
-
-	"golang.org/x/net/html"
-	"golang.org/x/net/html/atom"
-)
-
-type Link struct {
-	Href string
-	Text string
-}
-
-func main() {
-	// Logging configuration.
-	log.SetFlags(log.LstdFlags | log.Lshortfile)
-
-	// CLI flag configuration.
-	rawURL := flag.String("url", "", "Web address of target HTML")
-	timeoutSecs := flag.Int("timeout", 2, "Number of seconds after which to time out")
-	flag.Parse()
-
-	if *rawURL == "" {
-		log.Fatal("Missing -url")
-	}
-
-	// Configure the request.
-	timeout := time.Duration(*timeoutSecs) * time.Second
-	client := http.Client{
-		Timeout: timeout,
-	}
-
-	req, err := http.NewRequest(http.MethodGet, *rawURL, nil)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	// Perform the request.
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Fatal(err)
-	}
-	defer resp.Body.Close()
-
-	links, err := findLinks(resp.Body)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	fmt.Println(links)
-}
-
-// findLinks consumes the given [io.Reader], scraping it of anchor
-// tags. Each anchor tag is "unmarshalled" into a [Link]. The
-// resulting slice of Links is returned, along with an error.
-func findLinks(r io.Reader) ([]Link, error) {
-	doc, err := html.Parse(r)
-	if err != nil {
-		return nil, fmt.Errorf("can't parse html reader: %w", err)
-	}
-
-	var links []Link
-	for n := range doc.Descendants() {
-		if n.Type == html.ElementNode && n.DataAtom == atom.A {
-			var link Link
-
-			// Scan the href.
-			for _, a := range n.Attr {
-				if a.Key == "href" {
-					link.Href = a.Val
-				}
-			}
-
-			// FIXME: for now, only scan for hrefs.
-			links = append(links, link)
-		}
-	}
-
-	return links, nil
-}
