summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordemo <demo@antix1>2026-05-28 17:28:59 -0400
committerdemo <demo@antix1>2026-05-28 17:28:59 -0400
commit848481fd1a738225664fa398275fcd1afd61c4ff (patch)
tree9883e7f82b358de00e1d7b3b866f71f64d719a43
parent0d7056446009f38ea44249f2f74ab70b05410cfb (diff)
feat: implement shortcode feature
-rw-r--r--classic.go4
-rw-r--r--main.go30
-rw-r--r--shortcodes.go50
3 files changed, 80 insertions, 4 deletions
diff --git a/classic.go b/classic.go
index cdf8606..b7a3d8d 100644
--- a/classic.go
+++ b/classic.go
@@ -75,6 +75,10 @@ loop:
cancel()
wg.Wait()
+ // FIXME: eventually, when all crawlers terminate properly, we
+ // can move this out to main: that is, all crawlers will
+ // return the same seen map that will be processed by this
+ // code.
fmt.Println("Generating sitemap...")
sitemap, err := toSitemap(seen, maxDepth, maxURLs)
if err != nil {
diff --git a/main.go b/main.go
index 3582ed5..b48f6c9 100644
--- a/main.go
+++ b/main.go
@@ -3,6 +3,7 @@
package main
import (
+ "errors"
"flag"
"fmt"
"log"
@@ -18,9 +19,11 @@ func main() {
log.SetFlags(log.LstdFlags | log.Lshortfile)
maxConcurrency := flag.Int("c", 0, "Maximum number of concurrent queue pushes")
- startRawURL := flag.String("url", "", "Entry-point URL")
+ urlArg := flag.String("url", "", "Entry-point URL")
maxURLs := flag.Int("max", 0, "Maximum number of URLs to collect (omitted or 0 means no limit)")
maxDepth := flag.Int("depth", 0, "Maximum URL depth (omitted or 0 means no limit)")
+ shortcode := flag.String("shortcode", "", "URL shortcode")
+ shortcodeFilename := flag.String("scfile", "urls.csv", "Shortcode CSV file")
flag.Parse()
@@ -34,8 +37,9 @@ func main() {
log.Fatalf("Invalid -c argument: %d", *maxConcurrency)
}
- if *startRawURL == "" {
- log.Fatal("Missing -url argument")
+ startRawURL, err := chooseFrom(*urlArg, *shortcode, *shortcodeFilename)
+ if err != nil {
+ log.Fatal(err)
}
if *maxURLs < 0 {
@@ -46,7 +50,7 @@ func main() {
log.Fatalf("Invalid -depth argument: %d", *maxDepth)
}
- startURL, err := convertToURL(*startRawURL)
+ startURL, err := convertToURL(startRawURL)
if err != nil {
log.Fatal(err)
}
@@ -58,6 +62,24 @@ func main() {
})
}
+// chooseFrom resolves the crawl's starting URL from the -url and
+// -shortcode arguments. Exactly one of the two must be non-empty;
+// otherwise an error is returned.
+func chooseFrom(urlArg, shortcode, shortcodeFilename string) (string, error) {
+	haveURL := urlArg != ""
+	haveShortcode := shortcode != ""
+
+	switch {
+	case haveURL == haveShortcode:
+		// Neither flag or both flags were given.
+		return "", errors.New("-url and -shortcode flags either both missing or both present")
+	case haveShortcode:
+		// Only -shortcode: look it up in the shortcode CSV file.
+		return getURLFromShortcode(shortcodeFilename, shortcode)
+	}
+	return urlArg, nil
+}
+
// convertToURL parses the given rawURL into a [url.URL]. If the
// rawURL is missing a scheme, "https://" is prepended before parsing.
//
diff --git a/shortcodes.go b/shortcodes.go
new file mode 100644
index 0000000..978b39b
--- /dev/null
+++ b/shortcodes.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+ "encoding/csv"
+ "fmt"
+ "io"
+ "os"
+)
+
+// getURLFromShortcode scans the CSV file named by filename for a row
+// whose first column equals shortcode and returns that row's second
+// column. The first row must be a header starting "shortcode,url".
+//
+// An error is returned if the file can't be opened or parsed, if the
+// header is malformed, or if no row matches. The search is O(n).
+func getURLFromShortcode(filename, shortcode string) (string, error) {
+	f, err := os.Open(filename)
+	if err != nil {
+		return "", fmt.Errorf("can't open shortcode file: %w", err)
+	}
+	defer f.Close()
+
+	r := csv.NewReader(f)
+
+	// Vet the header; the length check avoids a panic on 1-column files.
+	header, err := r.Read()
+	if err != nil {
+		return "", fmt.Errorf("missing header line: %w", err)
+	}
+
+	if len(header) < 2 || header[0] != "shortcode" || header[1] != "url" {
+		return "", fmt.Errorf("invalid CSV header: %v", header)
+	}
+
+	for i := 1; ; i++ {
+		// Rows must match the header's field count, so record[1] exists.
+		record, err := r.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return "", fmt.Errorf("error reading %s, row %d: %w", filename, i, err)
+		}
+		if record[0] == shortcode {
+			return record[1], nil
+		}
+	}
+
+	return "", fmt.Errorf("no URL with shortcode %s", shortcode)
+}