diff options
Diffstat (limited to 'main.go')
-rw-r--r-- | main.go | 93 |
1 files changed, 56 insertions, 37 deletions
@@ -36,45 +36,8 @@ func (h *headerFlags) Set(value string) error { return nil } -func init() { - flag.StringVar(&input, "input", "", "url or file path") - flag.IntVar(&timeout, "timeout", 10, "timeout for http requests in seconds") - flag.StringVar(&userAgent, "user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.1", "set user-agent") - flag.Var(&headers, "header", "add http header to request (e.g. -header \"Authorization: Bearer val1\")") - flag.IntVar(&depth, "depth", 0, "recursion depth for same-domain links (0 disables crawling)") - flag.IntVar(&delay, "delay", 4, "delay between requests in seconds when crawling (only applies if depth > 0)") - flag.Parse() - - if input == "" { - fmt.Printf("[err] input is required. use -input <url|file>\n") - os.Exit(1) - } - httpClient.Timeout = time.Duration(timeout) * time.Second -} - var defaultRegex = regexp.MustCompile(`(?:"|')((?:[a-zA-Z]{1,10}://|//)[^"'/]+\.[a-zA-Z]{2,}[^"']*|(?:/|\.\./|\./)[^"'><,;|()*\[\]\s][^"'><,;|()]{1,}|[a-zA-Z0-9_\-/]+/[a-zA-Z0-9_\-/.]+\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:[\?|#][^"']*)?)["']`) -func main() { - sourceType, err := resolveInput(input) - if err != nil { - fmt.Printf("[err] %v\n", err) - os.Exit(1) - } - - if sourceType == "url" { - baseURL, _ := url.Parse(input) - crawl(input, baseURL, depth) - } else { - content, err := fetchContent("file", input) - if err != nil { - fmt.Printf("[err] failed to fetch %s: %v\n", input, err) - os.Exit(1) - } - matches := parseContent(content, defaultRegex) - printMatches(input, matches) - } -} - func resolveInput(input string) (string, error) { if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") { return "url", nil @@ -202,3 +165,59 @@ func resolveURL(base *url.URL, href string) string { } return base.ResolveReference(ref).String() } + +func init() { + const usageHeader = ` +web crawling and link extraction tool written in Go, inspired by https://github.com/GerbenJavado/LinkFinder + +author: heqnx - https://heqnx.com + +` + flag.Usage = func() { + fmt.Fprint(os.Stderr, usageHeader) + fmt.Fprintf(os.Stderr, "usage of %s:\n", os.Args[0]) + flag.PrintDefaults() + } + flag.CommandLine.SetOutput(os.Stderr) +} + +func main() { + flag.StringVar(&input, "input", "", "url or file path") + flag.IntVar(&timeout, "timeout", 10, "timeout for http requests in seconds") + flag.StringVar(&userAgent, "user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.1", "set user-agent") + flag.Var(&headers, "header", "add http header to request (e.g. -header \"Authorization: Bearer val1\")") + flag.IntVar(&depth, "depth", 0, "recursion depth for same-domain links (0 disables crawling)") + flag.IntVar(&delay, "delay", 4, "delay between requests in seconds when crawling (only applies if depth > 0)") + flag.Parse() + + if flag.NFlag() == 0 && flag.NArg() == 0 { + flag.Usage() + os.Exit(1) + } + + if input == "" { + fmt.Printf("[err] input is required. use -input <url|file>\n") + os.Exit(1) + } + + httpClient.Timeout = time.Duration(timeout) * time.Second + sourceType, err := resolveInput(input) + if err != nil { + fmt.Printf("[err] %v\n", err) + os.Exit(1) + } + + if sourceType == "url" { + baseURL, _ := url.Parse(input) + crawl(input, baseURL, depth) + } else { + content, err := fetchContent("file", input) + if err != nil { + fmt.Printf("[err] failed to fetch %s: %v\n", input, err) + os.Exit(1) + } + matches := parseContent(content, defaultRegex) + printMatches(input, matches) + } +} + |