about summary refs log tree commit diff
path: root/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'main.go')
-rw-r--r--  main.go  93
1 file changed, 56 insertions, 37 deletions
diff --git a/main.go b/main.go
index f85e753..921c53b 100644
--- a/main.go
+++ b/main.go
@@ -36,45 +36,8 @@ func (h *headerFlags) Set(value string) error {
return nil
}
-func init() {
- flag.StringVar(&input, "input", "", "url or file path")
- flag.IntVar(&timeout, "timeout", 10, "timeout for http requests in seconds")
- flag.StringVar(&userAgent, "user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.1", "set user-agent")
- flag.Var(&headers, "header", "add http header to request (e.g. -header \"Authorization: Bearer val1\")")
- flag.IntVar(&depth, "depth", 0, "recursion depth for same-domain links (0 disables crawling)")
- flag.IntVar(&delay, "delay", 4, "delay between requests in seconds when crawling (only applies if depth > 0)")
- flag.Parse()
-
- if input == "" {
- fmt.Printf("[err] input is required. use -input <url|file>\n")
- os.Exit(1)
- }
- httpClient.Timeout = time.Duration(timeout) * time.Second
-}
-
var defaultRegex = regexp.MustCompile(`(?:"|')((?:[a-zA-Z]{1,10}://|//)[^"'/]+\.[a-zA-Z]{2,}[^"']*|(?:/|\.\./|\./)[^"'><,;|()*\[\]\s][^"'><,;|()]{1,}|[a-zA-Z0-9_\-/]+/[a-zA-Z0-9_\-/.]+\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:[\?|#][^"']*)?)["']`)
-func main() {
- sourceType, err := resolveInput(input)
- if err != nil {
- fmt.Printf("[err] %v\n", err)
- os.Exit(1)
- }
-
- if sourceType == "url" {
- baseURL, _ := url.Parse(input)
- crawl(input, baseURL, depth)
- } else {
- content, err := fetchContent("file", input)
- if err != nil {
- fmt.Printf("[err] failed to fetch %s: %v\n", input, err)
- os.Exit(1)
- }
- matches := parseContent(content, defaultRegex)
- printMatches(input, matches)
- }
-}
-
func resolveInput(input string) (string, error) {
if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") {
return "url", nil
@@ -202,3 +165,59 @@ func resolveURL(base *url.URL, href string) string {
}
return base.ResolveReference(ref).String()
}
+
+func init() {
+ const usageHeader = `
+web crawling and link extraction tool written in Go, inspired by https://github.com/GerbenJavado/LinkFinder
+
+author: heqnx - https://heqnx.com
+
+`
+ flag.Usage = func() {
+ fmt.Fprint(os.Stderr, usageHeader)
+ fmt.Fprintf(os.Stderr, "usage of %s:\n", os.Args[0])
+ flag.PrintDefaults()
+ }
+ flag.CommandLine.SetOutput(os.Stderr)
+}
+
+func main() {
+ flag.StringVar(&input, "input", "", "url or file path")
+ flag.IntVar(&timeout, "timeout", 10, "timeout for http requests in seconds")
+ flag.StringVar(&userAgent, "user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.1", "set user-agent")
+ flag.Var(&headers, "header", "add http header to request (e.g. -header \"Authorization: Bearer val1\")")
+ flag.IntVar(&depth, "depth", 0, "recursion depth for same-domain links (0 disables crawling)")
+ flag.IntVar(&delay, "delay", 4, "delay between requests in seconds when crawling (only applies if depth > 0)")
+ flag.Parse()
+
+ if flag.NFlag() == 0 && flag.NArg() == 0 {
+ flag.Usage()
+ os.Exit(1)
+ }
+
+ if input == "" {
+ fmt.Printf("[err] input is required. use -input <url|file>\n")
+ os.Exit(1)
+ }
+
+ httpClient.Timeout = time.Duration(timeout) * time.Second
+ sourceType, err := resolveInput(input)
+ if err != nil {
+ fmt.Printf("[err] %v\n", err)
+ os.Exit(1)
+ }
+
+ if sourceType == "url" {
+ baseURL, _ := url.Parse(input)
+ crawl(input, baseURL, depth)
+ } else {
+ content, err := fetchContent("file", input)
+ if err != nil {
+ fmt.Printf("[err] failed to fetch %s: %v\n", input, err)
+ os.Exit(1)
+ }
+ matches := parseContent(content, defaultRegex)
+ printMatches(input, matches)
+ }
+}
+