diff --git a/internal/utils/bytes.go b/internal/utils/bytes.go
new file mode 100644
index 0000000..8ed1eba
--- /dev/null
+++ b/internal/utils/bytes.go
@@ -0,0 +1,26 @@
+package utils
+
+import (
+	"unicode"
+	"unicode/utf8"
+)
+
+// IsEmptyBytes reports whether b is empty for display purposes:
+// it contains nothing but NUL bytes and Unicode whitespace.
+func IsEmptyBytes(b []byte) bool {
+	for i := 0; i < len(b); {
+		// NUL padding does not count as content.
+		if b[i] == 0 {
+			i++
+			continue
+		}
+		// Decode one rune at a time; invalid UTF-8 decodes to RuneError
+		// with size 1, which is not whitespace and so ends the scan.
+		r, size := utf8.DecodeRune(b[i:])
+		if !unicode.IsSpace(r) {
+			return false
+		}
+		i += size
+	}
+	return true
+}
diff --git a/internal/utils/errors.go b/internal/utils/errors.go
new file mode 100644
index 0000000..f32fcc7
--- /dev/null
+++ b/internal/utils/errors.go
@@ -0,0 +1,13 @@
+package utils
+
+// ignoredErrors lists transport errors that are expected while scraping
+// and should be reported but must not abort the run.
+var ignoredErrors = []string{
+	"too many redirects detected when doing the request",
+}
+
+// IgnoreError reports whether err is a known benign error that callers
+// should log and continue past instead of returning.
+func IgnoreError(err error) bool {
+	return err != nil && StringsContain(ignoredErrors, err.Error())
+}
diff --git a/internal/workers/download.go b/internal/workers/download.go
index c92ecda..2c81451 100644
--- a/internal/workers/download.go
+++ b/internal/workers/download.go
@@ -10,7 +10,6 @@ import (
 )
 
 func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
-	wg.Add(1)
 	defer wg.Done()
 	for file := range queue {
 		if file == "" {
@@ -33,7 +32,7 @@
 			fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
 			continue
 		}
-		if len(body) == 0 {
+		if utils.IsEmptyBytes(body) {
 			fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
 			continue
 		}
diff --git a/internal/workers/findobjects.go b/internal/workers/findobjects.go
index 1ba4ea3..ab716f1 100644
--- a/internal/workers/findobjects.go
+++ b/internal/workers/findobjects.go
@@ -17,7 +17,6 @@ var checkedObjs = make(map[string]bool)
 var checkedObjsMutex sync.Mutex
 
 func FindObjectsWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup, storage *filesystem.ObjectStorage) {
-	wg.Add(1)
 	defer wg.Done()
 	var ctr int
 	for {
@@ -68,7 +67,7 @@
 			fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
 			continue
 		}
-		if len(body) == 0 {
+		if utils.IsEmptyBytes(body) {
 			fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
 			continue
 		}
diff --git a/internal/workers/findref.go b/internal/workers/findref.go
index 02fa58e..4a410f7 100644
--- a/internal/workers/findref.go
+++ b/internal/workers/findref.go
@@ -17,7 +17,6 @@ var checkedRefs = make(map[string]bool)
 var checkedRefsMutex sync.Mutex
 
 func FindRefWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
-	wg.Add(1)
 	defer wg.Done()
 	var ctr int
 	for {
@@ -60,7 +59,7 @@
 			fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
 			continue
 		}
-		if len(body) == 0 {
+		if utils.IsEmptyBytes(body) {
 			fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
 			continue
 		}
diff --git a/internal/workers/recursivedownload.go b/internal/workers/recursivedownload.go
index cbfd5d2..c2e7f58 100644
--- a/internal/workers/recursivedownload.go
+++ b/internal/workers/recursivedownload.go
@@ -12,7 +12,6 @@ import (
 )
 
 func RecursiveDownloadWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
-	wg.Add(1)
 	defer wg.Done()
 	var ctr int
 	for {
diff --git a/pkg/goop/clone.go b/pkg/goop/clone.go
index 3362855..c3a051d 100644
--- a/pkg/goop/clone.go
+++ b/pkg/goop/clone.go
@@ -138,18 +138,20 @@ func FetchGit(baseUrl, baseDir string) error {
 	}
 
 	if code != 200 {
-		fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD does not exist", baseUrl)
-		//return fmt.Errorf("error: %s/.git/HEAD does not exist", baseUrl)
+		fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD does not exist\n", baseUrl)
 	} else if !bytes.HasPrefix(body, refPrefix) {
-		fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD is not a git HEAD file", baseUrl)
-		//return fmt.Errorf("error: %s/.git/HEAD is not a git HEAD file", baseUrl)
+		fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD is not a git HEAD file\n", baseUrl)
 	}
 
 	fmt.Printf("[-] Testing %s/.git/ ", baseUrl)
 	code, body, err = c.Get(nil, utils.Url(baseUrl, ".git/"))
 	fmt.Printf("[%d]\n", code)
 	if err != nil {
-		return err
+		if utils.IgnoreError(err) {
+			fmt.Fprintf(os.Stderr, "error: %s\n", err)
+		} else {
+			return err
+		}
 	}
 
 	if code == 200 && utils.IsHtml(body) {
@@ -159,7 +161,8 @@
 	}
 	if utils.StringsContain(indexedFiles, "HEAD") {
 		fmt.Println("[-] Fetching .git recursively")
-		queue := createQueue(1000)
+		queue := createQueue(2000)
+		wg.Add(maxConcurrency)
 		for w := 1; w <= maxConcurrency; w++ {
 			go workers.RecursiveDownloadWorker(c, queue, baseUrl, baseDir, &wg)
 		}
@@ -177,7 +180,9 @@
 
 	fmt.Println("[-] Fetching common files")
 	queue := createQueue(len(commonFiles))
-	for w := 1; w <= utils.MinInt(maxConcurrency, len(commonFiles)); w++ {
+	concurrency := utils.MinInt(maxConcurrency, len(commonFiles))
+	wg.Add(concurrency)
+	for w := 1; w <= concurrency; w++ {
 		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
 	}
 	for _, f := range commonFiles {
@@ -188,6 +193,7 @@
 
 	fmt.Println("[-] Finding refs")
 	queue = createQueue(100)
+	wg.Add(maxConcurrency)
 	for w := 1; w <= maxConcurrency; w++ {
 		go workers.FindRefWorker(c, queue, baseUrl, baseDir, &wg)
 	}
@@ -205,7 +211,9 @@
 		}
 		hashes := packRegex.FindAllSubmatch(infoPacks, -1)
 		queue = createQueue(len(hashes) * 3)
-		for w := 1; w <= utils.MinInt(maxConcurrency, len(hashes)); w++ {
+		concurrency := utils.MinInt(maxConcurrency, len(hashes))
+		wg.Add(concurrency)
+		for w := 1; w <= concurrency; w++ {
 			go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
 		}
 		for _, sha1 := range hashes {
@@ -225,6 +233,7 @@
 		utils.Url(baseDir, ".git/info/refs"),
 		utils.Url(baseDir, ".git/FETCH_HEAD"),
 		utils.Url(baseDir, ".git/ORIG_HEAD"),
+		utils.Url(baseDir, ".git/HEAD"),
 	}
 
 	gitRefsDir := utils.Url(baseDir, ".git/refs")
@@ -315,6 +324,7 @@
 
 	fmt.Println("[-] Fetching objects")
 	queue = createQueue(2000)
+	wg.Add(maxConcurrency)
 	for w := 1; w <= maxConcurrency; w++ {
 		go workers.FindObjectsWorker(c, queue, baseUrl, baseDir, &wg, storage)
 	}