mirror of https://github.com/nyancrimew/goop.git
performance and edge case handling improvements
+ new, better and cuter logging
This commit is contained in:
parent
ed1f0311a2
commit
08d74d8c9b
13
cmd/goop.go
13
cmd/goop.go
|
@ -1,10 +1,11 @@
|
||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"github.com/deletescape/goop/pkg/goop"
|
|
||||||
"github.com/spf13/cobra"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"github.com/deletescape/goop/pkg/goop"
|
||||||
|
"github.com/phuslu/log"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
|
||||||
var force bool
|
var force bool
|
||||||
|
@ -21,12 +22,12 @@ var rootCmd = &cobra.Command{
|
||||||
}
|
}
|
||||||
if list {
|
if list {
|
||||||
if err := goop.CloneList(args[0], dir, force, keep); err != nil {
|
if err := goop.CloneList(args[0], dir, force, keep); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, err)
|
log.Error().Err(err).Msg("exiting")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if err := goop.Clone(args[0], dir, force, keep); err != nil {
|
if err := goop.Clone(args[0], dir, force, keep); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, err)
|
log.Error().Err(err).Msg("exiting")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,7 +42,7 @@ func init() {
|
||||||
|
|
||||||
func Execute() {
|
func Execute() {
|
||||||
if err := rootCmd.Execute(); err != nil {
|
if err := rootCmd.Execute(); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, err)
|
log.Error().Err(err).Msg("exiting")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
1
go.mod
1
go.mod
|
@ -6,6 +6,7 @@ require (
|
||||||
github.com/PuerkitoBio/goquery v1.6.0
|
github.com/PuerkitoBio/goquery v1.6.0
|
||||||
github.com/go-git/go-billy/v5 v5.0.0
|
github.com/go-git/go-billy/v5 v5.0.0
|
||||||
github.com/go-git/go-git/v5 v5.2.0
|
github.com/go-git/go-git/v5 v5.2.0
|
||||||
|
github.com/phuslu/log v1.0.75
|
||||||
github.com/spf13/cobra v1.1.1
|
github.com/spf13/cobra v1.1.1
|
||||||
github.com/valyala/fasthttp v1.16.0
|
github.com/valyala/fasthttp v1.16.0
|
||||||
)
|
)
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -164,6 +164,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA
|
||||||
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
|
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
|
||||||
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
||||||
|
github.com/phuslu/log v1.0.75 h1:2Qcqgwo1sOsvj7QIuclIS92hmWxIISI2+XskYM1Nw2A=
|
||||||
|
github.com/phuslu/log v1.0.75/go.mod h1:kzJN3LRifrepxThMjufQwS7S35yFAB+jAV1qgA7eBW4=
|
||||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
||||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
package jobtracker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JobTracker coordinates a pool of workers that pull string jobs from a shared
// queue and may enqueue follow-up jobs while they are working.
//
// Workers receive from Queue, bracket each job with StartWork/EndWork, and
// poll HasWork when the queue is momentarily empty. The goroutine that owns
// the tracker calls Wait to block until all work is finished.
type JobTracker struct {
	activeWorkers int32 // jobs currently being processed; accessed atomically
	queuedJobs    int32 // jobs added via AddJob that have not finished yet; accessed atomically
	didWork       int32 // becomes 1 once any job has finished; accessed atomically
	cond          *sync.Cond
	Queue         chan string
}

// Nap sleeps for 40 milliseconds. Workers call it between polls of an empty
// queue.
func Nap() {
	time.Sleep(40 * time.Millisecond)
}

// NewJobTracker returns a tracker with an empty, buffered job queue.
func NewJobTracker() *JobTracker {
	return &JobTracker{
		cond:  sync.NewCond(&sync.Mutex{}),
		Queue: make(chan string, 999999), // TODO: don't create oversized queues, we should try to save memory; maybe read the channel docs again
	}
}

// AddJob registers job as pending and places it on the queue.
//
// The pending counter is incremented before the send, under the same lock
// Wait holds while it checks for remaining work, so a job is never visible on
// the queue without also being counted.
func (jt *JobTracker) AddJob(job string) {
	// TODO: can we discard empty jobs here?
	jt.cond.L.Lock()
	defer jt.cond.L.Unlock()
	atomic.AddInt32(&jt.queuedJobs, 1)
	jt.Queue <- job
}

// StartWork marks one worker as busy. Call it when a job has been taken off
// the queue, and pair it with EndWork.
func (jt *JobTracker) StartWork() {
	atomic.AddInt32(&jt.activeWorkers, 1)
}

// EndWork marks the current job as finished: the worker is no longer busy and
// the job no longer counts as pending.
func (jt *JobTracker) EndWork() {
	atomic.StoreInt32(&jt.didWork, 1)
	atomic.AddInt32(&jt.activeWorkers, -1)
	atomic.AddInt32(&jt.queuedJobs, -1)
}

// hasWork reports whether any job is still pending or in progress. It has no
// side effects, so it is safe to call while holding jt.cond.L.
func (jt *JobTracker) hasWork() bool {
	// TODO: didWork is a somewhat ugly workaround to ensure we don't exit before doing work at least once,
	// this will however result in locking up if we create a JobTracker but never queue any jobs
	if atomic.LoadInt32(&jt.didWork) == 0 {
		return true
	}
	// A job that is queued but not yet picked up is still work, even when no
	// worker is busy at this instant; requiring both counters to be positive
	// would let Wait return (and close the queue) with jobs outstanding.
	return atomic.LoadInt32(&jt.queuedJobs) > 0 || atomic.LoadInt32(&jt.activeWorkers) > 0
}

// HasWork reports whether there is still work pending or in progress. When
// there is none, it also wakes any goroutine blocked in Wait.
func (jt *JobTracker) HasWork() bool {
	if jt.hasWork() {
		return true
	}
	// Broadcast while holding the lock so the wakeup cannot slip in between
	// Wait's check and its call to cond.Wait.
	jt.cond.L.Lock()
	jt.cond.Broadcast()
	jt.cond.L.Unlock()
	return false
}

// Wait blocks until no work is pending or in progress, then closes Queue.
// No jobs may be added after Wait has returned.
func (jt *JobTracker) Wait() {
	defer close(jt.Queue)

	jt.cond.L.Lock()
	defer jt.cond.L.Unlock() // runs before the close above; the lock must not stay held
	for jt.hasWork() {
		jt.cond.Wait()
	}
}
|
|
@ -2,9 +2,10 @@ package utils
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"github.com/PuerkitoBio/goquery"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
|
|
||||||
var htmlTag = []byte{'<', 'h', 't', 'm', 'l'}
|
var htmlTag = []byte{'<', 'h', 't', 'm', 'l'}
|
||||||
|
@ -36,5 +37,5 @@ func GetIndexedFiles(body []byte) ([]string, error) {
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
return files, err
|
return files, exitErr
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package utils
|
||||||
|
|
||||||
import "strings"
|
import "strings"
|
||||||
|
|
||||||
|
//TODO: replace all uses of this with the proper path utils
|
||||||
func Url(base, path string) string {
|
func Url(base, path string) string {
|
||||||
return strings.TrimSuffix(base, "/") + "/" + strings.TrimPrefix(path, "/")
|
return strings.TrimSuffix(base, "/") + "/" + strings.TrimPrefix(path, "/")
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
package workers
|
|
||||||
|
|
||||||
import "time"
|
|
||||||
|
|
||||||
const (
|
|
||||||
gracePeriod = 350 * time.Millisecond
|
|
||||||
graceTimes = 15
|
|
||||||
)
|
|
|
@ -1,65 +1,73 @@
|
||||||
package workers
|
package workers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"github.com/deletescape/goop/internal/utils"
|
|
||||||
"github.com/valyala/fasthttp"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
|
||||||
"time"
|
"github.com/deletescape/goop/internal/jobtracker"
|
||||||
|
"github.com/deletescape/goop/internal/utils"
|
||||||
|
"github.com/phuslu/log"
|
||||||
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func DownloadWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup, allowHtml bool) {
|
func DownloadWorker(c *fasthttp.Client, baseUrl, baseDir string, jt *jobtracker.JobTracker, allowHtml, allowEmpty bool) {
|
||||||
defer wg.Done()
|
|
||||||
var ctr int
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case file := <-queue:
|
case file := <-jt.Queue:
|
||||||
|
downloadWork(c, baseUrl, baseDir, file, jt, allowHtml, allowEmpty)
|
||||||
checkRatelimted()
|
|
||||||
if file == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
targetFile := utils.Url(baseDir, file)
|
|
||||||
if utils.Exists(targetFile) {
|
|
||||||
fmt.Printf("%s was downloaded already, skipping\n", targetFile)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
uri := utils.Url(baseUrl, file)
|
|
||||||
code, body, err := c.Get(nil, uri)
|
|
||||||
fmt.Printf("[-] Fetching %s [%d]\n", uri, code)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if code == 200 {
|
|
||||||
if !allowHtml && utils.IsHtml(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if utils.IsEmptyBytes(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := utils.CreateParentFolders(targetFile); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := ioutil.WriteFile(targetFile, body, os.ModePerm); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
}
|
|
||||||
} else if code == 429 {
|
|
||||||
setRatelimited()
|
|
||||||
queue <- file
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
// TODO: get rid of dirty hack somehow
|
if !jt.HasWork() {
|
||||||
if ctr >= graceTimes {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ctr++
|
jobtracker.Nap()
|
||||||
time.Sleep(gracePeriod)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func downloadWork(c *fasthttp.Client, baseUrl, baseDir, file string, jt *jobtracker.JobTracker, allowHtml, allowEmpty bool) {
|
||||||
|
jt.StartWork()
|
||||||
|
defer jt.EndWork()
|
||||||
|
|
||||||
|
if file == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
checkRatelimted()
|
||||||
|
|
||||||
|
targetFile := utils.Url(baseDir, file)
|
||||||
|
if utils.Exists(targetFile) {
|
||||||
|
log.Info().Str("file", targetFile).Msg("already fetched, skipping redownload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
uri := utils.Url(baseUrl, file)
|
||||||
|
code, body, err := c.Get(nil, uri)
|
||||||
|
if err == nil && code != 200 {
|
||||||
|
if code == 429 {
|
||||||
|
setRatelimited()
|
||||||
|
jt.AddJob(file)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn().Str("uri", uri).Int("code", code).Msg("couldn't fetch file")
|
||||||
|
return
|
||||||
|
} else if err != nil {
|
||||||
|
log.Error().Str("uri", uri).Int("code", code).Err(err).Msg("couldn't fetch file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !allowHtml && utils.IsHtml(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be html, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !allowEmpty && utils.IsEmptyBytes(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be empty, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := utils.CreateParentFolders(targetFile); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("couldn't create parent directories")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := ioutil.WriteFile(targetFile, body, os.ModePerm); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("clouldn't write file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Info().Str("uri", uri).Str("file", file).Msg("fetched file")
|
||||||
|
}
|
||||||
|
|
|
@ -2,110 +2,123 @@ package workers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/deletescape/goop/internal/jobtracker"
|
||||||
"github.com/deletescape/goop/internal/utils"
|
"github.com/deletescape/goop/internal/utils"
|
||||||
"github.com/go-git/go-git/v5/plumbing"
|
"github.com/go-git/go-git/v5/plumbing"
|
||||||
"github.com/go-git/go-git/v5/plumbing/object"
|
"github.com/go-git/go-git/v5/plumbing/object"
|
||||||
"github.com/go-git/go-git/v5/storage/filesystem"
|
"github.com/go-git/go-git/v5/storage/filesystem"
|
||||||
|
"github.com/phuslu/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var checkedObjs = make(map[string]bool)
|
var checkedObjs = make(map[string]bool)
|
||||||
var checkedObjsMutex sync.Mutex
|
var checkedObjsMutex sync.Mutex
|
||||||
|
|
||||||
func FindObjectsWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup, storage *filesystem.ObjectStorage) {
|
func FindObjectsWorker(c *fasthttp.Client, baseUrl, baseDir string, jt *jobtracker.JobTracker, storage *filesystem.ObjectStorage) {
|
||||||
defer wg.Done()
|
|
||||||
var ctr int
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case obj := <-queue:
|
case obj := <-jt.Queue:
|
||||||
checkRatelimted()
|
findObjWork(c, baseUrl, baseDir, obj, jt, storage)
|
||||||
if obj == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ctr = 0
|
|
||||||
checkedObjsMutex.Lock()
|
|
||||||
if checked, ok := checkedObjs[obj]; checked && ok {
|
|
||||||
// Obj has already been checked
|
|
||||||
checkedObjsMutex.Unlock()
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
checkedObjs[obj] = true
|
|
||||||
}
|
|
||||||
checkedObjsMutex.Unlock()
|
|
||||||
file := fmt.Sprintf(".git/objects/%s/%s", obj[:2], obj[2:])
|
|
||||||
fullPath := utils.Url(baseDir, file)
|
|
||||||
if utils.Exists(fullPath) {
|
|
||||||
fmt.Printf("%s was downloaded already, skipping\n", fullPath)
|
|
||||||
encObj, err := storage.EncodedObject(plumbing.AnyObject, plumbing.NewHash(obj))
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
decObj, err := object.DecodeObject(storage, encObj)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
referencedHashes := utils.GetReferencedHashes(decObj)
|
|
||||||
for _, h := range referencedHashes {
|
|
||||||
queue <- h
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
uri := utils.Url(baseUrl, file)
|
|
||||||
code, body, err := c.Get(nil, uri)
|
|
||||||
fmt.Printf("[-] Fetching %s [%d]\n", uri, code)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if code == 200 {
|
|
||||||
if utils.IsHtml(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if utils.IsEmptyBytes(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := utils.CreateParentFolders(fullPath); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := ioutil.WriteFile(fullPath, body, os.ModePerm); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
encObj, err := storage.EncodedObject(plumbing.AnyObject, plumbing.NewHash(obj))
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
decObj, err := object.DecodeObject(storage, encObj)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
referencedHashes := utils.GetReferencedHashes(decObj)
|
|
||||||
for _, h := range referencedHashes {
|
|
||||||
queue <- h
|
|
||||||
}
|
|
||||||
} else if code == 429 {
|
|
||||||
setRatelimited()
|
|
||||||
queue <- obj
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
// TODO: get rid of dirty hack somehow
|
if !jt.HasWork() {
|
||||||
if ctr >= graceTimes {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ctr++
|
jobtracker.Nap()
|
||||||
time.Sleep(gracePeriod)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func findObjWork(c *fasthttp.Client, baseUrl, baseDir, obj string, jt *jobtracker.JobTracker, storage *filesystem.ObjectStorage) {
|
||||||
|
jt.StartWork()
|
||||||
|
defer jt.EndWork()
|
||||||
|
|
||||||
|
if obj == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
checkRatelimted()
|
||||||
|
|
||||||
|
checkedObjsMutex.Lock()
|
||||||
|
if checked, ok := checkedObjs[obj]; checked && ok {
|
||||||
|
// Obj has already been checked
|
||||||
|
checkedObjsMutex.Unlock()
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
checkedObjs[obj] = true
|
||||||
|
}
|
||||||
|
checkedObjsMutex.Unlock()
|
||||||
|
|
||||||
|
file := fmt.Sprintf(".git/objects/%s/%s", obj[:2], obj[2:])
|
||||||
|
fullPath := utils.Url(baseDir, file)
|
||||||
|
if utils.Exists(fullPath) {
|
||||||
|
log.Info().Str("obj", obj).Msg("already fetched, skipping redownload")
|
||||||
|
encObj, err := storage.EncodedObject(plumbing.AnyObject, plumbing.NewHash(obj))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("obj", obj).Err(err).Msg("couldn't read object")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
decObj, err := object.DecodeObject(storage, encObj)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("obj", obj).Err(err).Msg("couldn't decode object")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
referencedHashes := utils.GetReferencedHashes(decObj)
|
||||||
|
for _, h := range referencedHashes {
|
||||||
|
jt.AddJob(h)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
uri := utils.Url(baseUrl, file)
|
||||||
|
code, body, err := c.Get(nil, uri)
|
||||||
|
if err == nil && code != 200 {
|
||||||
|
if code == 429 {
|
||||||
|
setRatelimited()
|
||||||
|
jt.AddJob(obj)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn().Str("obj", obj).Int("code", code).Msg("failed to fetch object")
|
||||||
|
return
|
||||||
|
} else if err != nil {
|
||||||
|
log.Error().Str("obj", obj).Int("code", code).Err(err).Msg("failed to fetch object")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if utils.IsHtml(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be html, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if utils.IsEmptyBytes(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be empty, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := utils.CreateParentFolders(fullPath); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", fullPath).Err(err).Msg("couldn't create parent directories")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := ioutil.WriteFile(fullPath, body, os.ModePerm); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", fullPath).Err(err).Msg("clouldn't write file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().Str("obj", obj).Msg("fetched object")
|
||||||
|
|
||||||
|
encObj, err := storage.EncodedObject(plumbing.AnyObject, plumbing.NewHash(obj))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("obj", obj).Err(err).Msg("couldn't read object")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
decObj, err := object.DecodeObject(storage, encObj)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("obj", obj).Err(err).Msg("couldn't decode object")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
referencedHashes := utils.GetReferencedHashes(decObj)
|
||||||
|
for _, h := range referencedHashes {
|
||||||
|
jt.AddJob(h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
package workers
|
package workers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"github.com/deletescape/goop/internal/utils"
|
|
||||||
"github.com/valyala/fasthttp"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
|
"github.com/deletescape/goop/internal/jobtracker"
|
||||||
|
"github.com/deletescape/goop/internal/utils"
|
||||||
|
"github.com/phuslu/log"
|
||||||
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
var refRegex = regexp.MustCompile(`(?m)(refs(/[a-zA-Z0-9\-\.\_\*]+)+)`)
|
var refRegex = regexp.MustCompile(`(?m)(refs(/[a-zA-Z0-9\-\.\_\*]+)+)`)
|
||||||
|
@ -17,92 +18,102 @@ var branchRegex = regexp.MustCompile(`(?m)branch ["'](.+)["']`)
|
||||||
var checkedRefs = make(map[string]bool)
|
var checkedRefs = make(map[string]bool)
|
||||||
var checkedRefsMutex sync.Mutex
|
var checkedRefsMutex sync.Mutex
|
||||||
|
|
||||||
func FindRefWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
|
func FindRefWorker(c *fasthttp.Client, baseUrl, baseDir string, jt *jobtracker.JobTracker) {
|
||||||
defer wg.Done()
|
|
||||||
var ctr int
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case path := <-queue:
|
case path := <-jt.Queue:
|
||||||
checkRatelimted()
|
findRefWork(c, baseUrl, baseDir, path, jt)
|
||||||
if path == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ctr = 0
|
|
||||||
checkedRefsMutex.Lock()
|
|
||||||
if checked, ok := checkedRefs[path]; checked && ok {
|
|
||||||
// Ref has already been checked
|
|
||||||
checkedRefsMutex.Unlock()
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
checkedRefs[path] = true
|
|
||||||
}
|
|
||||||
checkedRefsMutex.Unlock()
|
|
||||||
targetFile := utils.Url(baseDir, path)
|
|
||||||
if utils.Exists(targetFile) {
|
|
||||||
fmt.Printf("%s was downloaded already, skipping\n", targetFile)
|
|
||||||
content, err := ioutil.ReadFile(targetFile)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
}
|
|
||||||
for _, ref := range refRegex.FindAll(content, -1) {
|
|
||||||
queue <- utils.Url(".git", string(ref))
|
|
||||||
queue <- utils.Url(".git/logs", string(ref))
|
|
||||||
}
|
|
||||||
if path == ".git/config" || path == ".git/FETCH_HEAD" {
|
|
||||||
// TODO check the actual origin instead of just assuming origin here
|
|
||||||
for _, branch := range branchRegex.FindAllSubmatch(content, -1) {
|
|
||||||
queue <- utils.Url(".git/refs/remotes/origin", string(branch[1]))
|
|
||||||
queue <- utils.Url(".git/logs/refs/remotes/origin", string(branch[1]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
uri := utils.Url(baseUrl, path)
|
|
||||||
code, body, err := c.Get(nil, uri)
|
|
||||||
fmt.Printf("[-] Fetching %s [%d]\n", uri, code)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if code == 200 {
|
|
||||||
if utils.IsHtml(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if utils.IsEmptyBytes(body) {
|
|
||||||
fmt.Printf("warning: %s appears to be an empty file, skipping\n", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := utils.CreateParentFolders(targetFile); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := ioutil.WriteFile(targetFile, body, os.ModePerm); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, ref := range refRegex.FindAll(body, -1) {
|
|
||||||
queue <- utils.Url(".git", string(ref))
|
|
||||||
queue <- utils.Url(".git/logs", string(ref))
|
|
||||||
}
|
|
||||||
if path == ".git/config" || path == ".git/FETCH_HEAD" {
|
|
||||||
// TODO check the actual origin instead of just assuming origin here
|
|
||||||
for _, branch := range branchRegex.FindAllSubmatch(body, -1) {
|
|
||||||
queue <- utils.Url(".git/refs/remotes/origin", string(branch[1]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if code == 429 {
|
|
||||||
setRatelimited()
|
|
||||||
queue <- path
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
// TODO: get rid of dirty hack somehow
|
if !jt.HasWork() {
|
||||||
if ctr >= graceTimes {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ctr++
|
jobtracker.Nap()
|
||||||
time.Sleep(gracePeriod)
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func findRefWork(c *fasthttp.Client, baseUrl, baseDir, path string, jt *jobtracker.JobTracker) {
|
||||||
|
jt.StartWork()
|
||||||
|
defer jt.EndWork()
|
||||||
|
|
||||||
|
// TODO: do we still need this check here?
|
||||||
|
if path == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
checkedRefsMutex.Lock()
|
||||||
|
if checked, ok := checkedRefs[path]; checked && ok {
|
||||||
|
// Ref has already been checked
|
||||||
|
checkedRefsMutex.Unlock()
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
checkedRefs[path] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
targetFile := utils.Url(baseDir, path)
|
||||||
|
if utils.Exists(targetFile) {
|
||||||
|
log.Info().Str("file", targetFile).Msg("already fetched, skipping redownload")
|
||||||
|
content, err := ioutil.ReadFile(targetFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("file", targetFile).Err(err).Msg("error while reading file")
|
||||||
|
}
|
||||||
|
for _, ref := range refRegex.FindAll(content, -1) {
|
||||||
|
jt.AddJob(utils.Url(".git", string(ref)))
|
||||||
|
jt.AddJob(utils.Url(".git/logs", string(ref)))
|
||||||
|
}
|
||||||
|
if path == ".git/config" || path == ".git/FETCH_HEAD" {
|
||||||
|
// TODO check the actual origin instead of just assuming origin here
|
||||||
|
for _, branch := range branchRegex.FindAllSubmatch(content, -1) {
|
||||||
|
jt.AddJob(utils.Url(".git/refs/remotes/origin", string(branch[1])))
|
||||||
|
jt.AddJob(utils.Url(".git/logs/refs/remotes/origin", string(branch[1])))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
uri := utils.Url(baseUrl, path)
|
||||||
|
code, body, err := c.Get(nil, uri)
|
||||||
|
if err == nil && code != 200 {
|
||||||
|
if code == 429 {
|
||||||
|
setRatelimited()
|
||||||
|
jt.AddJob(path)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn().Str("uri", uri).Int("code", code).Msg("failed to fetch ref")
|
||||||
|
return
|
||||||
|
} else if err != nil {
|
||||||
|
log.Error().Str("uri", uri).Int("code", code).Err(err).Msg("failed to fetch ref")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if utils.IsHtml(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be html, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if utils.IsEmptyBytes(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("file appears to be empty, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := utils.CreateParentFolders(targetFile); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("couldn't create parent directories")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := ioutil.WriteFile(targetFile, body, os.ModePerm); err != nil {
|
||||||
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("clouldn't write file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().Str("uri", uri).Msg("fetched ref")
|
||||||
|
|
||||||
|
for _, ref := range refRegex.FindAll(body, -1) {
|
||||||
|
jt.AddJob(utils.Url(".git", string(ref)))
|
||||||
|
jt.AddJob(utils.Url(".git/logs", string(ref)))
|
||||||
|
}
|
||||||
|
if path == ".git/config" || path == ".git/FETCH_HEAD" {
|
||||||
|
// TODO check the actual origin instead of just assuming origin here
|
||||||
|
for _, branch := range branchRegex.FindAllSubmatch(body, -1) {
|
||||||
|
jt.AddJob(utils.Url(".git/refs/remotes/origin", string(branch[1])))
|
||||||
|
jt.AddJob(utils.Url(".git/logs/refs/remotes/origin", string(branch[1])))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
package workers
|
package workers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/phuslu/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
var rateLimited int32
|
var rateLimited int32
|
||||||
|
@ -13,7 +14,7 @@ var unsetter int32
|
||||||
func setRatelimited() {
|
func setRatelimited() {
|
||||||
if atomic.CompareAndSwapInt32(&rateLimited, 0, 1) {
|
if atomic.CompareAndSwapInt32(&rateLimited, 0, 1) {
|
||||||
atomic.StoreUint32(&ratelimitCount, atomic.LoadUint32(&ratelimitCount)+1)
|
atomic.StoreUint32(&ratelimitCount, atomic.LoadUint32(&ratelimitCount)+1)
|
||||||
fmt.Println("[-] Server is rate limiting us, starting to wait")
|
log.Warn().Uint32("count", atomic.LoadUint32(&ratelimitCount)).Msg("server is rate limiting us, waiting...")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,74 +1,87 @@
|
||||||
package workers
|
package workers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"github.com/deletescape/goop/internal/utils"
|
|
||||||
"github.com/valyala/fasthttp"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"github.com/deletescape/goop/internal/jobtracker"
|
||||||
|
"github.com/deletescape/goop/internal/utils"
|
||||||
|
"github.com/phuslu/log"
|
||||||
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func RecursiveDownloadWorker(c *fasthttp.Client, queue chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
|
func RecursiveDownloadWorker(c *fasthttp.Client, baseUrl, baseDir string, jt *jobtracker.JobTracker) {
|
||||||
defer wg.Done()
|
|
||||||
var ctr int
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case f := <-queue:
|
case f, ok := <-jt.Queue:
|
||||||
checkRatelimted()
|
if ok {
|
||||||
if f == "" {
|
recursiveDownload(c, baseUrl, baseDir, f, jt)
|
||||||
continue
|
|
||||||
}
|
|
||||||
ctr = 0
|
|
||||||
filePath := utils.Url(baseDir, f)
|
|
||||||
isDir := strings.HasSuffix(f, "/")
|
|
||||||
if !isDir && utils.Exists(filePath) {
|
|
||||||
fmt.Printf("%s was downloaded already, skipping\n", filePath)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
uri := utils.Url(baseUrl, f)
|
|
||||||
code, body, err := c.Get(nil, uri)
|
|
||||||
fmt.Printf("[-] Fetching %s [%d]\n", uri, code)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if code == 429 {
|
|
||||||
setRatelimited()
|
|
||||||
queue <- f
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if isDir {
|
|
||||||
if !utils.IsHtml(body) {
|
|
||||||
fmt.Printf("warning: %s doesn't appear to be an index", uri)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
indexedFiles, err := utils.GetIndexedFiles(body)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, idxf := range indexedFiles {
|
|
||||||
queue <- utils.Url(f, idxf)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if err := utils.CreateParentFolders(filePath); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := ioutil.WriteFile(filePath, body, os.ModePerm); err != nil {
|
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
// TODO: get rid of dirty hack somehow
|
if !jt.HasWork() {
|
||||||
if ctr >= graceTimes {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ctr++
|
jobtracker.Nap()
|
||||||
time.Sleep(gracePeriod)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func recursiveDownload(c *fasthttp.Client, baseUrl, baseDir, f string, jt *jobtracker.JobTracker) {
|
||||||
|
jt.StartWork()
|
||||||
|
defer jt.EndWork()
|
||||||
|
|
||||||
|
if f == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
checkRatelimted()
|
||||||
|
|
||||||
|
filePath := utils.Url(baseDir, f)
|
||||||
|
isDir := strings.HasSuffix(f, "/")
|
||||||
|
if !isDir && utils.Exists(filePath) {
|
||||||
|
log.Info().Str("file", filePath).Msg("already fetched, skipping redownload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
uri := utils.Url(baseUrl, f)
|
||||||
|
code, body, err := c.Get(nil, uri)
|
||||||
|
if err == nil && code != 200 {
|
||||||
|
if code == 429 {
|
||||||
|
setRatelimited()
|
||||||
|
jt.AddJob(f)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warn().Str("uri", uri).Int("code", code).Msg("failed to fetch file")
|
||||||
|
return
|
||||||
|
} else if err != nil {
|
||||||
|
log.Error().Str("uri", uri).Int("code", code).Err(err).Msg("failed to fetch file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if isDir {
|
||||||
|
if !utils.IsHtml(body) {
|
||||||
|
log.Warn().Str("uri", uri).Msg("not a directory index, skipping")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
indexedFiles, err := utils.GetIndexedFiles(body)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Str("uri", uri).Err(err).Msg("couldn't get list of indexed files")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Info().Str("uri", uri).Msg("fetched directory listing")
|
||||||
|
for _, idxf := range indexedFiles {
|
||||||
|
jt.AddJob(utils.Url(f, idxf))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := utils.CreateParentFolders(filePath); err != nil {
|
||||||
|
log.Error().Str("file", filePath).Err(err).Msg("couldn't create parent directories")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := ioutil.WriteFile(filePath, body, os.ModePerm); err != nil {
|
||||||
|
log.Error().Str("file", filePath).Err(err).Msg("couldn't write to file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Info().Str("uri", uri).Msg("fetched file")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
20
main.go
20
main.go
|
@ -1,7 +1,23 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import "github.com/deletescape/goop/cmd"
|
import (
|
||||||
|
"os"
|
||||||
|
|
||||||
func main() {
|
"github.com/deletescape/goop/cmd"
|
||||||
|
"github.com/phuslu/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if log.IsTerminal(os.Stderr.Fd()) {
|
||||||
|
log.DefaultLogger = log.Logger{
|
||||||
|
TimeFormat: "15:04:05",
|
||||||
|
Caller: 1,
|
||||||
|
Writer: &log.ConsoleWriter{
|
||||||
|
ColorOutput: true,
|
||||||
|
QuoteString: true,
|
||||||
|
EndWithMessage: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
cmd.Execute()
|
cmd.Execute()
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,15 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/deletescape/goop/internal/jobtracker"
|
||||||
"github.com/deletescape/goop/internal/utils"
|
"github.com/deletescape/goop/internal/utils"
|
||||||
"github.com/deletescape/goop/internal/workers"
|
"github.com/deletescape/goop/internal/workers"
|
||||||
"github.com/go-git/go-billy/v5/osfs"
|
"github.com/go-git/go-billy/v5/osfs"
|
||||||
|
@ -14,17 +23,11 @@ import (
|
||||||
"github.com/go-git/go-git/v5/plumbing/object"
|
"github.com/go-git/go-git/v5/plumbing/object"
|
||||||
"github.com/go-git/go-git/v5/storage/filesystem"
|
"github.com/go-git/go-git/v5/storage/filesystem"
|
||||||
"github.com/go-git/go-git/v5/storage/filesystem/dotgit"
|
"github.com/go-git/go-git/v5/storage/filesystem/dotgit"
|
||||||
|
"github.com/phuslu/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
"io/ioutil"
|
|
||||||
"net/url"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO: support proxy environment variables
|
||||||
var c = &fasthttp.Client{
|
var c = &fasthttp.Client{
|
||||||
Name: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
|
Name: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36",
|
||||||
MaxConnsPerHost: utils.MaxInt(maxConcurrency+250, fasthttp.DefaultMaxConnsPerHost),
|
MaxConnsPerHost: utils.MaxInt(maxConcurrency+250, fasthttp.DefaultMaxConnsPerHost),
|
||||||
|
@ -35,18 +38,6 @@ var c = &fasthttp.Client{
|
||||||
MaxConnWaitTimeout: 10 * time.Second,
|
MaxConnWaitTimeout: 10 * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
|
|
||||||
func createQueue(scale int) chan string {
|
|
||||||
wg = sync.WaitGroup{}
|
|
||||||
return make(chan string, maxConcurrency*scale)
|
|
||||||
}
|
|
||||||
|
|
||||||
func waitForQueue(queue chan string) {
|
|
||||||
wg.Wait()
|
|
||||||
close(queue)
|
|
||||||
}
|
|
||||||
|
|
||||||
func CloneList(listFile, baseDir string, force, keep bool) error {
|
func CloneList(listFile, baseDir string, force, keep bool) error {
|
||||||
lf, err := os.Open(listFile)
|
lf, err := os.Open(listFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -64,17 +55,15 @@ func CloneList(listFile, baseDir string, force, keep bool) error {
|
||||||
if dir != "" {
|
if dir != "" {
|
||||||
parsed, err := url.Parse(u)
|
parsed, err := url.Parse(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("uri", u).Err(err).Msg("couldn't parse uri")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
dir = utils.Url(dir, parsed.Host)
|
dir = utils.Url(dir, parsed.Host)
|
||||||
}
|
}
|
||||||
fmt.Printf("[-] Downloading %s to %s\n", u, dir)
|
log.Info().Str("target", u).Str("dir", dir).Bool("force", force).Bool("keep", keep).Msg("starting download")
|
||||||
if err := Clone(u, dir, force, keep); err != nil {
|
if err := Clone(u, dir, force, keep); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("target", u).Str("dir", dir).Bool("force", force).Bool("keep", keep).Msg("download failed")
|
||||||
}
|
}
|
||||||
fmt.Println()
|
|
||||||
fmt.Println()
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -100,55 +89,46 @@ func Clone(u, dir string, force, keep bool) error {
|
||||||
baseDir = parsed.Host
|
baseDir = parsed.Host
|
||||||
}
|
}
|
||||||
|
|
||||||
if !utils.Exists(baseDir) {
|
if utils.Exists(baseDir) {
|
||||||
if err := os.MkdirAll(baseDir, os.ModePerm); err != nil {
|
if !utils.IsFolder(baseDir) {
|
||||||
|
return fmt.Errorf("%s is not a directory", baseDir)
|
||||||
|
}
|
||||||
|
isEmpty, err := utils.IsEmpty(baseDir)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
if !isEmpty {
|
||||||
if !utils.IsFolder(baseDir) {
|
if force {
|
||||||
return fmt.Errorf("%s is not a directory", dir)
|
|
||||||
}
|
|
||||||
isEmpty, err := utils.IsEmpty(baseDir)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if !isEmpty {
|
|
||||||
if force || keep {
|
|
||||||
if !keep {
|
|
||||||
if err := os.RemoveAll(baseDir); err != nil {
|
if err := os.RemoveAll(baseDir); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := os.MkdirAll(baseDir, os.ModePerm); err != nil {
|
} else if !keep {
|
||||||
return err
|
return fmt.Errorf("%s is not empty", baseDir)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
return fmt.Errorf("%s is not empty", baseDir)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return FetchGit(baseUrl, baseDir)
|
return FetchGit(baseUrl, baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FetchGit(baseUrl, baseDir string) error {
|
func FetchGit(baseUrl, baseDir string) error {
|
||||||
fmt.Printf("[-] Testing %s/.git/HEAD ", baseUrl)
|
log.Info().Str("base", baseUrl).Msg("testing for .git/HEAD")
|
||||||
code, body, err := c.Get(nil, utils.Url(baseUrl, ".git/HEAD"))
|
code, body, err := c.Get(nil, utils.Url(baseUrl, ".git/HEAD"))
|
||||||
fmt.Printf("[%d]\n", code)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if code != 200 {
|
if code != 200 {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD does not exist\n", baseUrl)
|
log.Warn().Str("base", baseUrl).Int("code", code).Msg(".git/HEAD doesn't appear to exist, clone will most likely fail")
|
||||||
} else if !bytes.HasPrefix(body, refPrefix) {
|
} else if !bytes.HasPrefix(body, refPrefix) {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s/.git/HEAD is not a git HEAD file\n", baseUrl)
|
log.Warn().Str("base", baseUrl).Int("code", code).Msg(".git/HEAD doesn't appear to be a git HEAD file, clone will most likely fail")
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("[-] Testing %s/.git/ ", baseUrl)
|
log.Info().Str("base", baseUrl).Msg("testing if recursive download is possible")
|
||||||
code, body, err = c.Get(nil, utils.Url(baseUrl, ".git/"))
|
code, body, err = c.Get(body, utils.Url(baseUrl, ".git/"))
|
||||||
fmt.Printf("[%d]\n", code)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if utils.IgnoreError(err) {
|
if utils.IgnoreError(err) {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("base", baseUrl).Int("code", code).Err(err)
|
||||||
} else {
|
} else {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -160,48 +140,45 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if utils.StringsContain(indexedFiles, "HEAD") {
|
if utils.StringsContain(indexedFiles, "HEAD") {
|
||||||
fmt.Println("[-] Fetching .git recursively")
|
log.Info().Str("base", baseUrl).Msg("fetching .git/ recursively")
|
||||||
queue := createQueue(2000)
|
jt := jobtracker.NewJobTracker()
|
||||||
wg.Add(maxConcurrency)
|
|
||||||
for w := 1; w <= maxConcurrency; w++ {
|
for w := 1; w <= maxConcurrency; w++ {
|
||||||
go workers.RecursiveDownloadWorker(c, queue, baseUrl, baseDir, &wg)
|
go workers.RecursiveDownloadWorker(c, baseUrl, baseDir, jt)
|
||||||
}
|
}
|
||||||
for _, f := range indexedFiles {
|
for _, f := range indexedFiles {
|
||||||
// TODO: add support for non top level git repos
|
// TODO: add support for non top level git repos
|
||||||
queue <- utils.Url(".git", f)
|
jt.AddJob(utils.Url(".git", f))
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
fmt.Println("[-] Running git checkout .")
|
|
||||||
|
log.Info().Str("dir", baseDir).Msg("running git checkout .")
|
||||||
cmd := exec.Command("git", "checkout", ".")
|
cmd := exec.Command("git", "checkout", ".")
|
||||||
cmd.Dir = baseDir
|
cmd.Dir = baseDir
|
||||||
return cmd.Run()
|
return cmd.Run()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("[-] Fetching common files")
|
log.Info().Str("base", baseUrl).Msg("fetching common files")
|
||||||
queue := createQueue(len(commonFiles))
|
jt := jobtracker.NewJobTracker()
|
||||||
concurrency := utils.MinInt(maxConcurrency, len(commonFiles))
|
concurrency := utils.MinInt(maxConcurrency, len(commonFiles))
|
||||||
wg.Add(concurrency)
|
|
||||||
for w := 1; w <= concurrency; w++ {
|
for w := 1; w <= concurrency; w++ {
|
||||||
go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
|
go workers.DownloadWorker(c, baseUrl, baseDir, jt, false, false)
|
||||||
}
|
}
|
||||||
for _, f := range commonFiles {
|
for _, f := range commonFiles {
|
||||||
queue <- f
|
jt.AddJob(f)
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
|
|
||||||
fmt.Println("[-] Finding refs")
|
log.Info().Str("base", baseUrl).Msg("finding refs")
|
||||||
queue = createQueue(100)
|
|
||||||
wg.Add(maxConcurrency)
|
|
||||||
for w := 1; w <= maxConcurrency; w++ {
|
for w := 1; w <= maxConcurrency; w++ {
|
||||||
go workers.FindRefWorker(c, queue, baseUrl, baseDir, &wg)
|
go workers.FindRefWorker(c, baseUrl, baseDir, jt)
|
||||||
}
|
}
|
||||||
for _, ref := range commonRefs {
|
for _, ref := range commonRefs {
|
||||||
queue <- ref
|
jt.AddJob(ref)
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
|
|
||||||
fmt.Println("[-] Finding packs")
|
log.Info().Str("base", baseUrl).Msg("finding packs")
|
||||||
infoPacksPath := utils.Url(baseDir, ".git/objects/info/packs")
|
infoPacksPath := utils.Url(baseDir, ".git/objects/info/packs")
|
||||||
if utils.Exists(infoPacksPath) {
|
if utils.Exists(infoPacksPath) {
|
||||||
infoPacks, err := ioutil.ReadFile(infoPacksPath)
|
infoPacks, err := ioutil.ReadFile(infoPacksPath)
|
||||||
|
@ -209,20 +186,19 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
hashes := packRegex.FindAllSubmatch(infoPacks, -1)
|
hashes := packRegex.FindAllSubmatch(infoPacks, -1)
|
||||||
queue = createQueue(len(hashes) * 3)
|
jt = jobtracker.NewJobTracker()
|
||||||
concurrency := utils.MinInt(maxConcurrency, len(hashes))
|
concurrency := utils.MinInt(maxConcurrency, len(hashes))
|
||||||
wg.Add(concurrency)
|
|
||||||
for w := 1; w <= concurrency; w++ {
|
for w := 1; w <= concurrency; w++ {
|
||||||
go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
|
go workers.DownloadWorker(c, baseUrl, baseDir, jt, false, false)
|
||||||
}
|
}
|
||||||
for _, sha1 := range hashes {
|
for _, sha1 := range hashes {
|
||||||
queue <- fmt.Sprintf(".git/objects/pack/pack-%s.idx", sha1[1])
|
jt.AddJob(fmt.Sprintf(".git/objects/pack/pack-%s.idx", sha1[1]))
|
||||||
queue <- fmt.Sprintf(".git/objects/pack/pack-%s.pack", sha1[1])
|
jt.AddJob(fmt.Sprintf(".git/objects/pack/pack-%s.pack", sha1[1]))
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("[-] Finding objects")
|
log.Info().Str("base", baseUrl).Msg("finding objects")
|
||||||
objs := make(map[string]bool) // object "set"
|
objs := make(map[string]bool) // object "set"
|
||||||
//var packed_objs [][]byte
|
//var packed_objs [][]byte
|
||||||
|
|
||||||
|
@ -262,11 +238,11 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
refName := strings.TrimPrefix(path, refLogPrefix)
|
refName := strings.TrimPrefix(path, refLogPrefix)
|
||||||
filePath := utils.Url(gitRefsDir, refName)
|
filePath := utils.Url(gitRefsDir, refName)
|
||||||
if !utils.Exists(filePath) {
|
if !utils.Exists(filePath) {
|
||||||
fmt.Println("[-] Generating ref file for", refName)
|
log.Info().Str("dir", baseDir).Str("ref", refName).Msg("generating ref file")
|
||||||
|
|
||||||
content, err := ioutil.ReadFile(path)
|
content, err := ioutil.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("dir", baseDir).Str("ref", refName).Err(err).Msg("couldn't read reflog file")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -275,12 +251,12 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
lastEntryObj := logObjs[len(logObjs)-1][1]
|
lastEntryObj := logObjs[len(logObjs)-1][1]
|
||||||
|
|
||||||
if err := utils.CreateParentFolders(filePath); err != nil {
|
if err := utils.CreateParentFolders(filePath); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("file", filePath).Err(err).Msg("couldn't create parent directories")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := ioutil.WriteFile(filePath, lastEntryObj, os.ModePerm); err != nil {
|
if err := ioutil.WriteFile(filePath, lastEntryObj, os.ModePerm); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("file", filePath).Err(err).Msg("couldn't write to file")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -297,7 +273,7 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
|
|
||||||
content, err := ioutil.ReadFile(f)
|
content, err := ioutil.ReadFile(f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("file", f).Err(err).Msg("couldn't read reflog file")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,8 +292,7 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
var idx index.Index
|
var idx index.Index
|
||||||
decoder := index.NewDecoder(f)
|
decoder := index.NewDecoder(f)
|
||||||
if err := decoder.Decode(&idx); err != nil {
|
if err := decoder.Decode(&idx); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("dir", baseDir).Err(err).Msg("couldn't decode git index")
|
||||||
//return err
|
|
||||||
}
|
}
|
||||||
for _, entry := range idx.Entries {
|
for _, entry := range idx.Entries {
|
||||||
objs[entry.Hash.String()] = true
|
objs[entry.Hash.String()] = true
|
||||||
|
@ -329,63 +304,66 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
objs[hash.String()] = true
|
objs[hash.String()] = true
|
||||||
encObj, err := storage.EncodedObject(plumbing.AnyObject, hash)
|
encObj, err := storage.EncodedObject(plumbing.AnyObject, hash)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error: %s\n", err)
|
return err
|
||||||
|
|
||||||
}
|
}
|
||||||
decObj, err := object.DecodeObject(storage, encObj)
|
decObj, err := object.DecodeObject(storage, encObj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error: %s\n", err)
|
return err
|
||||||
}
|
}
|
||||||
for _, hash := range utils.GetReferencedHashes(decObj) {
|
for _, hash := range utils.GetReferencedHashes(decObj) {
|
||||||
objs[hash] = true
|
objs[hash] = true
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
log.Error().Str("dir", baseDir).Err(err).Msg("error while processing object files")
|
||||||
}
|
}
|
||||||
// TODO: find more objects to fetch in pack files and remove packed objects from list of objects to be fetched
|
// TODO: find more objects to fetch in pack files and remove packed objects from list of objects to be fetched
|
||||||
/*for _, pack := range storage.ObjectPacks() {
|
/*for _, pack := range storage.ObjectPacks() {
|
||||||
storage.IterEncodedObjects()
|
storage.IterEncodedObjects()
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
fmt.Println("[-] Fetching objects")
|
log.Info().Str("base", baseUrl).Msg("fetching object")
|
||||||
queue = createQueue(2000)
|
|
||||||
wg.Add(maxConcurrency)
|
|
||||||
for w := 1; w <= maxConcurrency; w++ {
|
for w := 1; w <= maxConcurrency; w++ {
|
||||||
go workers.FindObjectsWorker(c, queue, baseUrl, baseDir, &wg, storage)
|
go workers.FindObjectsWorker(c, baseUrl, baseDir, jt, storage)
|
||||||
}
|
}
|
||||||
for obj := range objs {
|
for obj := range objs {
|
||||||
queue <- obj
|
jt.AddJob(obj)
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
|
|
||||||
fmt.Println("[-] Running git checkout .")
|
// TODO: does this even make sense???????
|
||||||
|
if !utils.Exists(baseDir) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().Str("dir", baseDir).Msg("running git checkout .")
|
||||||
cmd := exec.Command("git", "checkout", ".")
|
cmd := exec.Command("git", "checkout", ".")
|
||||||
cmd.Dir = baseDir
|
cmd.Dir = baseDir
|
||||||
stderr := &bytes.Buffer{}
|
stderr := &bytes.Buffer{}
|
||||||
cmd.Stderr = stderr
|
cmd.Stderr = stderr
|
||||||
if err := cmd.Run(); err != nil {
|
if err := cmd.Run(); err != nil {
|
||||||
if exErr, ok := err.(*exec.ExitError); ok && exErr.ProcessState.ExitCode() == 255 || exErr.ProcessState.ExitCode() == 128 {
|
if exErr, ok := err.(*exec.ExitError); ok && (exErr.ProcessState.ExitCode() == 255 || exErr.ProcessState.ExitCode() == 128) {
|
||||||
fmt.Println("[-] Attempting to fetch missing files")
|
log.Info().Str("base", baseUrl).Str("dir", baseDir).Msg("attempting to fetch missing files")
|
||||||
out, err := ioutil.ReadAll(stderr)
|
out, err := ioutil.ReadAll(stderr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
errors := stdErrRegex.FindAllSubmatch(out, -1)
|
errors := stdErrRegex.FindAllSubmatch(out, -1)
|
||||||
queue = createQueue(len(errors) * 3)
|
jt = jobtracker.NewJobTracker()
|
||||||
concurrency := utils.MinInt(maxConcurrency, len(errors))
|
concurrency := utils.MinInt(maxConcurrency, len(errors))
|
||||||
wg.Add(concurrency)
|
|
||||||
for w := 1; w <= concurrency; w++ {
|
for w := 1; w <= concurrency; w++ {
|
||||||
go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, true)
|
go workers.DownloadWorker(c, baseUrl, baseDir, jt, true, true)
|
||||||
}
|
}
|
||||||
for _, e := range errors {
|
for _, e := range errors {
|
||||||
if !bytes.HasSuffix(e[1], phpSuffix) {
|
if !bytes.HasSuffix(e[1], phpSuffix) {
|
||||||
queue <- string(e[1])
|
jt.AddJob(string(e[1]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
|
|
||||||
// Fetch files marked as missing in status
|
// Fetch files marked as missing in status
|
||||||
|
// TODO: why do we parse status AND decode index ???????
|
||||||
cmd := exec.Command("git", "status")
|
cmd := exec.Command("git", "status")
|
||||||
cmd.Dir = baseDir
|
cmd.Dir = baseDir
|
||||||
stdout := &bytes.Buffer{}
|
stdout := &bytes.Buffer{}
|
||||||
|
@ -398,18 +376,17 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
deleted := statusRegex.FindAllSubmatch(out, -1)
|
deleted := statusRegex.FindAllSubmatch(out, -1)
|
||||||
queue = createQueue(len(deleted) * 3)
|
|
||||||
concurrency = utils.MinInt(maxConcurrency, len(deleted))
|
concurrency = utils.MinInt(maxConcurrency, len(deleted))
|
||||||
wg.Add(concurrency)
|
jt = jobtracker.NewJobTracker()
|
||||||
for w := 1; w <= concurrency; w++ {
|
for w := 1; w <= concurrency; w++ {
|
||||||
go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, true)
|
go workers.DownloadWorker(c, baseUrl, baseDir, jt, true, true)
|
||||||
}
|
}
|
||||||
for _, e := range deleted {
|
for _, e := range deleted {
|
||||||
if !bytes.HasSuffix(e[1], phpSuffix) {
|
if !bytes.HasSuffix(e[1], phpSuffix) {
|
||||||
queue <- string(e[1])
|
jt.AddJob(string(e[1]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterate over index to find missing files
|
// Iterate over index to find missing files
|
||||||
|
@ -425,18 +402,17 @@ func FetchGit(baseUrl, baseDir string) error {
|
||||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||||
//return err
|
//return err
|
||||||
}
|
}
|
||||||
queue = createQueue(len(idx.Entries) * 3)
|
|
||||||
concurrency = utils.MinInt(maxConcurrency, len(idx.Entries))
|
concurrency = utils.MinInt(maxConcurrency, len(idx.Entries))
|
||||||
wg.Add(concurrency)
|
jt = jobtracker.NewJobTracker()
|
||||||
for w := 1; w <= concurrency; w++ {
|
for w := 1; w <= concurrency; w++ {
|
||||||
go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, true)
|
go workers.DownloadWorker(c, baseUrl, baseDir, jt, true, true)
|
||||||
}
|
}
|
||||||
for _, entry := range idx.Entries {
|
for _, entry := range idx.Entries {
|
||||||
if !strings.HasSuffix(entry.Name, ".php") && !utils.Exists(utils.Url(baseDir, entry.Name)) {
|
if !strings.HasSuffix(entry.Name, ".php") && !utils.Exists(utils.Url(baseDir, entry.Name)) {
|
||||||
queue <- entry.Name
|
jt.AddJob(entry.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
waitForQueue(queue)
|
jt.Wait()
|
||||||
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue