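Force full concurrency wherever appropriate

Bump jobtracker to v0.0.0-20211024175651-68fbc3d60d80 and pass the new boolean second argument to every StartAndWait call (true for the recursive download, ref-finding and object-finding passes, false for the plain download and object-creation passes). Also lower maxConcurrency from 60 to 40.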

This commit is contained in:
maia tillie arson crimew 2021-10-24 20:02:57 +02:00
parent cf3a38881f
commit a6c6710b3c
4 changed files with 13 additions and 14 deletions

2
go.mod
View File

@ -4,7 +4,7 @@ go 1.14
require (
github.com/PuerkitoBio/goquery v1.6.0
github.com/deletescape/jobtracker v0.0.0-20211024172446-273a3d20c697
github.com/deletescape/jobtracker v0.0.0-20211024175651-68fbc3d60d80
github.com/go-git/go-billy/v5 v5.0.0
github.com/go-git/go-git/v5 v5.2.0
github.com/phuslu/log v1.0.75

5
go.sum
View File

@ -47,8 +47,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deletescape/jobtracker v0.0.0-20211024172446-273a3d20c697 h1:JcKm+SN5bLCcCctX37LpuGSn0VKm4MzDdKs8moiiFas=
github.com/deletescape/jobtracker v0.0.0-20211024172446-273a3d20c697/go.mod h1:CjxCBk3b3NGcLI2Tt6C+n7sSLaCUJ5IK3EnqchH0Ec8=
github.com/deletescape/jobtracker v0.0.0-20211024175651-68fbc3d60d80 h1:BfCzWZ59nNm+tAXyOkIIVMVQWOP7CkpQ/Xj/GyhDuy8=
github.com/deletescape/jobtracker v0.0.0-20211024175651-68fbc3d60d80/go.mod h1:CjxCBk3b3NGcLI2Tt6C+n7sSLaCUJ5IK3EnqchH0Ec8=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
@ -232,7 +232,6 @@ golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnf
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073 h1:xMPOj6Pz6UipU1wXLkrtqpHbR0AVFnyPEQq/wRWz9lM=
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=

View File

@ -174,7 +174,7 @@ func FetchGit(baseUrl, baseDir string) error {
log.Info().Str("base", baseUrl).Msg("fetching .git/ recursively")
jt := jobtracker.NewJobTracker(workers.RecursiveDownloadWorker, maxConcurrency, jobtracker.DefaultNapper)
jt.AddJobs(indexedFiles...)
jt.StartAndWait(&workers.RecursiveDownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir})
jt.StartAndWait(&workers.RecursiveDownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir}, true)
if err := checkout(baseDir); err != nil {
log.Error().Str("dir", baseDir).Err(err).Msg("failed to checkout")
@ -188,12 +188,12 @@ func FetchGit(baseUrl, baseDir string) error {
log.Info().Str("base", baseUrl).Msg("fetching common files")
jt := jobtracker.NewJobTracker(workers.DownloadWorker, maxConcurrency, jobtracker.DefaultNapper)
jt.AddJobs(commonFiles...)
jt.StartAndWait(workers.DownloadContext{C: c, BaseDir: baseDir, BaseUrl: baseUrl})
jt.StartAndWait(workers.DownloadContext{C: c, BaseDir: baseDir, BaseUrl: baseUrl}, false)
log.Info().Str("base", baseUrl).Msg("finding refs")
jt = jobtracker.NewJobTracker(workers.FindRefWorker, maxConcurrency, jobtracker.DefaultNapper)
jt.AddJobs(commonRefs...)
jt.StartAndWait(workers.FindRefContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir})
jt.StartAndWait(workers.FindRefContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir}, true)
log.Info().Str("base", baseUrl).Msg("finding packs")
infoPacksPath := utils.Url(baseDir, ".git/objects/info/packs")
@ -208,7 +208,7 @@ func FetchGit(baseUrl, baseDir string) error {
jt.AddJob(fmt.Sprintf(".git/objects/pack/pack-%s.idx", sha1[1]))
jt.AddJob(fmt.Sprintf(".git/objects/pack/pack-%s.pack", sha1[1]))
}
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir})
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir}, false)
}
log.Info().Str("base", baseUrl).Msg("finding objects")
@ -341,7 +341,7 @@ func FetchGit(baseUrl, baseDir string) error {
for obj := range objs {
jt.AddJob(obj)
}
jt.StartAndWait(workers.FindObjectsContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, Storage: objStorage})
jt.StartAndWait(workers.FindObjectsContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, Storage: objStorage}, true)
// exit early if we haven't managed to dump anything
if !utils.Exists(baseDir) {
@ -464,7 +464,7 @@ func fetchLfs(baseDir, baseUrl string) {
for _, hash := range hashes {
jt.AddJob(fmt.Sprintf(".git/lfs/objects/%s/%s/%s", hash[:2], hash[2:4], hash))
}
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir})
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir}, false)
}
}
@ -494,7 +494,7 @@ func fetchMissing(baseDir, baseUrl string, objStorage *filesystem.ObjectStorage)
jt.AddJob(entry.Name)
}
}
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, AllowHtml: true, AlllowEmpty: true})
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, AllowHtml: true, AlllowEmpty: true}, false)
jt = jobtracker.NewJobTracker(workers.CreateObjectWorker, maxConcurrency, jobtracker.DefaultNapper)
for _, f := range missingFiles {
@ -502,7 +502,7 @@ func fetchMissing(baseDir, baseUrl string, objStorage *filesystem.ObjectStorage)
jt.AddJob(f)
}
}
jt.StartAndWait(workers.CreateObjectContext{BaseDir: baseDir, Storage: objStorage, Index: &idx})
jt.StartAndWait(workers.CreateObjectContext{BaseDir: baseDir, Storage: objStorage, Index: &idx}, false)
}
}
}
@ -535,7 +535,7 @@ func fetchIgnored(baseDir, baseUrl string) error {
return err
}
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, AllowHtml: true, AlllowEmpty: true})
jt.StartAndWait(workers.DownloadContext{C: c, BaseUrl: baseUrl, BaseDir: baseDir, AllowHtml: true, AlllowEmpty: true}, false)
}
return nil
}

View File

@ -2,7 +2,7 @@ package goop
import "regexp"
const maxConcurrency = 60
const maxConcurrency = 40
var refPrefix = []byte{'r', 'e', 'f', ':'}
var (