Attempt to fetch missing files after checkout

Tillie Kottmann 2020-11-21 18:52:38 +01:00
parent bc6e1064e3
commit 56ddf5ad00
3 changed files with 35 additions and 6 deletions
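If `git checkout .` exits with status 255, its stderr is now scanned for git's "unable to read sha1 file of <path>" errors, and each reported path is queued for a direct download from the remote, this time with the HTML check disabled (restored worktree files may legitimately be HTML). Paths ending in .php are not re-requested, presumably because a live server would execute them rather than return their source.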

View File

@@ -9,7 +9,7 @@ import (
 	"sync"
 )
 
-func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
+func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir string, wg *sync.WaitGroup, allowHtml bool) {
 	defer wg.Done()
 	for file := range queue {
 		if file == "" {
@@ -28,7 +28,7 @@ func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir st
 			continue
 		}
 		if code == 200 {
-			if utils.IsHtml(body) {
+			if !allowHtml && utils.IsHtml(body) {
 				fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
 				continue
 			}
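The new allowHtml parameter lets the recovery pass accept bodies that look like HTML. utils.IsHtml itself is not shown in this commit; a minimal sketch of what such a check could look like follows (the heuristics here are an assumption, not the project's actual implementation):

package main

import (
	"bytes"
	"fmt"
)

// isHtml is a hypothetical stand-in for utils.IsHtml, which this diff does
// not show. It guesses whether a response body is an HTML error page rather
// than a raw git object by looking for common markup near the start.
func isHtml(body []byte) bool {
	head := bytes.TrimSpace(body)
	if len(head) > 512 {
		head = head[:512]
	}
	head = bytes.ToLower(head)
	return bytes.HasPrefix(head, []byte("<!doctype html")) ||
		bytes.HasPrefix(head, []byte("<html")) ||
		bytes.Contains(head, []byte("<head"))
}

func main() {
	fmt.Println(isHtml([]byte("<!DOCTYPE html><html></html>"))) // true
	fmt.Println(isHtml([]byte{0x78, 0x01, 0x9d, 0x8e}))         // false: zlib-compressed git object
}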

View File

@@ -183,7 +183,7 @@ func FetchGit(baseUrl, baseDir string) error {
 	concurrency := utils.MinInt(maxConcurrency, len(commonFiles))
 	wg.Add(concurrency)
 	for w := 1; w <= concurrency; w++ {
-		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
+		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
 	}
 	for _, f := range commonFiles {
 		queue <- f
@@ -214,7 +214,7 @@ func FetchGit(baseUrl, baseDir string) error {
 	concurrency := utils.MinInt(maxConcurrency, len(hashes))
 	wg.Add(concurrency)
 	for w := 1; w <= concurrency; w++ {
-		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
+		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
 	}
 	for _, sha1 := range hashes {
 		queue <- fmt.Sprintf(".git/objects/pack/pack-%s.idx", sha1[1])
@@ -315,7 +315,7 @@ func FetchGit(baseUrl, baseDir string) error {
 		}
 		return nil
 	}); err != nil {
-		return err
+		fmt.Fprintf(os.Stderr, "error: %s\n", err)
 	}
 	// TODO: find more objects to fetch in pack files and remove packed objects from list of objects to be fetched
 	/*for _, pack := range storage.ObjectPacks() {
@@ -336,5 +336,32 @@ func FetchGit(baseUrl, baseDir string) error {
 	fmt.Println("[-] Running git checkout .")
 	cmd := exec.Command("git", "checkout", ".")
 	cmd.Dir = baseDir
-	return cmd.Run()
+	stderr := &bytes.Buffer{}
+	cmd.Stderr = stderr
+	if err := cmd.Run(); err != nil {
+		if exErr, ok := err.(*exec.ExitError); ok && exErr.ProcessState.ExitCode() == 255 {
+			fmt.Println("[-] Attempting to fetch missing files")
+			out, err := ioutil.ReadAll(stderr)
+			if err != nil {
+				return err
+			}
+			errors := stdErrRegex.FindAllSubmatch(out, -1)
+			queue = createQueue(len(errors) * 3)
+			concurrency := utils.MinInt(maxConcurrency, len(errors))
+			wg.Add(concurrency)
+			for w := 1; w <= concurrency; w++ {
+				go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, true)
+			}
+			for _, e := range errors {
+				if !bytes.HasSuffix(e[1], phpSuffix) {
+					queue <- string(e[1])
+				}
+			}
+			close(queue)
+			wg.Wait()
+		} else {
+			return err
+		}
+	}
+	return nil
 }
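Condensed, the new checkout path does this: run git checkout ., and if it exits with code 255, mine stderr for the paths git could not materialize, then download them. A simplified, sequential sketch of that flow using the same regex, without the worker pool, createQueue, or the phpSuffix filter (the function name missingFilesAfterCheckout is invented for illustration):

package main

import (
	"bytes"
	"fmt"
	"os/exec"
	"regexp"
)

var stdErrRegex = regexp.MustCompile(`error: unable to read sha1 file of (.+?) \(.*`)

// missingFilesAfterCheckout runs `git checkout .` in baseDir and, if it
// fails, extracts the worktree paths git reported as unreadable so they
// can be fetched from the remote afterwards.
func missingFilesAfterCheckout(baseDir string) []string {
	cmd := exec.Command("git", "checkout", ".")
	cmd.Dir = baseDir
	stderr := &bytes.Buffer{}
	cmd.Stderr = stderr
	if cmd.Run() == nil {
		return nil // checkout succeeded, nothing is missing
	}
	var missing []string
	for _, m := range stdErrRegex.FindAllSubmatch(stderr.Bytes(), -1) {
		missing = append(missing, string(m[1]))
	}
	return missing
}

func main() {
	for _, f := range missingFilesAfterCheckout(".") {
		fmt.Println("missing:", f)
	}
}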

View File

@@ -5,9 +5,11 @@ import "regexp"
 const maxConcurrency = 30
 var refPrefix = []byte{'r', 'e', 'f', ':'}
+var phpSuffix = []byte{'.', 'p', 'h', 'p'}
 var (
 	packRegex = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`)
 	objRegex  = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`)
+	stdErrRegex = regexp.MustCompile(`error: unable to read sha1 file of (.+?) \(.*`)
 )
 
 var (
 	commonFiles = []string{
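The new stdErrRegex pulls the file path out of git's "error: unable to read sha1 file of <path> (<sha1>)" messages. A standalone check of the pattern (the path and hash in the sample line are invented):

package main

import (
	"fmt"
	"regexp"
)

var stdErrRegex = regexp.MustCompile(`error: unable to read sha1 file of (.+?) \(.*`)

func main() {
	// A stderr line of the shape the regex targets; path and hash are examples.
	line := []byte("error: unable to read sha1 file of src/index.php (0123456789abcdef0123456789abcdef01234567)")
	if m := stdErrRegex.FindSubmatch(line); m != nil {
		fmt.Printf("missing file: %s\n", m[1]) // missing file: src/index.php
	}
}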