Attempt to fetch missing files after checkout

Tillie Kottmann 2020-11-21 18:52:38 +01:00
parent bc6e1064e3
commit 56ddf5ad00
3 changed files with 35 additions and 6 deletions
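In short: DownloadWorker gains an allowHtml parameter and now skips HTML-looking responses only when that flag is false; every existing call site passes false. The final "git checkout ." step no longer just returns its error: stderr is captured, and when checkout exits with status 255 the output is matched against the new stdErrRegex ("error: unable to read sha1 file of ..."); every missing path that does not end in .php is queued for one more download pass, this time with allowHtml set to true. One earlier error path in FetchGit is also downgraded from returning the error to printing it on stderr, so the checkout step is still reached.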

View File

@@ -9,7 +9,7 @@ import (
 	"sync"
 )
 
-func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir string, wg *sync.WaitGroup) {
+func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir string, wg *sync.WaitGroup, allowHtml bool) {
 	defer wg.Done()
 	for file := range queue {
 		if file == "" {
@@ -28,7 +28,7 @@ func DownloadWorker(c *fasthttp.Client, queue <-chan string, baseUrl, baseDir st
 			continue
 		}
 		if code == 200 {
-			if utils.IsHtml(body) {
+			if !allowHtml && utils.IsHtml(body) {
 				fmt.Printf("warning: %s appears to be an html file, skipping\n", uri)
 				continue
 			}

View File

@@ -183,7 +183,7 @@ func FetchGit(baseUrl, baseDir string) error {
 	concurrency := utils.MinInt(maxConcurrency, len(commonFiles))
 	wg.Add(concurrency)
 	for w := 1; w <= concurrency; w++ {
-		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
+		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
 	}
 	for _, f := range commonFiles {
 		queue <- f
@@ -214,7 +214,7 @@ func FetchGit(baseUrl, baseDir string) error {
 	concurrency := utils.MinInt(maxConcurrency, len(hashes))
 	wg.Add(concurrency)
 	for w := 1; w <= concurrency; w++ {
-		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg)
+		go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, false)
 	}
 	for _, sha1 := range hashes {
 		queue <- fmt.Sprintf(".git/objects/pack/pack-%s.idx", sha1[1])
@@ -315,7 +315,7 @@ func FetchGit(baseUrl, baseDir string) error {
 		}
 		return nil
 	}); err != nil {
-		return err
+		fmt.Fprintf(os.Stderr, "error: %s\n", err)
 	}
 	// TODO: find more objects to fetch in pack files and remove packed objects from list of objects to be fetched
 	/*for _, pack := range storage.ObjectPacks() {
@@ -336,5 +336,32 @@ func FetchGit(baseUrl, baseDir string) error {
 	fmt.Println("[-] Running git checkout .")
 	cmd := exec.Command("git", "checkout", ".")
 	cmd.Dir = baseDir
-	return cmd.Run()
+	stderr := &bytes.Buffer{}
+	cmd.Stderr = stderr
+	if err := cmd.Run(); err != nil {
+		if exErr, ok := err.(*exec.ExitError); ok && exErr.ProcessState.ExitCode() == 255 {
+			fmt.Println("[-] Attempting to fetch missing files")
+			out, err := ioutil.ReadAll(stderr)
+			if err != nil {
+				return err
+			}
+			errors := stdErrRegex.FindAllSubmatch(out, -1)
+			queue = createQueue(len(errors) * 3)
+			concurrency := utils.MinInt(maxConcurrency, len(errors))
+			wg.Add(concurrency)
+			for w := 1; w <= concurrency; w++ {
+				go workers.DownloadWorker(c, queue, baseUrl, baseDir, &wg, true)
+			}
+			for _, e := range errors {
+				if !bytes.HasSuffix(e[1], phpSuffix) {
+					queue <- string(e[1])
+				}
+			}
+			close(queue)
+			wg.Wait()
+		} else {
+			return err
+		}
+	}
+	return nil
 }
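For reference, a minimal standalone sketch of the exit-status handling added above, assuming only the Go standard library; the command and the comparison with 255 mirror the diff, while the printed messages are purely illustrative:

package main

import (
	"bytes"
	"fmt"
	"os/exec"
)

func main() {
	cmd := exec.Command("git", "checkout", ".")
	stderr := &bytes.Buffer{}
	cmd.Stderr = stderr // keep git's complaints so they can be parsed afterwards

	if err := cmd.Run(); err != nil {
		if exErr, ok := err.(*exec.ExitError); ok && exErr.ProcessState.ExitCode() == 255 {
			// git ran but failed, e.g. because some objects are unreadable;
			// the diff treats this case as recoverable and parses stderr.
			fmt.Println("recoverable checkout failure:", stderr.String())
		} else {
			// any other failure (git not found, different exit status) stays fatal
			fmt.Println("fatal:", err)
		}
	}
}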

View File

@@ -5,9 +5,11 @@ import "regexp"
 const maxConcurrency = 30
 var refPrefix = []byte{'r', 'e', 'f', ':'}
+var phpSuffix = []byte{'.', 'p', 'h', 'p'}
 var (
 	packRegex   = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`)
 	objRegex    = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`)
+	stdErrRegex = regexp.MustCompile(`error: unable to read sha1 file of (.+?) \(.*`)
 )
 
 var (
 	commonFiles = []string{
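
Finally, a small standalone sketch of the recovery parsing itself: stdErrRegex and phpSuffix are copied from this commit, while the sample stderr lines and the printed output are made up for illustration.

package main

import (
	"bytes"
	"fmt"
	"regexp"
)

var (
	stdErrRegex = regexp.MustCompile(`error: unable to read sha1 file of (.+?) \(.*`)
	phpSuffix   = []byte{'.', 'p', 'h', 'p'}
)

func main() {
	// Hypothetical stderr from `git checkout .` in a partially fetched repository.
	stderr := []byte("error: unable to read sha1 file of assets/app.js (0123456789abcdef0123456789abcdef01234567)\n" +
		"error: unable to read sha1 file of index.php (89abcdef0123456789abcdef0123456789abcdef)\n")

	for _, m := range stdErrRegex.FindAllSubmatch(stderr, -1) {
		path := m[1]
		if bytes.HasSuffix(path, phpSuffix) {
			continue // the commit skips .php paths via the phpSuffix filter
		}
		fmt.Println("would re-queue:", string(path)) // would re-queue: assets/app.js
	}
}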