only create objects if hash matches the index

This commit is contained in:
maia tillie arson crimew 2021-10-21 15:09:46 +02:00
parent 7b9abaa163
commit ff3f22f076
2 changed files with 51 additions and 51 deletions

View File

@ -32,7 +32,6 @@ func createObjWork(baseDir, f string, jt *jobtracker.JobTracker, storage *filesy
fp := utils.Url(baseDir, f) fp := utils.Url(baseDir, f)
if idx != nil {
entry, err := idx.Entry(f) entry, err := idx.Entry(f)
if err != nil { if err != nil {
log.Error().Str("file", f).Err(err).Msg("file is not in index") log.Error().Str("file", f).Err(err).Msg("file is not in index")
@ -48,7 +47,6 @@ func createObjWork(baseDir, f string, jt *jobtracker.JobTracker, storage *filesy
os.Chown(fp, int(entry.UID), int(entry.GID)) os.Chown(fp, int(entry.UID), int(entry.GID))
os.Chtimes(fp, entry.ModifiedAt, entry.ModifiedAt) os.Chtimes(fp, entry.ModifiedAt, entry.ModifiedAt)
//log.Info().Str("file", f).Msg("updated from index") //log.Info().Str("file", f).Msg("updated from index")
}
content, err := ioutil.ReadFile(fp) content, err := ioutil.ReadFile(fp)
if err != nil { if err != nil {
@ -56,6 +54,12 @@ func createObjWork(baseDir, f string, jt *jobtracker.JobTracker, storage *filesy
return return
} }
hash := plumbing.ComputeHash(plumbing.BlobObject, content)
if entry.Hash != hash {
log.Warn().Str("file", f).Msg("hash does not match hash in index, skipping object creation")
return
}
obj := storage.NewEncodedObject() obj := storage.NewEncodedObject()
obj.SetSize(int64(len(content))) obj.SetSize(int64(len(content)))
obj.SetType(plumbing.BlobObject) obj.SetType(plumbing.BlobObject)

View File

@ -365,20 +365,20 @@ func FetchGit(baseUrl, baseDir string) error {
} }
jt.StartAndWait() jt.StartAndWait()
// TODO: does this even make sense???????
if !utils.Exists(baseDir) { if !utils.Exists(baseDir) {
return nil return nil
} }
// TODO: try to do using go-git
log.Info().Str("dir", baseDir).Msg("running git checkout .") log.Info().Str("dir", baseDir).Msg("running git checkout .")
cmd := exec.Command("git", "checkout", ".") cmd := exec.Command("git", "checkout", ".")
cmd.Dir = baseDir cmd.Dir = baseDir
stderr := &bytes.Buffer{} //stderr := &bytes.Buffer{}
cmd.Stderr = stderr //cmd.Stderr = stderr
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
if exErr, ok := err.(*exec.ExitError); ok && (exErr.ProcessState.ExitCode() == 255 || exErr.ProcessState.ExitCode() == 128) { if exErr, ok := err.(*exec.ExitError); ok && (exErr.ProcessState.ExitCode() == 255 || exErr.ProcessState.ExitCode() == 128) {
log.Info().Str("base", baseUrl).Str("dir", baseDir).Msg("attempting to fetch missing files") log.Info().Str("base", baseUrl).Str("dir", baseDir).Msg("attempting to fetch missing files")
out, err := ioutil.ReadAll(stderr) /*out, err := ioutil.ReadAll(stderr)
if err != nil { if err != nil {
return err return err
} }
@ -397,7 +397,7 @@ func FetchGit(baseUrl, baseDir string) error {
} }
jt.StartAndWait() jt.StartAndWait()
/*// Fetch files marked as missing in status // Fetch files marked as missing in status
// TODO: why do we parse status AND decode index ??????? // TODO: why do we parse status AND decode index ???????
cmd := exec.Command("git", "status") cmd := exec.Command("git", "status")
cmd.Dir = baseDir cmd.Dir = baseDir
@ -425,9 +425,9 @@ func FetchGit(baseUrl, baseDir string) error {
}*/ }*/
// Iterate over index to find missing files // Iterate over index to find missing files
var idx index.Index
var hasIndex bool
if utils.Exists(indexPath) { if utils.Exists(indexPath) {
var missingFiles []string
var idx index.Index
f, err := os.Open(indexPath) f, err := os.Open(indexPath)
if err != nil { if err != nil {
return err return err
@ -435,11 +435,9 @@ func FetchGit(baseUrl, baseDir string) error {
defer f.Close() defer f.Close()
decoder := index.NewDecoder(f) decoder := index.NewDecoder(f)
if err := decoder.Decode(&idx); err != nil { if err := decoder.Decode(&idx); err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err) log.Error().Err(err).Msg("failed to decode index")
//return err //return err
} else { } else {
hasIndex = true
}
jt = jobtracker.NewJobTracker() jt = jobtracker.NewJobTracker()
for _, entry := range idx.Entries { for _, entry := range idx.Entries {
if !strings.HasSuffix(entry.Name, ".php") && !utils.Exists(utils.Url(baseDir, entry.Name)) { if !strings.HasSuffix(entry.Name, ".php") && !utils.Exists(utils.Url(baseDir, entry.Name)) {
@ -452,8 +450,8 @@ func FetchGit(baseUrl, baseDir string) error {
go workers.DownloadWorker(c, baseUrl, baseDir, jt, true, true) go workers.DownloadWorker(c, baseUrl, baseDir, jt, true, true)
} }
jt.StartAndWait() jt.StartAndWait()
}
//
jt = jobtracker.NewJobTracker() jt = jobtracker.NewJobTracker()
for _, f := range missingFiles { for _, f := range missingFiles {
if utils.Exists(utils.Url(baseDir, f)) { if utils.Exists(utils.Url(baseDir, f)) {
@ -461,14 +459,12 @@ func FetchGit(baseUrl, baseDir string) error {
} }
} }
concurrency = utils.MinInt(maxConcurrency, int(jt.QueuedJobs())) concurrency = utils.MinInt(maxConcurrency, int(jt.QueuedJobs()))
var idp *index.Index
if hasIndex {
idp = &idx
}
for w := 1; w <= concurrency; w++ { for w := 1; w <= concurrency; w++ {
go workers.CreateObjectWorker(baseDir, jt, storage, idp) go workers.CreateObjectWorker(baseDir, jt, storage, &idx)
} }
jt.StartAndWait() jt.StartAndWait()
}
}
} else { } else {
return err return err
} }