From 6eae4acc1754c94c2deafb3880422741c19340c2 Mon Sep 17 00:00:00 2001 From: maia tillie arson crimew Date: Tue, 26 Oct 2021 21:08:27 +0200 Subject: [PATCH] grab object hashes from more places --- pkg/goop/clone.go | 10 +++++++++- pkg/goop/consts.go | 6 +++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pkg/goop/clone.go b/pkg/goop/clone.go index 757a6a4..acd38c6 100644 --- a/pkg/goop/clone.go +++ b/pkg/goop/clone.go @@ -221,9 +221,15 @@ func FetchGit(baseUrl, baseDir string) error { files := []string{ utils.Url(baseDir, ".git/packed-refs"), utils.Url(baseDir, ".git/info/refs"), + utils.Url(baseDir, ".git/info/grafts"), + // utils.Url(baseDir, ".git/info/sparse-checkout"), // TODO: ? utils.Url(baseDir, ".git/FETCH_HEAD"), utils.Url(baseDir, ".git/ORIG_HEAD"), utils.Url(baseDir, ".git/HEAD"), + utils.Url(baseDir, ".git/objects/loose-object-idx"), // TODO: is this even a text file? + utils.Url(baseDir, ".git/objects/info/commit-graphs/commit-graph-chain"), + utils.Url(baseDir, ".git/objects/info/alternates"), + utils.Url(baseDir, ".git/objects/info/http-alternates"), } gitRefsDir := utils.Url(baseDir, ".git/refs") @@ -339,7 +345,9 @@ func FetchGit(baseUrl, baseDir string) error { storage.IterEncodedObjects() }*/ - log.Info().Str("base", baseUrl).Msg("fetching object") + // TODO: grab object hashes from commit graphs + + log.Info().Str("base", baseUrl).Msg("fetching objects") jt = jobtracker.NewJobTracker(workers.FindObjectsWorker, maxConcurrency, jobtracker.DefaultNapper) for obj := range objs { jt.AddJob(obj) diff --git a/pkg/goop/consts.go b/pkg/goop/consts.go index 2160145..2149c4d 100644 --- a/pkg/goop/consts.go +++ b/pkg/goop/consts.go @@ -7,14 +7,14 @@ const maxConcurrency = 40 var refPrefix = []byte{'r', 'e', 'f', ':'} var ( packRegex = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`) - objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`) + objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`) // TODO: add sha256 
support refLogRegex = regexp.MustCompile(`(?m)^(?:[a-f0-9]{40}) ([a-f0-9]{40}) .*$`) ) var ( commonFiles = []string{ ".gitignore", ".gitattributes", - ".gitmodules", + ".gitmodules", // TODO: fetch submodules ".env", ".git/COMMIT_EDITMSG", ".git/description", @@ -43,7 +43,7 @@ var ( ".git/info/attributes", // TODO: can lfs filters be in here? ".git/info/sparse-checkout", // TODO: parse and process ".git/objects/loose-object-idx", // TODO: parse and process - ".git/objects/pack/multi-pack-index", // TODO: parse and process + ".git/objects/pack/multi-pack-index", // TODO: parse and process and fetch referenced packs (unless i'm misunderstanding this) } commonRefs = []string{ ".git/FETCH_HEAD",