grab object hashes from more places

This commit is contained in:
maia tillie arson crimew 2021-10-26 21:08:27 +02:00
parent 318039cb3c
commit 6eae4acc17
2 changed files with 12 additions and 4 deletions

View File

@@ -221,9 +221,15 @@ func FetchGit(baseUrl, baseDir string) error {
files := []string{ files := []string{
utils.Url(baseDir, ".git/packed-refs"), utils.Url(baseDir, ".git/packed-refs"),
utils.Url(baseDir, ".git/info/refs"), utils.Url(baseDir, ".git/info/refs"),
utils.Url(baseDir, ".git/info/grafts"),
// utils.Url(baseDir, ".git/info/sparse-checkout"), // TODO: ?
utils.Url(baseDir, ".git/FETCH_HEAD"), utils.Url(baseDir, ".git/FETCH_HEAD"),
utils.Url(baseDir, ".git/ORIG_HEAD"), utils.Url(baseDir, ".git/ORIG_HEAD"),
utils.Url(baseDir, ".git/HEAD"), utils.Url(baseDir, ".git/HEAD"),
utils.Url(baseDir, ".git/objects/loose-object-idx"), // TODO: is this even a text file?
utils.Url(baseDir, ".git/objects/info/commit-graphs/commit-graph-chain"),
utils.Url(baseDir, ".git/objects/info/alternates"),
utils.Url(baseDir, ".git/objects/info/http-alternates"),
} }
gitRefsDir := utils.Url(baseDir, ".git/refs") gitRefsDir := utils.Url(baseDir, ".git/refs")
@@ -339,7 +345,9 @@ func FetchGit(baseUrl, baseDir string) error {
storage.IterEncodedObjects() storage.IterEncodedObjects()
}*/ }*/
log.Info().Str("base", baseUrl).Msg("fetching object") // TODO: grab object hashes from commit graphs
log.Info().Str("base", baseUrl).Msg("fetching objects")
jt = jobtracker.NewJobTracker(workers.FindObjectsWorker, maxConcurrency, jobtracker.DefaultNapper) jt = jobtracker.NewJobTracker(workers.FindObjectsWorker, maxConcurrency, jobtracker.DefaultNapper)
for obj := range objs { for obj := range objs {
jt.AddJob(obj) jt.AddJob(obj)

View File

@@ -7,14 +7,14 @@ const maxConcurrency = 40
var refPrefix = []byte{'r', 'e', 'f', ':'} var refPrefix = []byte{'r', 'e', 'f', ':'}
var ( var (
packRegex = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`) packRegex = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`)
objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`) objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`) // TODO: add sha256 support
refLogRegex = regexp.MustCompile(`(?m)^(?:[a-f0-9]{40}) ([a-f0-9]{40}) .*$`) refLogRegex = regexp.MustCompile(`(?m)^(?:[a-f0-9]{40}) ([a-f0-9]{40}) .*$`)
) )
var ( var (
commonFiles = []string{ commonFiles = []string{
".gitignore", ".gitignore",
".gitattributes", ".gitattributes",
".gitmodules", ".gitmodules", // TODO: fetch submodules
".env", ".env",
".git/COMMIT_EDITMSG", ".git/COMMIT_EDITMSG",
".git/description", ".git/description",
@@ -43,7 +43,7 @@ var (
".git/info/attributes", // TODO: can lfs filters be in here? ".git/info/attributes", // TODO: can lfs filters be in here?
".git/info/sparse-checkout", // TODO: parse and process ".git/info/sparse-checkout", // TODO: parse and process
".git/objects/loose-object-idx", // TODO: parse and process ".git/objects/loose-object-idx", // TODO: parse and process
".git/objects/pack/multi-pack-index", // TODO: parse and process ".git/objects/pack/multi-pack-index", // TODO: parse and process and fetch referenced packs (unless i'm misunderstanding this)
} }
commonRefs = []string{ commonRefs = []string{
".git/FETCH_HEAD", ".git/FETCH_HEAD",