grab object hashes from more places

This commit is contained in:
maia tillie arson crimew 2021-10-26 21:08:27 +02:00
parent 318039cb3c
commit 6eae4acc17
2 changed files with 12 additions and 4 deletions

View File

@ -221,9 +221,15 @@ func FetchGit(baseUrl, baseDir string) error {
files := []string{
utils.Url(baseDir, ".git/packed-refs"),
utils.Url(baseDir, ".git/info/refs"),
utils.Url(baseDir, ".git/info/grafts"),
// utils.Url(baseDir, ".git/info/sparse-checkout"), // TODO: ?
utils.Url(baseDir, ".git/FETCH_HEAD"),
utils.Url(baseDir, ".git/ORIG_HEAD"),
utils.Url(baseDir, ".git/HEAD"),
utils.Url(baseDir, ".git/objects/loose-object-idx"), // TODO: is this even a text file?
utils.Url(baseDir, ".git/objects/info/commit-graphs/commit-graph-chain"),
utils.Url(baseDir, ".git/objects/info/alternates"),
utils.Url(baseDir, ".git/objects/info/http-alternates"),
}
gitRefsDir := utils.Url(baseDir, ".git/refs")
@ -339,7 +345,9 @@ func FetchGit(baseUrl, baseDir string) error {
storage.IterEncodedObjects()
}*/
log.Info().Str("base", baseUrl).Msg("fetching object")
// TODO: grab object hashes from commit graphs
log.Info().Str("base", baseUrl).Msg("fetching objects")
jt = jobtracker.NewJobTracker(workers.FindObjectsWorker, maxConcurrency, jobtracker.DefaultNapper)
for obj := range objs {
jt.AddJob(obj)

View File

@ -7,14 +7,14 @@ const maxConcurrency = 40
var refPrefix = []byte{'r', 'e', 'f', ':'}
var (
packRegex = regexp.MustCompile(`(?m)pack-([a-f0-9]{40})\.pack`)
objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`)
objRegex = regexp.MustCompile(`(?m)(^|\s)([a-f0-9]{40})($|\s)`) // TODO: add sha256 support
refLogRegex = regexp.MustCompile(`(?m)^(?:[a-f0-9]{40}) ([a-f0-9]{40}) .*$`)
)
var (
commonFiles = []string{
".gitignore",
".gitattributes",
".gitmodules",
".gitmodules", // TODO: fetch submodules
".env",
".git/COMMIT_EDITMSG",
".git/description",
@ -43,7 +43,7 @@ var (
".git/info/attributes", // TODO: can lfs filters be in here?
".git/info/sparse-checkout", // TODO: parse and process
".git/objects/loose-object-idx", // TODO: parse and process
".git/objects/pack/multi-pack-index", // TODO: parse and process
".git/objects/pack/multi-pack-index", // TODO: parse and process and fetch referenced packs (unless i'm misunderstanding this)
}
commonRefs = []string{
".git/FETCH_HEAD",