2020-10-31 11:02:59 +00:00
|
|
|
package workers
|
|
|
|
|
|
|
|
import (
|
2021-10-21 00:36:09 +01:00
|
|
|
"fmt"
|
2020-10-31 11:02:59 +00:00
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"regexp"
|
2021-10-21 00:36:09 +01:00
|
|
|
"strings"
|
2020-10-31 11:02:59 +00:00
|
|
|
"sync"
|
2021-10-20 19:57:29 +01:00
|
|
|
|
|
|
|
"github.com/deletescape/goop/internal/utils"
|
2021-10-24 18:28:58 +01:00
|
|
|
"github.com/deletescape/jobtracker"
|
2021-10-20 19:57:29 +01:00
|
|
|
"github.com/phuslu/log"
|
|
|
|
"github.com/valyala/fasthttp"
|
2021-10-21 00:36:09 +01:00
|
|
|
"gopkg.in/ini.v1"
|
2020-10-31 11:02:59 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
var (
	// refRegex matches git ref paths such as "refs/heads/master" anywhere in
	// a blob of text; the full match is the ref path itself.
	refRegex = regexp.MustCompile(`(?m)(refs(/[a-zA-Z0-9\-\.\_\*]+)+)`)

	// branchRegex matches `branch '<name>'` / `branch "<name>"` fragments;
	// capture group 1 holds the text between the quotes.
	branchRegex = regexp.MustCompile(`(?m)branch ["'](.+)["']`)

	// checkedRefs records the ref paths that a worker has already claimed.
	// Access must be guarded by checkedRefsMutex.
	checkedRefs = make(map[string]bool)

	// checkedRefsMutex serializes access to checkedRefs.
	checkedRefsMutex sync.Mutex
)
|
|
|
|
|
2021-10-24 17:52:27 +01:00
|
|
|
type FindRefContext struct {
|
|
|
|
C *fasthttp.Client
|
|
|
|
BaseUrl string
|
|
|
|
BaseDir string
|
2021-10-20 19:57:29 +01:00
|
|
|
}
|
|
|
|
|
2021-10-24 17:52:27 +01:00
|
|
|
func FindRefWorker(jt *jobtracker.JobTracker, path string, context jobtracker.Context) {
|
|
|
|
c := context.(FindRefContext)
|
2021-10-20 19:57:29 +01:00
|
|
|
|
2021-10-20 23:35:54 +01:00
|
|
|
checkRatelimted()
|
2021-10-20 19:57:29 +01:00
|
|
|
|
|
|
|
checkedRefsMutex.Lock()
|
|
|
|
if checked, ok := checkedRefs[path]; checked && ok {
|
|
|
|
// Ref has already been checked
|
|
|
|
checkedRefsMutex.Unlock()
|
|
|
|
return
|
|
|
|
} else {
|
|
|
|
checkedRefs[path] = true
|
|
|
|
}
|
2021-10-20 21:14:07 +01:00
|
|
|
checkedRefsMutex.Unlock()
|
2021-10-20 19:57:29 +01:00
|
|
|
|
2021-10-24 17:52:27 +01:00
|
|
|
targetFile := utils.Url(c.BaseDir, path)
|
2021-10-20 19:57:29 +01:00
|
|
|
if utils.Exists(targetFile) {
|
|
|
|
log.Info().Str("file", targetFile).Msg("already fetched, skipping redownload")
|
|
|
|
content, err := ioutil.ReadFile(targetFile)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Str("file", targetFile).Err(err).Msg("error while reading file")
|
2021-10-21 00:36:09 +01:00
|
|
|
return
|
2021-10-20 19:57:29 +01:00
|
|
|
}
|
|
|
|
for _, ref := range refRegex.FindAll(content, -1) {
|
|
|
|
jt.AddJob(utils.Url(".git", string(ref)))
|
|
|
|
jt.AddJob(utils.Url(".git/logs", string(ref)))
|
|
|
|
}
|
2021-10-21 00:36:09 +01:00
|
|
|
if path == ".git/FETCH_HEAD" {
|
|
|
|
// TODO figure out actual remote instead of just assuming origin here (if possible)
|
2021-10-20 19:57:29 +01:00
|
|
|
for _, branch := range branchRegex.FindAllSubmatch(content, -1) {
|
2021-10-21 00:36:09 +01:00
|
|
|
jt.AddJob(fmt.Sprintf(".git/refs/remotes/origin/%s", branch[1]))
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/logs/refs/remotes/origin/%s", branch[1]))
|
|
|
|
}
|
|
|
|
}
|
2021-10-26 19:44:43 +01:00
|
|
|
if path == ".git/config" || path == ".git/config.worktree" {
|
2021-10-21 00:36:09 +01:00
|
|
|
cfg, err := ini.Load(content)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Str("file", targetFile).Err(err).Msg("failed to parse git config")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, sec := range cfg.Sections() {
|
|
|
|
if strings.HasPrefix(sec.Name(), "branch ") {
|
|
|
|
parts := strings.SplitN(sec.Name(), " ", 2)
|
|
|
|
branch := strings.Trim(parts[1], `"`)
|
|
|
|
remote := sec.Key("remote").String()
|
|
|
|
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/refs/remotes/%s/%s", remote, branch))
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/logs/refs/remotes/%s/%s", remote, branch))
|
|
|
|
}
|
2021-10-20 19:57:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-10-24 17:52:27 +01:00
|
|
|
uri := utils.Url(c.BaseUrl, path)
|
|
|
|
code, body, err := c.C.Get(nil, uri)
|
2021-10-20 19:57:29 +01:00
|
|
|
if err == nil && code != 200 {
|
|
|
|
if code == 429 {
|
|
|
|
setRatelimited()
|
|
|
|
jt.AddJob(path)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
log.Warn().Str("uri", uri).Int("code", code).Msg("failed to fetch ref")
|
|
|
|
return
|
|
|
|
} else if err != nil {
|
|
|
|
log.Error().Str("uri", uri).Int("code", code).Err(err).Msg("failed to fetch ref")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if utils.IsHtml(body) {
|
|
|
|
log.Warn().Str("uri", uri).Msg("file appears to be html, skipping")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if utils.IsEmptyBytes(body) {
|
|
|
|
log.Warn().Str("uri", uri).Msg("file appears to be empty, skipping")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if err := utils.CreateParentFolders(targetFile); err != nil {
|
|
|
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("couldn't create parent directories")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if err := ioutil.WriteFile(targetFile, body, os.ModePerm); err != nil {
|
|
|
|
log.Error().Str("uri", uri).Str("file", targetFile).Err(err).Msg("clouldn't write file")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info().Str("uri", uri).Msg("fetched ref")
|
|
|
|
|
|
|
|
for _, ref := range refRegex.FindAll(body, -1) {
|
|
|
|
jt.AddJob(utils.Url(".git", string(ref)))
|
|
|
|
jt.AddJob(utils.Url(".git/logs", string(ref)))
|
|
|
|
}
|
2021-10-21 00:36:09 +01:00
|
|
|
if path == ".git/FETCH_HEAD" {
|
|
|
|
// TODO figure out actual remote instead of just assuming origin here (if possible)
|
2021-10-20 19:57:29 +01:00
|
|
|
for _, branch := range branchRegex.FindAllSubmatch(body, -1) {
|
2021-10-21 00:36:09 +01:00
|
|
|
jt.AddJob(fmt.Sprintf(".git/refs/remotes/origin/%s", branch[1]))
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/logs/refs/remotes/origin/%s", branch[1]))
|
|
|
|
}
|
|
|
|
}
|
2021-10-26 19:44:43 +01:00
|
|
|
if path == ".git/config" || path == ".git/config.worktree" {
|
2021-10-21 00:36:09 +01:00
|
|
|
cfg, err := ini.Load(body)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Str("file", targetFile).Err(err).Msg("failed to parse git config")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, sec := range cfg.Sections() {
|
|
|
|
if strings.HasPrefix(sec.Name(), "branch ") {
|
|
|
|
parts := strings.SplitN(sec.Name(), " ", 2)
|
|
|
|
branch := strings.Trim(parts[1], `"`)
|
|
|
|
remote := sec.Key("remote").String()
|
|
|
|
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/refs/remotes/%s/%s", remote, branch))
|
|
|
|
jt.AddJob(fmt.Sprintf(".git/logs/refs/remotes/%s/%s", remote, branch))
|
|
|
|
}
|
2020-10-31 11:02:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|