AdGuardHome/scripts/vetted-filters/main.go

182 lines
4.5 KiB
Go
Raw Normal View History

2022-11-02 13:18:02 +00:00
// vetted-filters fetches the most recent Hostlists Registry filtering rule list
// index and transforms the filters from it to AdGuard Home's format.
package main
import (
"bytes"
2024-07-03 13:38:37 +01:00
"context"
2022-11-02 13:18:02 +00:00
"encoding/json"
"fmt"
2024-07-03 13:38:37 +01:00
"log/slog"
2022-11-02 13:18:02 +00:00
"net/http"
"net/url"
"os"
"time"
2024-07-03 13:38:37 +01:00
"github.com/AdguardTeam/golibs/logutil/slogutil"
2023-07-26 11:18:44 +01:00
"github.com/google/renameio/v2/maybe"
2022-11-02 13:18:02 +00:00
)
func main() {
2024-07-03 13:38:37 +01:00
ctx := context.Background()
l := slogutil.New(nil)
2022-11-02 13:18:02 +00:00
urlStr := "https://adguardteam.github.io/HostlistsRegistry/assets/filters.json"
if v, ok := os.LookupEnv("URL"); ok {
urlStr = v
}
// Validate the URL.
_, err := url.Parse(urlStr)
check(err)
c := &http.Client{
Timeout: 10 * time.Second,
}
resp, err := c.Get(urlStr)
check(err)
2024-07-03 13:38:37 +01:00
defer slogutil.CloseAndLog(ctx, l, resp.Body, slog.LevelError)
2022-11-02 13:18:02 +00:00
if resp.StatusCode != http.StatusOK {
panic(fmt.Errorf("expected code %d, got %d", http.StatusOK, resp.StatusCode))
}
hlFlt := &hlFilters{}
err = json.NewDecoder(resp.Body).Decode(hlFlt)
check(err)
aghFlt := &aghFilters{
Categories: map[string]*aghFiltersCategory{
"general": {
Name: "filter_category_general",
Description: "filter_category_general_desc",
},
"other": {
Name: "filter_category_other",
Description: "filter_category_other_desc",
},
"regional": {
Name: "filter_category_regional",
Description: "filter_category_regional_desc",
},
"security": {
Name: "filter_category_security",
Description: "filter_category_security_desc",
},
},
Filters: map[string]*aghFiltersFilter{},
}
for i, f := range hlFlt.Filters {
2024-07-03 13:38:37 +01:00
key := f.FilterKey
2022-11-02 13:18:02 +00:00
cat := f.category()
if cat == "" {
2024-07-03 13:38:37 +01:00
l.WarnContext(ctx, "no fitting category for filter", "key", key, "idx", i)
2022-11-02 13:18:02 +00:00
}
2024-07-03 13:38:37 +01:00
aghFlt.Filters[key] = &aghFiltersFilter{
2022-11-02 13:18:02 +00:00
Name: f.Name,
CategoryID: cat,
Homepage: f.Homepage,
2022-12-07 13:46:59 +00:00
// NOTE: The source URL in filters.json is not guaranteed to contain
// the URL of the filtering rule list. So, use our mirror for the
// vetted blocklists, which are mostly guaranteed to be valid and
// available lists.
Source: f.DownloadURL,
2022-11-02 13:18:02 +00:00
}
}
buf := &bytes.Buffer{}
_, _ = buf.WriteString(jsHeader)
enc := json.NewEncoder(buf)
enc.SetIndent("", " ")
err = enc.Encode(aghFlt)
check(err)
2024-07-03 13:38:37 +01:00
err = maybe.WriteFile("client/src/helpers/filters/filters.ts", buf.Bytes(), 0o644)
2022-11-02 13:18:02 +00:00
check(err)
}
// jsHeader is the header for the generated JavaScript file.  It informs the
// reader that the file is generated and disables some style-related eslint
// checks.
//
// The header ends with "export default " and no trailing newline, because
// main appends the JSON-encoded filter data directly after it in the same
// buffer.
const jsHeader = `// Code generated by go run ./scripts/vetted-filters/main.go; DO NOT EDIT.
/* eslint quote-props: 'off', quotes: 'off', comma-dangle: 'off', semi: 'off' */
export default `
// check aborts the script by panicking with err when err is non-nil.  A nil
// err is a no-op.
func check(err error) {
	if err == nil {
		return
	}

	panic(err)
}
// hlFilters is the JSON structure for the Hostlists Registry rule list index.
// main decodes the HTTP response body into it.
type hlFilters struct {
	// Filters is the list of filters from the index's "filters" array.
	Filters []*hlFiltersFilter `json:"filters"`
}
// hlFiltersFilter describes a single filter entry of the Hostlists Registry
// index as it appears in JSON.
type hlFiltersFilter struct {
	// DownloadURL is the value of the "downloadUrl" property.
	DownloadURL string `json:"downloadUrl"`

	// FilterKey is the value of the "filterKey" property.
	FilterKey string `json:"filterKey"`

	// Homepage is the value of the "homepage" property.
	Homepage string `json:"homepage"`

	// Name is the value of the "name" property.
	Name string `json:"name"`

	// Tags are the numeric tag IDs from the "tags" property.  See the tagID*
	// constants for the ones that map to a category.
	Tags []int `json:"tags"`
}

// Known tag IDs.  Keep in sync with tags/metadata.json in the source repo.
const (
	tagIDGeneral  = 1
	tagIDSecurity = 2
	tagIDRegional = 3
	tagIDOther    = 4
)

// tagCategory maps a single tag ID to the AdGuard Home category name.  cat is
// empty when the tag ID does not correspond to any category.
func tagCategory(tagID int) (cat string) {
	switch tagID {
	case tagIDGeneral:
		return "general"
	case tagIDSecurity:
		return "security"
	case tagIDRegional:
		return "regional"
	case tagIDOther:
		return "other"
	default:
		return ""
	}
}

// category returns the AdGuard Home category for this filter, which is the
// category of the first tag in f.Tags that maps to one.  If no tag fits, cat
// is empty.
func (f *hlFiltersFilter) category() (cat string) {
	for _, tagID := range f.Tags {
		if name := tagCategory(tagID); name != "" {
			return name
		}
	}

	return ""
}
// aghFilters is the JSON structure for AdGuard Home's list of vetted filtering
// rule lists in file client/src/helpers/filters/filters.ts.
type aghFilters struct {
	// Categories maps a category ID, such as "general" or "security", to its
	// description.
	Categories map[string]*aghFiltersCategory `json:"categories"`

	// Filters maps a filter key, taken from the registry's "filterKey"
	// property, to the filter's data.
	Filters map[string]*aghFiltersFilter `json:"filters"`
}
// aghFiltersCategory is the JSON structure for a category in the vetted
// filtering rule list file.
type aghFiltersCategory struct {
	// Name is set by main to an identifier such as "filter_category_general".
	// NOTE(review): presumably a localization key resolved by the client;
	// confirm against the frontend code.
	Name string `json:"name"`

	// Description is set by main to an identifier such as
	// "filter_category_general_desc".
	Description string `json:"description"`
}
// aghFiltersFilter is the JSON structure for a filter in the vetted filtering
// rule list file.
type aghFiltersFilter struct {
	// Name is copied from the registry filter's name.
	Name string `json:"name"`

	// CategoryID is the key of the filter's category in aghFilters.Categories.
	// It is empty when none of the filter's tags map to a category.
	CategoryID string `json:"categoryId"`

	// Homepage is copied from the registry filter's homepage.
	Homepage string `json:"homepage"`

	// Source is the URL of the rule list.  main fills it from the registry
	// filter's download URL.
	Source string `json:"source"`
}