From 2611534de0662199a5feed03c1a0b97a7fdac564 Mon Sep 17 00:00:00 2001 From: Ainar Garipov Date: Thu, 21 Mar 2024 18:45:34 +0300 Subject: [PATCH] Pull request 2182: AG-20945-rule-list-id Squashed commit of the following: commit 87bad8c1c202902f344ad75c7b767f03d5123a4a Author: Ainar Garipov Date: Thu Mar 21 16:39:12 2024 +0300 all: imp lint, names, tests commit 284f8c7cc0c26d443342ad3d39007152714c0799 Author: Ainar Garipov Date: Thu Mar 21 15:37:54 2024 +0300 filtering: imp id handling --- CHANGELOG.md | 3 + SECURITY.md | 2 +- bamboo-specs/test.yaml | 9 +- internal/filtering/dnsrewrite.go | 6 +- internal/filtering/filter.go | 31 ++----- internal/filtering/filtering.go | 18 ++-- internal/filtering/http.go | 21 ++--- internal/filtering/idgenerator.go | 73 +++++++++++++++ .../filtering/idgenerator_internal_test.go | 88 +++++++++++++++++++ internal/filtering/rulelist/rulelist.go | 14 ++- .../filtering/safesearch/safesearch_test.go | 8 +- internal/querylog/decode.go | 8 +- 12 files changed, 222 insertions(+), 59 deletions(-) create mode 100644 internal/filtering/idgenerator.go create mode 100644 internal/filtering/idgenerator_internal_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index ce48676f..59a30bf1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ NOTE: Add new changes BELOW THIS COMMENT. ### Deprecated +- Currently, AdGuard Home uses a best-effort algorithm to fix invalid IDs of + filtering-rule lists on startup. This feature is deprecated, and invalid IDs + will cause errors on startup in a future version. - Node.JS 16. Future versions will require at least Node.JS 18 to build. [#5829]: https://github.com/AdguardTeam/AdGuardHome/issues/5829 diff --git a/SECURITY.md b/SECURITY.md index a9985fd4..e9415e2f 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -8,6 +8,6 @@ Please send your vulnerability reports to . To make sure > AdGuard Home API vulnerability: possible XSS attack -2. Make sure that the message body contains a clear description of the vulnerability. +1. Make sure that the message body contains a clear description of the vulnerability. If you have not received a reply to your email within 7 days, please make sure to follow up with us again at . Once again, make sure that the word “vulnerability” is in the subject line. diff --git a/bamboo-specs/test.yaml b/bamboo-specs/test.yaml index 29c15e78..70415e4a 100644 --- a/bamboo-specs/test.yaml +++ b/bamboo-specs/test.yaml @@ -74,7 +74,14 @@ set -e -f -u -x - make VERBOSE=1 go-deps go-tools go-lint go-test + make\ + GOMAXPROCS=1\ + VERBOSE=1\ + go-deps go-tools go-lint + + make\ + VERBOSE=1\ + go-test 'final-tasks': - 'clean' 'requirements': diff --git a/internal/filtering/dnsrewrite.go b/internal/filtering/dnsrewrite.go index 19b964a2..bba2f701 100644 --- a/internal/filtering/dnsrewrite.go +++ b/internal/filtering/dnsrewrite.go @@ -30,7 +30,7 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) { if dr.NewCNAME != "" { // NewCNAME rules have a higher priority than other rules. rules = []*ResultRule{{ - FilterListID: int64(nr.GetFilterListID()), + FilterListID: nr.GetFilterListID(), Text: nr.RuleText, }} @@ -46,14 +46,14 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) { dnsrr.RCode = dr.RCode dnsrr.Response[dr.RRType] = append(dnsrr.Response[dr.RRType], dr.Value) rules = append(rules, &ResultRule{ - FilterListID: int64(nr.GetFilterListID()), + FilterListID: nr.GetFilterListID(), Text: nr.RuleText, }) default: // RcodeRefused and other such codes have higher priority. Return // immediately. rules = []*ResultRule{{ - FilterListID: int64(nr.GetFilterListID()), + FilterListID: nr.GetFilterListID(), Text: nr.RuleText, }} dnsrr = &DNSRewriteResult{ diff --git a/internal/filtering/filter.go b/internal/filtering/filter.go index ce5a7e13..e134175b 100644 --- a/internal/filtering/filter.go +++ b/internal/filtering/filter.go @@ -22,11 +22,6 @@ import ( // filters. const filterDir = "filters" -// nextFilterID is a way to seed a unique ID generation. -// -// TODO(e.burkov): Use more deterministic approach. -var nextFilterID = time.Now().Unix() - // FilterYAML represents a filter list in the configuration file. // // TODO(e.burkov): Investigate if the field ordering is important. @@ -50,7 +45,10 @@ func (filter *FilterYAML) unload() { // Path to the filter contents func (filter *FilterYAML) Path(dataDir string) string { - return filepath.Join(dataDir, filterDir, strconv.FormatInt(filter.ID, 10)+".txt") + return filepath.Join( + dataDir, + filterDir, + strconv.FormatInt(int64(filter.ID), 10)+".txt") } // ensureName sets provided title or default name for the filter if it doesn't @@ -217,7 +215,10 @@ func (d *DNSFilter) loadFilters(array []FilterYAML) { for i := range array { filter := &array[i] // otherwise we're operating on a copy if filter.ID == 0 { - filter.ID = assignUniqueFilterID() + newID := d.idGen.next() + log.Info("filtering: warning: filter at index %d has no id; assigning to %d", i, newID) + + filter.ID = newID } if !filter.Enabled { @@ -247,22 +248,6 @@ func deduplicateFilters(filters []FilterYAML) (deduplicated []FilterYAML) { return filters[:lastIdx] } -// Set the next filter ID to max(filter.ID) + 1 -func updateUniqueFilterID(filters []FilterYAML) { - for _, filter := range filters { - if nextFilterID < filter.ID { - nextFilterID = filter.ID + 1 - } - } -} - -// TODO(e.burkov): Improve this inexhaustible source of races. -func assignUniqueFilterID() int64 { - value := nextFilterID - nextFilterID++ - return value -} - // tryRefreshFilters is like [refreshFilters], but backs down if the update is // already going on. // diff --git a/internal/filtering/filtering.go b/internal/filtering/filtering.go index 2203fe85..df2f4bc7 100644 --- a/internal/filtering/filtering.go +++ b/internal/filtering/filtering.go @@ -219,6 +219,9 @@ type Checker interface { // DNSFilter matches hostnames and DNS requests against filtering rules. type DNSFilter struct { + // idGen is used to generate IDs for package urlfilter. + idGen *idGenerator + // bufPool is a pool of buffers used for filtering-rule list parsing. bufPool *syncutil.Pool[[]byte] @@ -265,7 +268,7 @@ type Filter struct { Data []byte `yaml:"-"` // ID is automatically assigned when filter is added using nextFilterID. - ID int64 `yaml:"id"` + ID rulelist.URLFilterID `yaml:"id"` } // Reason holds an enum detailing why it was filtered or not filtered @@ -517,11 +520,13 @@ func (d *DNSFilter) ParentalBlockHost() (host string) { type ResultRule struct { // Text is the text of the rule. Text string `json:",omitempty"` + // IP is the host IP. It is nil unless the rule uses the // /etc/hosts syntax or the reason is FilteredSafeSearch. IP netip.Addr `json:",omitempty"` + // FilterListID is the ID of the rule's filter list. - FilterListID int64 `json:",omitempty"` + FilterListID rulelist.URLFilterID `json:",omitempty"` } // Result contains the result of a request check. @@ -692,7 +697,7 @@ func matchBlockedServicesRules( ruleText := rule.Text() res.Rules = []*ResultRule{{ - FilterListID: int64(rule.GetFilterListID()), + FilterListID: rule.GetFilterListID(), Text: ruleText, }} @@ -957,7 +962,7 @@ func makeResult(matchedRules []rules.Rule, reason Reason) (res Result) { resRules := make([]*ResultRule, len(matchedRules)) for i, mr := range matchedRules { resRules[i] = &ResultRule{ - FilterListID: int64(mr.GetFilterListID()), + FilterListID: mr.GetFilterListID(), Text: mr.Text(), } } @@ -978,6 +983,7 @@ func InitModule() { // be non-nil. func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) { d = &DNSFilter{ + idGen: newIDGenerator(int32(time.Now().Unix())), bufPool: syncutil.NewSlicePool[byte](rulelist.DefaultRuleBufSize), refreshLock: &sync.Mutex{}, safeBrowsingChecker: c.SafeBrowsingChecker, @@ -1041,8 +1047,8 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) { d.conf.Filters = deduplicateFilters(d.conf.Filters) d.conf.WhitelistFilters = deduplicateFilters(d.conf.WhitelistFilters) - updateUniqueFilterID(d.conf.Filters) - updateUniqueFilterID(d.conf.WhitelistFilters) + d.idGen.fix(d.conf.Filters) + d.idGen.fix(d.conf.WhitelistFilters) return d, nil } diff --git a/internal/filtering/http.go b/internal/filtering/http.go index 310338c0..3a13ccb3 100644 --- a/internal/filtering/http.go +++ b/internal/filtering/http.go @@ -13,6 +13,7 @@ import ( "time" "github.com/AdguardTeam/AdGuardHome/internal/aghhttp" + "github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist" "github.com/AdguardTeam/golibs/errors" "github.com/AdguardTeam/golibs/log" "github.com/miekg/dns" @@ -86,7 +87,7 @@ func (d *DNSFilter) handleFilteringAddURL(w http.ResponseWriter, r *http.Request Name: fj.Name, white: fj.Whitelist, Filter: Filter{ - ID: assignUniqueFilterID(), + ID: d.idGen.next(), }, } @@ -307,12 +308,12 @@ func (d *DNSFilter) handleFilteringRefresh(w http.ResponseWriter, r *http.Reques } type filterJSON struct { - URL string `json:"url"` - Name string `json:"name"` - LastUpdated string `json:"last_updated,omitempty"` - ID int64 `json:"id"` - RulesCount uint32 `json:"rules_count"` - Enabled bool `json:"enabled"` + URL string `json:"url"` + Name string `json:"name"` + LastUpdated string `json:"last_updated,omitempty"` + ID rulelist.URLFilterID `json:"id"` + RulesCount uint32 `json:"rules_count"` + Enabled bool `json:"enabled"` } type filteringConfig struct { @@ -388,8 +389,8 @@ func (d *DNSFilter) handleFilteringConfig(w http.ResponseWriter, r *http.Request } type checkHostRespRule struct { - Text string `json:"text"` - FilterListID int64 `json:"filter_list_id"` + Text string `json:"text"` + FilterListID rulelist.URLFilterID `json:"filter_list_id"` } type checkHostResp struct { @@ -412,7 +413,7 @@ type checkHostResp struct { // FilterID is the ID of the rule's filter list. // // Deprecated: Use Rules[*].FilterListID. - FilterID int64 `json:"filter_id"` + FilterID rulelist.URLFilterID `json:"filter_id"` } func (d *DNSFilter) handleCheckHost(w http.ResponseWriter, r *http.Request) { diff --git a/internal/filtering/idgenerator.go b/internal/filtering/idgenerator.go new file mode 100644 index 00000000..b7c0544b --- /dev/null +++ b/internal/filtering/idgenerator.go @@ -0,0 +1,73 @@ +package filtering + +import ( + "fmt" + "sync/atomic" + + "github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist" + "github.com/AdguardTeam/golibs/log" +) + +// idGenerator generates filtering-list IDs in a way broadly compatible with the +// legacy approach of AdGuard Home. +// +// TODO(a.garipov): Get rid of this once we switch completely to the new +// rule-list architecture. +type idGenerator struct { + current *atomic.Int32 +} + +// newIDGenerator returns a new ID generator initialized with the given seed +// value. +func newIDGenerator(seed int32) (g *idGenerator) { + g = &idGenerator{ + current: &atomic.Int32{}, + } + + g.current.Store(seed) + + return g +} + +// next returns the next ID from the generator. It is safe for concurrent use. +func (g *idGenerator) next() (id rulelist.URLFilterID) { + id32 := g.current.Add(1) + if id32 < 0 { + panic(fmt.Errorf("invalid current id value %d", id32)) + } + + return rulelist.URLFilterID(id32) +} + +// fix ensures that flts all have unique IDs. +func (g *idGenerator) fix(flts []FilterYAML) { + set := map[rulelist.URLFilterID]struct{}{} + for i, f := range flts { + id := f.ID + if id == 0 { + id = g.next() + flts[i].ID = id + } + + if _, ok := set[id]; !ok { + set[id] = struct{}{} + + continue + } + + newID := g.next() + for _, ok := set[newID]; ok; _, ok = set[newID] { + newID = g.next() + } + + log.Info( + "filtering: warning: filter at index %d has duplicate id %d; reassigning to %d", + i, + id, + newID, + ) + + flts[i].ID = newID + set[newID] = struct{}{} + } +} diff --git a/internal/filtering/idgenerator_internal_test.go b/internal/filtering/idgenerator_internal_test.go new file mode 100644 index 00000000..28dc5dea --- /dev/null +++ b/internal/filtering/idgenerator_internal_test.go @@ -0,0 +1,88 @@ +package filtering + +import ( + "testing" + + "github.com/AdguardTeam/AdGuardHome/internal/aghalg" + "github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist" + "github.com/stretchr/testify/assert" +) + +func TestIDGenerator_Fix(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + in []FilterYAML + }{{ + name: "nil", + in: nil, + }, { + name: "empty", + in: []FilterYAML{}, + }, { + name: "one_zero", + in: []FilterYAML{{}}, + }, { + name: "two_zeros", + in: []FilterYAML{{}, {}}, + }, { + name: "many_good", + in: []FilterYAML{{ + Filter: Filter{ + ID: 1, + }, + }, { + Filter: Filter{ + ID: 2, + }, + }, { + Filter: Filter{ + ID: 3, + }, + }}, + }, { + name: "two_dups", + in: []FilterYAML{{ + Filter: Filter{ + ID: 1, + }, + }, { + Filter: Filter{ + ID: 3, + }, + }, { + Filter: Filter{ + ID: 1, + }, + }, { + Filter: Filter{ + ID: 2, + }, + }}, + }} + + for _, tc := range testCases { + tc := tc + + t.Run(tc.name, func(t *testing.T) { + g := newIDGenerator(1) + g.fix(tc.in) + + assertUniqueIDs(t, tc.in) + }) + } +} + +// assertUniqueIDs is a test helper that asserts that the IDs of filters are +// unique. +func assertUniqueIDs(t testing.TB, flts []FilterYAML) { + t.Helper() + + uc := aghalg.UniqChecker[rulelist.URLFilterID]{} + for _, f := range flts { + uc.Add(f.ID) + } + + assert.NoError(t, uc.Validate()) +} diff --git a/internal/filtering/rulelist/rulelist.go b/internal/filtering/rulelist/rulelist.go index b9505e91..98981ecd 100644 --- a/internal/filtering/rulelist/rulelist.go +++ b/internal/filtering/rulelist/rulelist.go @@ -23,8 +23,6 @@ const DefaultMaxRuleListSize = 64 * datasize.MB // URLFilterID is a semantic type-alias for IDs used for working with package // urlfilter. -// -// TODO(a.garipov): Use everywhere in package filtering. type URLFilterID = int // The IDs of built-in filter lists. @@ -37,12 +35,12 @@ type URLFilterID = int // // TODO(d.kolyshev): Add URLFilterIDLegacyRewrite here and to the UI. const ( - URLFilterIDCustom = 0 - URLFilterIDEtcHosts = -1 - URLFilterIDBlockedService = -2 - URLFilterIDParentalControl = -3 - URLFilterIDSafeBrowsing = -4 - URLFilterIDSafeSearch = -5 + URLFilterIDCustom URLFilterID = 0 + URLFilterIDEtcHosts URLFilterID = -1 + URLFilterIDBlockedService URLFilterID = -2 + URLFilterIDParentalControl URLFilterID = -3 + URLFilterIDSafeBrowsing URLFilterID = -4 + URLFilterIDSafeSearch URLFilterID = -5 ) // UID is the type for the unique IDs of filtering-rule lists. diff --git a/internal/filtering/safesearch/safesearch_test.go b/internal/filtering/safesearch/safesearch_test.go index 1ad4bf11..127b2ae1 100644 --- a/internal/filtering/safesearch/safesearch_test.go +++ b/internal/filtering/safesearch/safesearch_test.go @@ -70,7 +70,7 @@ func TestDefault_CheckHost_yandex(t *testing.T) { require.Len(t, res.Rules, 1) assert.Equal(t, yandexIP, res.Rules[0].IP) - assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) + assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) } } @@ -90,7 +90,7 @@ func TestDefault_CheckHost_yandexAAAA(t *testing.T) { require.Len(t, res.Rules, 1) assert.Empty(t, res.Rules[0].IP) - assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) + assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) } func TestDefault_CheckHost_google(t *testing.T) { @@ -129,7 +129,7 @@ func TestDefault_CheckHost_google(t *testing.T) { require.Len(t, res.Rules, 1) assert.Equal(t, wantIP, res.Rules[0].IP) - assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) + assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) }) } } @@ -181,7 +181,7 @@ func TestDefault_CheckHost_duckduckgoAAAA(t *testing.T) { require.Len(t, res.Rules, 1) assert.Empty(t, res.Rules[0].IP) - assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) + assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID) } func TestDefault_Update(t *testing.T) { diff --git a/internal/querylog/decode.go b/internal/querylog/decode.go index f1ff5ad1..d4dea04e 100644 --- a/internal/querylog/decode.go +++ b/internal/querylog/decode.go @@ -11,6 +11,7 @@ import ( "time" "github.com/AdguardTeam/AdGuardHome/internal/filtering" + "github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist" "github.com/AdguardTeam/golibs/errors" "github.com/AdguardTeam/golibs/log" "github.com/AdguardTeam/urlfilter/rules" @@ -179,7 +180,8 @@ func decodeResultRuleKey(key string, i int, dec *json.Decoder, ent *logEntry) { case "FilterListID": ent.Result.Rules, vToken = decodeVTokenAndAddRule(key, i, dec, ent.Result.Rules) if n, ok := vToken.(json.Number); ok { - ent.Result.Rules[i].FilterListID, _ = n.Int64() + id, _ := n.Int64() + ent.Result.Rules[i].FilterListID = rulelist.URLFilterID(id) } case "IP": ent.Result.Rules, vToken = decodeVTokenAndAddRule(key, i, dec, ent.Result.Rules) @@ -582,7 +584,7 @@ var resultHandlers = map[string]logEntryHandler{ return nil } - i, err := n.Int64() + id, err := n.Int64() if err != nil { return err } @@ -593,7 +595,7 @@ var resultHandlers = map[string]logEntryHandler{ l++ } - ent.Result.Rules[l-1].FilterListID = i + ent.Result.Rules[l-1].FilterListID = rulelist.URLFilterID(id) return nil },