Pull request 2182: AG-20945-rule-list-id

Squashed commit of the following:

commit 87bad8c1c2
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Mar 21 16:39:12 2024 +0300

    all: imp lint, names, tests

commit 284f8c7cc0
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Mar 21 15:37:54 2024 +0300

    filtering: imp id handling
This commit is contained in:
Ainar Garipov 2024-03-21 18:45:34 +03:00
parent 70c88f2ba2
commit 2611534de0
12 changed files with 222 additions and 59 deletions

View File

@ -30,6 +30,9 @@ NOTE: Add new changes BELOW THIS COMMENT.
### Deprecated
- Currently, AdGuard Home uses a best-effort algorithm to fix invalid IDs of
filtering-rule lists on startup. This feature is deprecated, and invalid IDs
will cause errors on startup in a future version.
- Node.JS 16. Future versions will require at least Node.JS 18 to build.
[#5829]: https://github.com/AdguardTeam/AdGuardHome/issues/5829

View File

@ -8,6 +8,6 @@ Please send your vulnerability reports to <security@adguard.com>. To make sure
> AdGuard Home API vulnerability: possible XSS attack
2. Make sure that the message body contains a clear description of the vulnerability.
1. Make sure that the message body contains a clear description of the vulnerability.
If you have not received a reply to your email within 7 days, please make sure to follow up with us again at <security@adguard.com>. Once again, make sure that the word “vulnerability” is in the subject line.

View File

@ -74,7 +74,14 @@
set -e -f -u -x
make VERBOSE=1 go-deps go-tools go-lint go-test
make\
GOMAXPROCS=1\
VERBOSE=1\
go-deps go-tools go-lint
make\
VERBOSE=1\
go-test
'final-tasks':
- 'clean'
'requirements':

View File

@ -30,7 +30,7 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) {
if dr.NewCNAME != "" {
// NewCNAME rules have a higher priority than other rules.
rules = []*ResultRule{{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
}}
@ -46,14 +46,14 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) {
dnsrr.RCode = dr.RCode
dnsrr.Response[dr.RRType] = append(dnsrr.Response[dr.RRType], dr.Value)
rules = append(rules, &ResultRule{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
})
default:
// RcodeRefused and other such codes have higher priority. Return
// immediately.
rules = []*ResultRule{{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
}}
dnsrr = &DNSRewriteResult{

View File

@ -22,11 +22,6 @@ import (
// filters.
const filterDir = "filters"
// nextFilterID is a way to seed a unique ID generation.
//
// TODO(e.burkov): Use more deterministic approach.
var nextFilterID = time.Now().Unix()
// FilterYAML represents a filter list in the configuration file.
//
// TODO(e.burkov): Investigate if the field ordering is important.
@ -50,7 +45,10 @@ func (filter *FilterYAML) unload() {
// Path to the filter contents
func (filter *FilterYAML) Path(dataDir string) string {
return filepath.Join(dataDir, filterDir, strconv.FormatInt(filter.ID, 10)+".txt")
return filepath.Join(
dataDir,
filterDir,
strconv.FormatInt(int64(filter.ID), 10)+".txt")
}
// ensureName sets provided title or default name for the filter if it doesn't
@ -217,7 +215,10 @@ func (d *DNSFilter) loadFilters(array []FilterYAML) {
for i := range array {
filter := &array[i] // otherwise we're operating on a copy
if filter.ID == 0 {
filter.ID = assignUniqueFilterID()
newID := d.idGen.next()
log.Info("filtering: warning: filter at index %d has no id; assigning to %d", i, newID)
filter.ID = newID
}
if !filter.Enabled {
@ -247,22 +248,6 @@ func deduplicateFilters(filters []FilterYAML) (deduplicated []FilterYAML) {
return filters[:lastIdx]
}
// Set the next filter ID to max(filter.ID) + 1
func updateUniqueFilterID(filters []FilterYAML) {
for _, filter := range filters {
if nextFilterID < filter.ID {
nextFilterID = filter.ID + 1
}
}
}
// TODO(e.burkov): Improve this inexhaustible source of races.
func assignUniqueFilterID() int64 {
value := nextFilterID
nextFilterID++
return value
}
// tryRefreshFilters is like [refreshFilters], but backs down if the update is
// already going on.
//

View File

@ -219,6 +219,9 @@ type Checker interface {
// DNSFilter matches hostnames and DNS requests against filtering rules.
type DNSFilter struct {
// idGen is used to generate IDs for package urlfilter.
idGen *idGenerator
// bufPool is a pool of buffers used for filtering-rule list parsing.
bufPool *syncutil.Pool[[]byte]
@ -265,7 +268,7 @@ type Filter struct {
Data []byte `yaml:"-"`
// ID is automatically assigned when filter is added using nextFilterID.
ID int64 `yaml:"id"`
ID rulelist.URLFilterID `yaml:"id"`
}
// Reason holds an enum detailing why it was filtered or not filtered
@ -517,11 +520,13 @@ func (d *DNSFilter) ParentalBlockHost() (host string) {
type ResultRule struct {
// Text is the text of the rule.
Text string `json:",omitempty"`
// IP is the host IP. It is nil unless the rule uses the
// /etc/hosts syntax or the reason is FilteredSafeSearch.
IP netip.Addr `json:",omitempty"`
// FilterListID is the ID of the rule's filter list.
FilterListID int64 `json:",omitempty"`
FilterListID rulelist.URLFilterID `json:",omitempty"`
}
// Result contains the result of a request check.
@ -692,7 +697,7 @@ func matchBlockedServicesRules(
ruleText := rule.Text()
res.Rules = []*ResultRule{{
FilterListID: int64(rule.GetFilterListID()),
FilterListID: rule.GetFilterListID(),
Text: ruleText,
}}
@ -957,7 +962,7 @@ func makeResult(matchedRules []rules.Rule, reason Reason) (res Result) {
resRules := make([]*ResultRule, len(matchedRules))
for i, mr := range matchedRules {
resRules[i] = &ResultRule{
FilterListID: int64(mr.GetFilterListID()),
FilterListID: mr.GetFilterListID(),
Text: mr.Text(),
}
}
@ -978,6 +983,7 @@ func InitModule() {
// be non-nil.
func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
d = &DNSFilter{
idGen: newIDGenerator(int32(time.Now().Unix())),
bufPool: syncutil.NewSlicePool[byte](rulelist.DefaultRuleBufSize),
refreshLock: &sync.Mutex{},
safeBrowsingChecker: c.SafeBrowsingChecker,
@ -1041,8 +1047,8 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
d.conf.Filters = deduplicateFilters(d.conf.Filters)
d.conf.WhitelistFilters = deduplicateFilters(d.conf.WhitelistFilters)
updateUniqueFilterID(d.conf.Filters)
updateUniqueFilterID(d.conf.WhitelistFilters)
d.idGen.fix(d.conf.Filters)
d.idGen.fix(d.conf.WhitelistFilters)
return d, nil
}

View File

@ -13,6 +13,7 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/miekg/dns"
@ -86,7 +87,7 @@ func (d *DNSFilter) handleFilteringAddURL(w http.ResponseWriter, r *http.Request
Name: fj.Name,
white: fj.Whitelist,
Filter: Filter{
ID: assignUniqueFilterID(),
ID: d.idGen.next(),
},
}
@ -307,12 +308,12 @@ func (d *DNSFilter) handleFilteringRefresh(w http.ResponseWriter, r *http.Reques
}
type filterJSON struct {
URL string `json:"url"`
Name string `json:"name"`
LastUpdated string `json:"last_updated,omitempty"`
ID int64 `json:"id"`
RulesCount uint32 `json:"rules_count"`
Enabled bool `json:"enabled"`
URL string `json:"url"`
Name string `json:"name"`
LastUpdated string `json:"last_updated,omitempty"`
ID rulelist.URLFilterID `json:"id"`
RulesCount uint32 `json:"rules_count"`
Enabled bool `json:"enabled"`
}
type filteringConfig struct {
@ -388,8 +389,8 @@ func (d *DNSFilter) handleFilteringConfig(w http.ResponseWriter, r *http.Request
}
type checkHostRespRule struct {
Text string `json:"text"`
FilterListID int64 `json:"filter_list_id"`
Text string `json:"text"`
FilterListID rulelist.URLFilterID `json:"filter_list_id"`
}
type checkHostResp struct {
@ -412,7 +413,7 @@ type checkHostResp struct {
// FilterID is the ID of the rule's filter list.
//
// Deprecated: Use Rules[*].FilterListID.
FilterID int64 `json:"filter_id"`
FilterID rulelist.URLFilterID `json:"filter_id"`
}
func (d *DNSFilter) handleCheckHost(w http.ResponseWriter, r *http.Request) {

View File

@ -0,0 +1,73 @@
package filtering
import (
"fmt"
"sync/atomic"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/log"
)
// idGenerator generates filtering-list IDs in a way broadly compatible with the
// legacy approach of AdGuard Home.
//
// TODO(a.garipov): Get rid of this once we switch completely to the new
// rule-list architecture.
type idGenerator struct {
current *atomic.Int32
}
// newIDGenerator returns a new ID generator initialized with the given seed
// value.
func newIDGenerator(seed int32) (g *idGenerator) {
g = &idGenerator{
current: &atomic.Int32{},
}
g.current.Store(seed)
return g
}
// next returns the next ID from the generator. It is safe for concurrent use.
func (g *idGenerator) next() (id rulelist.URLFilterID) {
id32 := g.current.Add(1)
if id32 < 0 {
panic(fmt.Errorf("invalid current id value %d", id32))
}
return rulelist.URLFilterID(id32)
}
// fix ensures that flts all have unique IDs.
func (g *idGenerator) fix(flts []FilterYAML) {
set := map[rulelist.URLFilterID]struct{}{}
for i, f := range flts {
id := f.ID
if id == 0 {
id = g.next()
flts[i].ID = id
}
if _, ok := set[id]; !ok {
set[id] = struct{}{}
continue
}
newID := g.next()
for _, ok := set[newID]; ok; _, ok = set[newID] {
newID = g.next()
}
log.Info(
"filtering: warning: filter at index %d has duplicate id %d; reassigning to %d",
i,
id,
newID,
)
flts[i].ID = newID
set[newID] = struct{}{}
}
}

View File

@ -0,0 +1,88 @@
package filtering
import (
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/aghalg"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/stretchr/testify/assert"
)
func TestIDGenerator_Fix(t *testing.T) {
t.Parallel()
testCases := []struct {
name string
in []FilterYAML
}{{
name: "nil",
in: nil,
}, {
name: "empty",
in: []FilterYAML{},
}, {
name: "one_zero",
in: []FilterYAML{{}},
}, {
name: "two_zeros",
in: []FilterYAML{{}, {}},
}, {
name: "many_good",
in: []FilterYAML{{
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 2,
},
}, {
Filter: Filter{
ID: 3,
},
}},
}, {
name: "two_dups",
in: []FilterYAML{{
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 3,
},
}, {
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 2,
},
}},
}}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
g := newIDGenerator(1)
g.fix(tc.in)
assertUniqueIDs(t, tc.in)
})
}
}
// assertUniqueIDs is a test helper that asserts that the IDs of filters are
// unique.
func assertUniqueIDs(t testing.TB, flts []FilterYAML) {
t.Helper()
uc := aghalg.UniqChecker[rulelist.URLFilterID]{}
for _, f := range flts {
uc.Add(f.ID)
}
assert.NoError(t, uc.Validate())
}

View File

@ -23,8 +23,6 @@ const DefaultMaxRuleListSize = 64 * datasize.MB
// URLFilterID is a semantic type-alias for IDs used for working with package
// urlfilter.
//
// TODO(a.garipov): Use everywhere in package filtering.
type URLFilterID = int
// The IDs of built-in filter lists.
@ -37,12 +35,12 @@ type URLFilterID = int
//
// TODO(d.kolyshev): Add URLFilterIDLegacyRewrite here and to the UI.
const (
URLFilterIDCustom = 0
URLFilterIDEtcHosts = -1
URLFilterIDBlockedService = -2
URLFilterIDParentalControl = -3
URLFilterIDSafeBrowsing = -4
URLFilterIDSafeSearch = -5
URLFilterIDCustom URLFilterID = 0
URLFilterIDEtcHosts URLFilterID = -1
URLFilterIDBlockedService URLFilterID = -2
URLFilterIDParentalControl URLFilterID = -3
URLFilterIDSafeBrowsing URLFilterID = -4
URLFilterIDSafeSearch URLFilterID = -5
)
// UID is the type for the unique IDs of filtering-rule lists.

View File

@ -70,7 +70,7 @@ func TestDefault_CheckHost_yandex(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Equal(t, yandexIP, res.Rules[0].IP)
assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
}
@ -90,7 +90,7 @@ func TestDefault_CheckHost_yandexAAAA(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Empty(t, res.Rules[0].IP)
assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
func TestDefault_CheckHost_google(t *testing.T) {
@ -129,7 +129,7 @@ func TestDefault_CheckHost_google(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Equal(t, wantIP, res.Rules[0].IP)
assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
})
}
}
@ -181,7 +181,7 @@ func TestDefault_CheckHost_duckduckgoAAAA(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Empty(t, res.Rules[0].IP)
assert.EqualValues(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
func TestDefault_Update(t *testing.T) {

View File

@ -11,6 +11,7 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/urlfilter/rules"
@ -179,7 +180,8 @@ func decodeResultRuleKey(key string, i int, dec *json.Decoder, ent *logEntry) {
case "FilterListID":
ent.Result.Rules, vToken = decodeVTokenAndAddRule(key, i, dec, ent.Result.Rules)
if n, ok := vToken.(json.Number); ok {
ent.Result.Rules[i].FilterListID, _ = n.Int64()
id, _ := n.Int64()
ent.Result.Rules[i].FilterListID = rulelist.URLFilterID(id)
}
case "IP":
ent.Result.Rules, vToken = decodeVTokenAndAddRule(key, i, dec, ent.Result.Rules)
@ -582,7 +584,7 @@ var resultHandlers = map[string]logEntryHandler{
return nil
}
i, err := n.Int64()
id, err := n.Int64()
if err != nil {
return err
}
@ -593,7 +595,7 @@ var resultHandlers = map[string]logEntryHandler{
l++
}
ent.Result.Rules[l-1].FilterListID = i
ent.Result.Rules[l-1].FilterListID = rulelist.URLFilterID(id)
return nil
},