Pull request 2109: AG-20945-rule-list-filter
Squashed commit of the following:
commit 2da8c1754f349a9b7f8b629de8f0c892b9bae4dc
Merge: 5cea6a6a2 4fc6bf504
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Tue Dec 19 21:14:07 2023 +0300
Merge branch 'master' into AG-20945-rule-list-filter
commit 5cea6a6a2bed88f645828ab5b4e7de09f9bf91ec
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Tue Dec 19 17:53:21 2023 +0300
filtering/rulelist: imp docs, tests
commit f01434b37a3f0070d71eb0ae72ad8eb2f4922147
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:17:02 2023 +0300
filtering/rulelist: imp names
commit fe2bf68e6b99673b216b5c4ba867a5f4ed788d22
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:07:53 2023 +0300
all: go mod tidy
commit c7081d3486a78e8402dc8fe0223111a6fccdd19f
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:03:33 2023 +0300
filtering/rulelist: add filter
This commit is contained in:
parent
4fc6bf504e
commit
0920bb99fe
3
go.mod
3
go.mod
|
@ -9,6 +9,7 @@ require (
|
|||
github.com/NYTimes/gziphandler v1.1.1
|
||||
github.com/ameshkov/dnscrypt/v2 v2.2.7
|
||||
github.com/bluele/gcache v0.0.2
|
||||
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b
|
||||
github.com/digineo/go-ipset/v2 v2.2.1
|
||||
github.com/dimfeld/httptreemux/v5 v5.5.0
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
|
@ -16,7 +17,7 @@ require (
|
|||
github.com/google/go-cmp v0.6.0
|
||||
github.com/google/gopacket v1.1.19
|
||||
github.com/google/renameio/v2 v2.0.0
|
||||
github.com/google/uuid v1.4.0
|
||||
github.com/google/uuid v1.5.0
|
||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2
|
||||
github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86
|
||||
github.com/kardianos/service v1.2.2
|
||||
|
|
6
go.sum
6
go.sum
|
@ -18,6 +18,8 @@ github.com/beefsack/go-rate v0.0.0-20220214233405-116f4ca011a0 h1:0b2vaepXIfMsG+
|
|||
github.com/beefsack/go-rate v0.0.0-20220214233405-116f4ca011a0/go.mod h1:6YNgTHLutezwnBvyneBbwvB8C82y3dcoOj5EQJIdGXA=
|
||||
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
|
||||
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
|
||||
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY=
|
||||
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
|
@ -46,8 +48,8 @@ github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08/go.mod h1:czg5+yv1E0Z
|
|||
github.com/google/renameio/v2 v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg=
|
||||
github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4=
|
||||
github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
|
||||
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
|
||||
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hugelgupf/socketpair v0.0.0-20190730060125-05d35a94e714 h1:/jC7qQFrv8CrSJVmaolDVOxTfS9kc36uB6H40kdbQq8=
|
||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2 h1:9K06NfxkBh25x56yVhWWlKFE8YpicaSfHwoV8SFbueA=
|
||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2/go.mod h1:3A9PQ1cunSDF/1rbTq99Ts4pVnycWg+vlPkfeD2NLFI=
|
||||
|
|
|
@ -24,23 +24,25 @@ func validateFilterURL(urlStr string) (err error) {
|
|||
|
||||
if filepath.IsAbs(urlStr) {
|
||||
_, err = os.Stat(urlStr)
|
||||
if err != nil {
|
||||
// Don't wrap the error since it's informative enough as is.
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
// Don't wrap the error since it's informative enough as is.
|
||||
return err
|
||||
}
|
||||
|
||||
u, err := url.ParseRequestURI(urlStr)
|
||||
if err != nil {
|
||||
// Don't wrap the error since it's informative enough as is.
|
||||
return err
|
||||
} else if s := u.Scheme; s != aghhttp.SchemeHTTP && s != aghhttp.SchemeHTTPS {
|
||||
}
|
||||
|
||||
if s := u.Scheme; s != aghhttp.SchemeHTTP && s != aghhttp.SchemeHTTPS {
|
||||
return &url.Error{
|
||||
Op: "Check scheme",
|
||||
URL: urlStr,
|
||||
Err: fmt.Errorf("only %v allowed", []string{aghhttp.SchemeHTTP, aghhttp.SchemeHTTPS}),
|
||||
Err: fmt.Errorf("only %v allowed", []string{
|
||||
aghhttp.SchemeHTTP,
|
||||
aghhttp.SchemeHTTPS,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,338 @@
|
|||
package rulelist
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/AdguardTeam/AdGuardHome/internal/aghrenameio"
|
||||
"github.com/AdguardTeam/golibs/errors"
|
||||
"github.com/AdguardTeam/golibs/ioutil"
|
||||
"github.com/AdguardTeam/golibs/log"
|
||||
"github.com/AdguardTeam/urlfilter/filterlist"
|
||||
"github.com/c2h5oh/datasize"
|
||||
)
|
||||
|
||||
// Filter contains information about a single rule-list filter.
|
||||
//
|
||||
// TODO(a.garipov): Use.
|
||||
type Filter struct {
|
||||
// url is the URL of this rule list. Supported schemes are:
|
||||
// - http
|
||||
// - https
|
||||
// - file
|
||||
url *url.URL
|
||||
|
||||
// ruleList is the last successfully compiled [filterlist.RuleList].
|
||||
ruleList filterlist.RuleList
|
||||
|
||||
// updated is the time of the last successful update.
|
||||
updated time.Time
|
||||
|
||||
// name is the human-readable name of this rule-list filter.
|
||||
name string
|
||||
|
||||
// uid is the unique ID of this rule-list filter.
|
||||
uid UID
|
||||
|
||||
// urlFilterID is used for working with package urlfilter.
|
||||
urlFilterID URLFilterID
|
||||
|
||||
// rulesCount contains the number of rules in this rule-list filter.
|
||||
rulesCount int
|
||||
|
||||
// checksum is a CRC32 hash used to quickly check if the rules within a list
|
||||
// file have changed.
|
||||
checksum uint32
|
||||
|
||||
// enabled, if true, means that this rule-list filter is used for filtering.
|
||||
//
|
||||
// TODO(a.garipov): Take into account.
|
||||
enabled bool
|
||||
}
|
||||
|
||||
// FilterConfig contains the configuration for a [Filter].
|
||||
type FilterConfig struct {
|
||||
// URL is the URL of this rule-list filter. Supported schemes are:
|
||||
// - http
|
||||
// - https
|
||||
// - file
|
||||
URL *url.URL
|
||||
|
||||
// Name is the human-readable name of this rule-list filter. If not set, it
|
||||
// is either taken from the rule-list data or generated synthetically from
|
||||
// the UID.
|
||||
Name string
|
||||
|
||||
// UID is the unique ID of this rule-list filter.
|
||||
UID UID
|
||||
|
||||
// URLFilterID is used for working with package urlfilter.
|
||||
URLFilterID URLFilterID
|
||||
|
||||
// Enabled, if true, means that this rule-list filter is used for filtering.
|
||||
Enabled bool
|
||||
}
|
||||
|
||||
// NewFilter creates a new rule-list filter. The filter is not refreshed, so a
|
||||
// refresh should be performed before use.
|
||||
func NewFilter(c *FilterConfig) (f *Filter, err error) {
|
||||
if c.URL == nil {
|
||||
return nil, errors.Error("no url")
|
||||
}
|
||||
|
||||
switch s := c.URL.Scheme; s {
|
||||
case "http", "https", "file":
|
||||
// Go on.
|
||||
default:
|
||||
return nil, fmt.Errorf("bad url scheme: %q", s)
|
||||
}
|
||||
|
||||
return &Filter{
|
||||
url: c.URL,
|
||||
name: c.Name,
|
||||
uid: c.UID,
|
||||
urlFilterID: c.URLFilterID,
|
||||
enabled: c.Enabled,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Refresh updates the data in the rule-list filter. parseBuf is the initial
|
||||
// buffer used to parse information from the data. cli and maxSize are only
|
||||
// used when f is a URL-based list.
|
||||
func (f *Filter) Refresh(
|
||||
ctx context.Context,
|
||||
parseBuf []byte,
|
||||
cli *http.Client,
|
||||
cacheDir string,
|
||||
maxSize datasize.ByteSize,
|
||||
) (parseRes *ParseResult, err error) {
|
||||
cachePath := filepath.Join(cacheDir, f.uid.String()+".txt")
|
||||
|
||||
switch s := f.url.Scheme; s {
|
||||
case "http", "https":
|
||||
parseRes, err = f.setFromHTTP(ctx, parseBuf, cli, cachePath, maxSize.Bytes())
|
||||
case "file":
|
||||
parseRes, err = f.setFromFile(parseBuf, f.url.Path, cachePath)
|
||||
default:
|
||||
// Since the URL has been prevalidated in New, consider this a
|
||||
// programmer error.
|
||||
panic(fmt.Errorf("bad url scheme: %q", s))
|
||||
}
|
||||
if err != nil {
|
||||
// Don't wrap the error, because it's informative enough as is.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if f.checksum != parseRes.Checksum {
|
||||
f.checksum = parseRes.Checksum
|
||||
f.rulesCount = parseRes.RulesCount
|
||||
f.setName(parseRes.Title)
|
||||
f.updated = time.Now()
|
||||
}
|
||||
|
||||
return parseRes, nil
|
||||
}
|
||||
|
||||
// setFromHTTP sets the rule-list filter's data from its URL. It also caches
|
||||
// the data into a file.
|
||||
func (f *Filter) setFromHTTP(
|
||||
ctx context.Context,
|
||||
parseBuf []byte,
|
||||
cli *http.Client,
|
||||
cachePath string,
|
||||
maxSize uint64,
|
||||
) (parseRes *ParseResult, err error) {
|
||||
defer func() { err = errors.Annotate(err, "setting from http: %w") }()
|
||||
|
||||
text, parseRes, err := f.readFromHTTP(ctx, parseBuf, cli, cachePath, maxSize)
|
||||
if err != nil {
|
||||
// Don't wrap the error, because it's informative enough as is.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO(a.garipov): Add filterlist.BytesRuleList.
|
||||
f.ruleList = &filterlist.StringRuleList{
|
||||
ID: f.urlFilterID,
|
||||
RulesText: text,
|
||||
IgnoreCosmetic: true,
|
||||
}
|
||||
|
||||
return parseRes, nil
|
||||
}
|
||||
|
||||
// readFromHTTP reads the data from the rule-list filter's URL into the cache
|
||||
// file as well as returns it as a string. The data is filtered through a
|
||||
// parser and so is free from comments, unnecessary whitespace, etc.
|
||||
func (f *Filter) readFromHTTP(
|
||||
ctx context.Context,
|
||||
parseBuf []byte,
|
||||
cli *http.Client,
|
||||
cachePath string,
|
||||
maxSize uint64,
|
||||
) (text string, parseRes *ParseResult, err error) {
|
||||
urlStr := f.url.String()
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("making request for http url %q: %w", urlStr, err)
|
||||
}
|
||||
|
||||
resp, err := cli.Do(req)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("requesting from http url: %w", err)
|
||||
}
|
||||
defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }()
|
||||
|
||||
// TODO(a.garipov): Use [agdhttp.CheckStatus] when it's moved to golibs.
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", nil, fmt.Errorf("got status code %d, want %d", resp.StatusCode, http.StatusOK)
|
||||
}
|
||||
|
||||
fltFile, err := aghrenameio.NewPendingFile(cachePath, 0o644)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("creating temp file: %w", err)
|
||||
}
|
||||
defer func() { err = aghrenameio.WithDeferredCleanup(err, fltFile) }()
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
mw := io.MultiWriter(buf, fltFile)
|
||||
|
||||
parser := NewParser()
|
||||
httpBody := ioutil.LimitReader(resp.Body, maxSize)
|
||||
parseRes, err = parser.Parse(mw, httpBody, parseBuf)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("parsing response from http url %q: %w", urlStr, err)
|
||||
}
|
||||
|
||||
return buf.String(), parseRes, nil
|
||||
}
|
||||
|
||||
// setName sets the title using either the already-present name, the given title
|
||||
// from the rule-list data, or a synthetic name.
|
||||
func (f *Filter) setName(title string) {
|
||||
if f.name != "" {
|
||||
return
|
||||
}
|
||||
|
||||
if title != "" {
|
||||
f.name = title
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
f.name = fmt.Sprintf("List %s", f.uid)
|
||||
}
|
||||
|
||||
// setFromFile sets the rule-list filter's data from a file path. It also
|
||||
// caches the data into a file.
|
||||
//
|
||||
// TODO(a.garipov): Retest on Windows once rule-list updater is committed. See
|
||||
// if calling Close is necessary here.
|
||||
func (f *Filter) setFromFile(
|
||||
parseBuf []byte,
|
||||
filePath string,
|
||||
cachePath string,
|
||||
) (parseRes *ParseResult, err error) {
|
||||
defer func() { err = errors.Annotate(err, "setting from file: %w") }()
|
||||
|
||||
parseRes, err = parseIntoCache(parseBuf, filePath, cachePath)
|
||||
if err != nil {
|
||||
// Don't wrap the error, because it's informative enough as is.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = f.Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("closing old rule list: %w", err)
|
||||
}
|
||||
|
||||
rl, err := filterlist.NewFileRuleList(f.urlFilterID, cachePath, true)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening new rule list: %w", err)
|
||||
}
|
||||
|
||||
f.ruleList = rl
|
||||
|
||||
return parseRes, nil
|
||||
}
|
||||
|
||||
// parseIntoCache copies the relevant the data from filePath into cachePath
|
||||
// while also parsing it.
|
||||
func parseIntoCache(
|
||||
parseBuf []byte,
|
||||
filePath string,
|
||||
cachePath string,
|
||||
) (parseRes *ParseResult, err error) {
|
||||
tmpFile, err := aghrenameio.NewPendingFile(cachePath, 0o644)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating temp file: %w", err)
|
||||
}
|
||||
defer func() { err = aghrenameio.WithDeferredCleanup(err, tmpFile) }()
|
||||
|
||||
// #nosec G304 -- Assume that cachePath is always cacheDir joined with a
|
||||
// uid using [filepath.Join].
|
||||
f, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening src file: %w", err)
|
||||
}
|
||||
defer func() { err = errors.WithDeferred(err, f.Close()) }()
|
||||
|
||||
parser := NewParser()
|
||||
parseRes, err = parser.Parse(tmpFile, f, parseBuf)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("copying src file: %w", err)
|
||||
}
|
||||
|
||||
return parseRes, nil
|
||||
}
|
||||
|
||||
// Close closes the underlying rule list.
|
||||
func (f *Filter) Close() (err error) {
|
||||
if f.ruleList == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return f.ruleList.Close()
|
||||
}
|
||||
|
||||
// filterUpdate represents a single ongoing rule-list filter update.
|
||||
//
|
||||
//lint:ignore U1000 TODO(a.garipov): Use.
|
||||
type filterUpdate struct {
|
||||
httpCli *http.Client
|
||||
cacheDir string
|
||||
name string
|
||||
parseBuf []byte
|
||||
maxSize datasize.ByteSize
|
||||
}
|
||||
|
||||
// process runs an update of a single rule-list.
|
||||
func (u *filterUpdate) process(ctx context.Context, f *Filter) (err error) {
|
||||
prevChecksum := f.checksum
|
||||
parseRes, err := f.Refresh(ctx, u.parseBuf, u.httpCli, u.cacheDir, u.maxSize)
|
||||
if err != nil {
|
||||
return fmt.Errorf("updating %s: %w", f.uid, err)
|
||||
}
|
||||
|
||||
if prevChecksum == parseRes.Checksum {
|
||||
log.Info("filtering: filter %q: filter %q: no change", u.name, f.uid)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Info(
|
||||
"filtering: updated filter %q: filter %q: %d bytes, %d rules",
|
||||
u.name,
|
||||
f.uid,
|
||||
parseRes.BytesWritten,
|
||||
parseRes.RulesCount,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
package rulelist_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
|
||||
"github.com/AdguardTeam/golibs/testutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFilter_Refresh(t *testing.T) {
|
||||
cacheDir := t.TempDir()
|
||||
uid := rulelist.MustNewUID()
|
||||
|
||||
initialFile := filepath.Join(cacheDir, "initial.txt")
|
||||
initialData := []byte(
|
||||
testRuleTextTitle +
|
||||
testRuleTextBlocked,
|
||||
)
|
||||
writeErr := os.WriteFile(initialFile, initialData, 0o644)
|
||||
require.NoError(t, writeErr)
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
pt := testutil.PanicT{}
|
||||
|
||||
_, err := io.WriteString(w, testRuleTextTitle+testRuleTextBlocked)
|
||||
require.NoError(pt, err)
|
||||
}))
|
||||
|
||||
srvURL, urlErr := url.Parse(srv.URL)
|
||||
require.NoError(t, urlErr)
|
||||
|
||||
testCases := []struct {
|
||||
url *url.URL
|
||||
name string
|
||||
wantNewErrMsg string
|
||||
}{{
|
||||
url: nil,
|
||||
name: "nil_url",
|
||||
wantNewErrMsg: "no url",
|
||||
}, {
|
||||
url: &url.URL{
|
||||
Scheme: "ftp",
|
||||
},
|
||||
name: "bad_scheme",
|
||||
wantNewErrMsg: `bad url scheme: "ftp"`,
|
||||
}, {
|
||||
name: "file",
|
||||
url: &url.URL{
|
||||
Scheme: "file",
|
||||
Path: initialFile,
|
||||
},
|
||||
wantNewErrMsg: "",
|
||||
}, {
|
||||
name: "http",
|
||||
url: srvURL,
|
||||
wantNewErrMsg: "",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
f, err := rulelist.NewFilter(&rulelist.FilterConfig{
|
||||
URL: tc.url,
|
||||
Name: tc.name,
|
||||
UID: uid,
|
||||
URLFilterID: testURLFilterID,
|
||||
Enabled: true,
|
||||
})
|
||||
if tc.wantNewErrMsg != "" {
|
||||
assert.EqualError(t, err, tc.wantNewErrMsg)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
testutil.CleanupAndRequireSuccess(t, f.Close)
|
||||
|
||||
require.NotNil(t, f)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||
cli := &http.Client{
|
||||
Timeout: testTimeout,
|
||||
}
|
||||
|
||||
res, err := f.Refresh(ctx, buf, cli, cacheDir, rulelist.DefaultMaxRuleListSize)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, testTitle, res.Title)
|
||||
assert.Equal(t, len(testRuleTextBlocked), res.BytesWritten)
|
||||
assert.Equal(t, 1, res.RulesCount)
|
||||
|
||||
// Check that the cached file exists.
|
||||
_, err = os.Stat(filepath.Join(cacheDir, uid.String()+".txt"))
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
}
|
|
@ -69,12 +69,12 @@ func TestParser_Parse(t *testing.T) {
|
|||
wantWritten: len(testRuleTextBlocked) + len(testRuleTextHTML),
|
||||
}, {
|
||||
name: "title",
|
||||
in: "! Title: Test Title \n" +
|
||||
in: testRuleTextTitle +
|
||||
"! Title: Bad, Ignored Title\n" +
|
||||
testRuleTextBlocked,
|
||||
wantDst: testRuleTextBlocked,
|
||||
wantErrMsg: "",
|
||||
wantTitle: "Test Title",
|
||||
wantTitle: testTitle,
|
||||
wantRulesNum: 1,
|
||||
wantWritten: len(testRuleTextBlocked),
|
||||
}, {
|
||||
|
@ -87,14 +87,14 @@ func TestParser_Parse(t *testing.T) {
|
|||
wantWritten: len(testRuleTextCosmetic),
|
||||
}, {
|
||||
name: "bad_char",
|
||||
in: "! Title: Test Title \n" +
|
||||
in: testRuleTextTitle +
|
||||
testRuleTextBlocked +
|
||||
">>>\x7F<<<",
|
||||
wantDst: testRuleTextBlocked,
|
||||
wantErrMsg: "line 3: " +
|
||||
"character 4: " +
|
||||
"likely binary character '\\x7f'",
|
||||
wantTitle: "Test Title",
|
||||
wantTitle: testTitle,
|
||||
wantRulesNum: 1,
|
||||
wantWritten: len(testRuleTextBlocked),
|
||||
}, {
|
||||
|
|
|
@ -1,9 +1,55 @@
|
|||
// Package rulelist contains the implementation of the standard rule-list
|
||||
// filter that wraps an urlfilter filtering-engine.
|
||||
//
|
||||
// TODO(a.garipov): Expand.
|
||||
// TODO(a.garipov): Add a new update worker.
|
||||
package rulelist
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/c2h5oh/datasize"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// DefaultRuleBufSize is the default size, in bytes, of the buffer into which a
// single line with a filtering rule is read.
//
// TODO(a.garipov): Consider using [datasize.ByteSize]. It is currently only
// used as an int.
const DefaultRuleBufSize = 1024
|
||||
|
||||
// DefaultMaxRuleListSize is the default maximum filtering-rule list size.
|
||||
const DefaultMaxRuleListSize = 64 * datasize.MB
|
||||
|
||||
// URLFilterID is a semantic alias of int for the IDs that are passed to
// package urlfilter.
type URLFilterID = int
|
||||
|
||||
// UID is the type for the unique IDs of filtering-rule lists.
|
||||
type UID uuid.UUID
|
||||
|
||||
// NewUID returns a new filtering-rule list UID. Any error returned is an error
|
||||
// from the cryptographic randomness reader.
|
||||
func NewUID() (uid UID, err error) {
|
||||
uuidv7, err := uuid.NewV7()
|
||||
|
||||
return UID(uuidv7), err
|
||||
}
|
||||
|
||||
// MustNewUID is a wrapper around [NewUID] that panics if there is an error.
|
||||
func MustNewUID() (uid UID) {
|
||||
uid, err := NewUID()
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected uuidv7 error: %w", err))
|
||||
}
|
||||
|
||||
return uid
|
||||
}
|
||||
|
||||
// type check
|
||||
var _ fmt.Stringer = UID{}
|
||||
|
||||
// String implements the [fmt.Stringer] interface for UID.
|
||||
func (id UID) String() (s string) {
|
||||
return uuid.UUID(id).String()
|
||||
}
|
||||
|
|
|
@ -1,16 +1,34 @@
|
|||
package rulelist_test
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
|
||||
"github.com/AdguardTeam/golibs/testutil"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
testutil.DiscardLogOutput(m)
|
||||
}
|
||||
|
||||
// testTimeout is the timeout shared by the tests of this package.
const testTimeout = time.Second
|
||||
|
||||
// Common texts for tests.
|
||||
// testURLFilterID is the common [rulelist.URLFilterID] for tests.
|
||||
const testURLFilterID rulelist.URLFilterID = 1
|
||||
|
||||
// testTitle is the rule-list title shared by the tests of this package.
const testTitle = "Test Title"
|
||||
|
||||
// Common rule texts for tests.
|
||||
const (
|
||||
testRuleTextBadTab = "||bad-tab-and-comment.example^\t# A comment.\n"
|
||||
testRuleTextBlocked = "||blocked.example^\n"
|
||||
testRuleTextBlocked2 = "||blocked-2.example^\n"
|
||||
testRuleTextEtcHostsTab = "0.0.0.0 tab..example^\t# A comment.\n"
|
||||
testRuleTextHTML = "<!DOCTYPE html>\n"
|
||||
testRuleTextTitle = "! Title: " + testTitle + " \n"
|
||||
|
||||
// testRuleTextCosmetic is a cosmetic rule with a zero-width non-joiner.
|
||||
//
|
||||
|
|
Loading…
Reference in New Issue