Pull request 2109: AG-20945-rule-list-filter
Squashed commit of the following:
commit 2da8c1754f349a9b7f8b629de8f0c892b9bae4dc
Merge: 5cea6a6a2 4fc6bf504
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Tue Dec 19 21:14:07 2023 +0300
Merge branch 'master' into AG-20945-rule-list-filter
commit 5cea6a6a2bed88f645828ab5b4e7de09f9bf91ec
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Tue Dec 19 17:53:21 2023 +0300
filtering/rulelist: imp docs, tests
commit f01434b37a3f0070d71eb0ae72ad8eb2f4922147
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:17:02 2023 +0300
filtering/rulelist: imp names
commit fe2bf68e6b99673b216b5c4ba867a5f4ed788d22
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:07:53 2023 +0300
all: go mod tidy
commit c7081d3486a78e8402dc8fe0223111a6fccdd19f
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date: Thu Dec 14 19:03:33 2023 +0300
filtering/rulelist: add filter
This commit is contained in:
parent
4fc6bf504e
commit
0920bb99fe
3
go.mod
3
go.mod
|
@ -9,6 +9,7 @@ require (
|
||||||
github.com/NYTimes/gziphandler v1.1.1
|
github.com/NYTimes/gziphandler v1.1.1
|
||||||
github.com/ameshkov/dnscrypt/v2 v2.2.7
|
github.com/ameshkov/dnscrypt/v2 v2.2.7
|
||||||
github.com/bluele/gcache v0.0.2
|
github.com/bluele/gcache v0.0.2
|
||||||
|
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b
|
||||||
github.com/digineo/go-ipset/v2 v2.2.1
|
github.com/digineo/go-ipset/v2 v2.2.1
|
||||||
github.com/dimfeld/httptreemux/v5 v5.5.0
|
github.com/dimfeld/httptreemux/v5 v5.5.0
|
||||||
github.com/fsnotify/fsnotify v1.7.0
|
github.com/fsnotify/fsnotify v1.7.0
|
||||||
|
@ -16,7 +17,7 @@ require (
|
||||||
github.com/google/go-cmp v0.6.0
|
github.com/google/go-cmp v0.6.0
|
||||||
github.com/google/gopacket v1.1.19
|
github.com/google/gopacket v1.1.19
|
||||||
github.com/google/renameio/v2 v2.0.0
|
github.com/google/renameio/v2 v2.0.0
|
||||||
github.com/google/uuid v1.4.0
|
github.com/google/uuid v1.5.0
|
||||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2
|
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2
|
||||||
github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86
|
github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86
|
||||||
github.com/kardianos/service v1.2.2
|
github.com/kardianos/service v1.2.2
|
||||||
|
|
6
go.sum
6
go.sum
|
@ -18,6 +18,8 @@ github.com/beefsack/go-rate v0.0.0-20220214233405-116f4ca011a0 h1:0b2vaepXIfMsG+
|
||||||
github.com/beefsack/go-rate v0.0.0-20220214233405-116f4ca011a0/go.mod h1:6YNgTHLutezwnBvyneBbwvB8C82y3dcoOj5EQJIdGXA=
|
github.com/beefsack/go-rate v0.0.0-20220214233405-116f4ca011a0/go.mod h1:6YNgTHLutezwnBvyneBbwvB8C82y3dcoOj5EQJIdGXA=
|
||||||
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
|
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
|
||||||
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
|
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
|
||||||
|
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY=
|
||||||
|
github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
@ -46,8 +48,8 @@ github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08/go.mod h1:czg5+yv1E0Z
|
||||||
github.com/google/renameio/v2 v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg=
|
github.com/google/renameio/v2 v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg=
|
||||||
github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4=
|
github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4=
|
||||||
github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
|
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
|
||||||
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/hugelgupf/socketpair v0.0.0-20190730060125-05d35a94e714 h1:/jC7qQFrv8CrSJVmaolDVOxTfS9kc36uB6H40kdbQq8=
|
github.com/hugelgupf/socketpair v0.0.0-20190730060125-05d35a94e714 h1:/jC7qQFrv8CrSJVmaolDVOxTfS9kc36uB6H40kdbQq8=
|
||||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2 h1:9K06NfxkBh25x56yVhWWlKFE8YpicaSfHwoV8SFbueA=
|
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2 h1:9K06NfxkBh25x56yVhWWlKFE8YpicaSfHwoV8SFbueA=
|
||||||
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2/go.mod h1:3A9PQ1cunSDF/1rbTq99Ts4pVnycWg+vlPkfeD2NLFI=
|
github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2/go.mod h1:3A9PQ1cunSDF/1rbTq99Ts4pVnycWg+vlPkfeD2NLFI=
|
||||||
|
|
|
@ -24,23 +24,25 @@ func validateFilterURL(urlStr string) (err error) {
|
||||||
|
|
||||||
if filepath.IsAbs(urlStr) {
|
if filepath.IsAbs(urlStr) {
|
||||||
_, err = os.Stat(urlStr)
|
_, err = os.Stat(urlStr)
|
||||||
if err != nil {
|
|
||||||
// Don't wrap the error since it's informative enough as is.
|
// Don't wrap the error since it's informative enough as is.
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
u, err := url.ParseRequestURI(urlStr)
|
u, err := url.ParseRequestURI(urlStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Don't wrap the error since it's informative enough as is.
|
// Don't wrap the error since it's informative enough as is.
|
||||||
return err
|
return err
|
||||||
} else if s := u.Scheme; s != aghhttp.SchemeHTTP && s != aghhttp.SchemeHTTPS {
|
}
|
||||||
|
|
||||||
|
if s := u.Scheme; s != aghhttp.SchemeHTTP && s != aghhttp.SchemeHTTPS {
|
||||||
return &url.Error{
|
return &url.Error{
|
||||||
Op: "Check scheme",
|
Op: "Check scheme",
|
||||||
URL: urlStr,
|
URL: urlStr,
|
||||||
Err: fmt.Errorf("only %v allowed", []string{aghhttp.SchemeHTTP, aghhttp.SchemeHTTPS}),
|
Err: fmt.Errorf("only %v allowed", []string{
|
||||||
|
aghhttp.SchemeHTTP,
|
||||||
|
aghhttp.SchemeHTTPS,
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,338 @@
|
||||||
|
package rulelist
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/AdguardTeam/AdGuardHome/internal/aghrenameio"
|
||||||
|
"github.com/AdguardTeam/golibs/errors"
|
||||||
|
"github.com/AdguardTeam/golibs/ioutil"
|
||||||
|
"github.com/AdguardTeam/golibs/log"
|
||||||
|
"github.com/AdguardTeam/urlfilter/filterlist"
|
||||||
|
"github.com/c2h5oh/datasize"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Filter contains information about a single rule-list filter.
|
||||||
|
//
|
||||||
|
// TODO(a.garipov): Use.
|
||||||
|
type Filter struct {
|
||||||
|
// url is the URL of this rule list. Supported schemes are:
|
||||||
|
// - http
|
||||||
|
// - https
|
||||||
|
// - file
|
||||||
|
url *url.URL
|
||||||
|
|
||||||
|
// ruleList is the last successfully compiled [filterlist.RuleList].
|
||||||
|
ruleList filterlist.RuleList
|
||||||
|
|
||||||
|
// updated is the time of the last successful update.
|
||||||
|
updated time.Time
|
||||||
|
|
||||||
|
// name is the human-readable name of this rule-list filter.
|
||||||
|
name string
|
||||||
|
|
||||||
|
// uid is the unique ID of this rule-list filter.
|
||||||
|
uid UID
|
||||||
|
|
||||||
|
// urlFilterID is used for working with package urlfilter.
|
||||||
|
urlFilterID URLFilterID
|
||||||
|
|
||||||
|
// rulesCount contains the number of rules in this rule-list filter.
|
||||||
|
rulesCount int
|
||||||
|
|
||||||
|
// checksum is a CRC32 hash used to quickly check if the rules within a list
|
||||||
|
// file have changed.
|
||||||
|
checksum uint32
|
||||||
|
|
||||||
|
// enabled, if true, means that this rule-list filter is used for filtering.
|
||||||
|
//
|
||||||
|
// TODO(a.garipov): Take into account.
|
||||||
|
enabled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilterConfig contains the configuration for a [Filter].
|
||||||
|
type FilterConfig struct {
|
||||||
|
// URL is the URL of this rule-list filter. Supported schemes are:
|
||||||
|
// - http
|
||||||
|
// - https
|
||||||
|
// - file
|
||||||
|
URL *url.URL
|
||||||
|
|
||||||
|
// Name is the human-readable name of this rule-list filter. If not set, it
|
||||||
|
// is either taken from the rule-list data or generated synthetically from
|
||||||
|
// the UID.
|
||||||
|
Name string
|
||||||
|
|
||||||
|
// UID is the unique ID of this rule-list filter.
|
||||||
|
UID UID
|
||||||
|
|
||||||
|
// URLFilterID is used for working with package urlfilter.
|
||||||
|
URLFilterID URLFilterID
|
||||||
|
|
||||||
|
// Enabled, if true, means that this rule-list filter is used for filtering.
|
||||||
|
Enabled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFilter creates a new rule-list filter. The filter is not refreshed, so a
|
||||||
|
// refresh should be performed before use.
|
||||||
|
func NewFilter(c *FilterConfig) (f *Filter, err error) {
|
||||||
|
if c.URL == nil {
|
||||||
|
return nil, errors.Error("no url")
|
||||||
|
}
|
||||||
|
|
||||||
|
switch s := c.URL.Scheme; s {
|
||||||
|
case "http", "https", "file":
|
||||||
|
// Go on.
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("bad url scheme: %q", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Filter{
|
||||||
|
url: c.URL,
|
||||||
|
name: c.Name,
|
||||||
|
uid: c.UID,
|
||||||
|
urlFilterID: c.URLFilterID,
|
||||||
|
enabled: c.Enabled,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refresh updates the data in the rule-list filter. parseBuf is the initial
|
||||||
|
// buffer used to parse information from the data. cli and maxSize are only
|
||||||
|
// used when f is a URL-based list.
|
||||||
|
func (f *Filter) Refresh(
|
||||||
|
ctx context.Context,
|
||||||
|
parseBuf []byte,
|
||||||
|
cli *http.Client,
|
||||||
|
cacheDir string,
|
||||||
|
maxSize datasize.ByteSize,
|
||||||
|
) (parseRes *ParseResult, err error) {
|
||||||
|
cachePath := filepath.Join(cacheDir, f.uid.String()+".txt")
|
||||||
|
|
||||||
|
switch s := f.url.Scheme; s {
|
||||||
|
case "http", "https":
|
||||||
|
parseRes, err = f.setFromHTTP(ctx, parseBuf, cli, cachePath, maxSize.Bytes())
|
||||||
|
case "file":
|
||||||
|
parseRes, err = f.setFromFile(parseBuf, f.url.Path, cachePath)
|
||||||
|
default:
|
||||||
|
// Since the URL has been prevalidated in New, consider this a
|
||||||
|
// programmer error.
|
||||||
|
panic(fmt.Errorf("bad url scheme: %q", s))
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
// Don't wrap the error, because it's informative enough as is.
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if f.checksum != parseRes.Checksum {
|
||||||
|
f.checksum = parseRes.Checksum
|
||||||
|
f.rulesCount = parseRes.RulesCount
|
||||||
|
f.setName(parseRes.Title)
|
||||||
|
f.updated = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseRes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// setFromHTTP sets the rule-list filter's data from its URL. It also caches
|
||||||
|
// the data into a file.
|
||||||
|
func (f *Filter) setFromHTTP(
|
||||||
|
ctx context.Context,
|
||||||
|
parseBuf []byte,
|
||||||
|
cli *http.Client,
|
||||||
|
cachePath string,
|
||||||
|
maxSize uint64,
|
||||||
|
) (parseRes *ParseResult, err error) {
|
||||||
|
defer func() { err = errors.Annotate(err, "setting from http: %w") }()
|
||||||
|
|
||||||
|
text, parseRes, err := f.readFromHTTP(ctx, parseBuf, cli, cachePath, maxSize)
|
||||||
|
if err != nil {
|
||||||
|
// Don't wrap the error, because it's informative enough as is.
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(a.garipov): Add filterlist.BytesRuleList.
|
||||||
|
f.ruleList = &filterlist.StringRuleList{
|
||||||
|
ID: f.urlFilterID,
|
||||||
|
RulesText: text,
|
||||||
|
IgnoreCosmetic: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseRes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readFromHTTP reads the data from the rule-list filter's URL into the cache
|
||||||
|
// file as well as returns it as a string. The data is filtered through a
|
||||||
|
// parser and so is free from comments, unnecessary whitespace, etc.
|
||||||
|
func (f *Filter) readFromHTTP(
|
||||||
|
ctx context.Context,
|
||||||
|
parseBuf []byte,
|
||||||
|
cli *http.Client,
|
||||||
|
cachePath string,
|
||||||
|
maxSize uint64,
|
||||||
|
) (text string, parseRes *ParseResult, err error) {
|
||||||
|
urlStr := f.url.String()
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("making request for http url %q: %w", urlStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := cli.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("requesting from http url: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { err = errors.WithDeferred(err, resp.Body.Close()) }()
|
||||||
|
|
||||||
|
// TODO(a.garipov): Use [agdhttp.CheckStatus] when it's moved to golibs.
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", nil, fmt.Errorf("got status code %d, want %d", resp.StatusCode, http.StatusOK)
|
||||||
|
}
|
||||||
|
|
||||||
|
fltFile, err := aghrenameio.NewPendingFile(cachePath, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("creating temp file: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { err = aghrenameio.WithDeferredCleanup(err, fltFile) }()
|
||||||
|
|
||||||
|
buf := &bytes.Buffer{}
|
||||||
|
mw := io.MultiWriter(buf, fltFile)
|
||||||
|
|
||||||
|
parser := NewParser()
|
||||||
|
httpBody := ioutil.LimitReader(resp.Body, maxSize)
|
||||||
|
parseRes, err = parser.Parse(mw, httpBody, parseBuf)
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("parsing response from http url %q: %w", urlStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String(), parseRes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// setName sets the title using either the already-present name, the given title
|
||||||
|
// from the rule-list data, or a synthetic name.
|
||||||
|
func (f *Filter) setName(title string) {
|
||||||
|
if f.name != "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if title != "" {
|
||||||
|
f.name = title
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
f.name = fmt.Sprintf("List %s", f.uid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// setFromFile sets the rule-list filter's data from a file path. It also
|
||||||
|
// caches the data into a file.
|
||||||
|
//
|
||||||
|
// TODO(a.garipov): Retest on Windows once rule-list updater is committed. See
|
||||||
|
// if calling Close is necessary here.
|
||||||
|
func (f *Filter) setFromFile(
|
||||||
|
parseBuf []byte,
|
||||||
|
filePath string,
|
||||||
|
cachePath string,
|
||||||
|
) (parseRes *ParseResult, err error) {
|
||||||
|
defer func() { err = errors.Annotate(err, "setting from file: %w") }()
|
||||||
|
|
||||||
|
parseRes, err = parseIntoCache(parseBuf, filePath, cachePath)
|
||||||
|
if err != nil {
|
||||||
|
// Don't wrap the error, because it's informative enough as is.
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = f.Close()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("closing old rule list: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rl, err := filterlist.NewFileRuleList(f.urlFilterID, cachePath, true)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("opening new rule list: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f.ruleList = rl
|
||||||
|
|
||||||
|
return parseRes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIntoCache copies the relevant the data from filePath into cachePath
|
||||||
|
// while also parsing it.
|
||||||
|
func parseIntoCache(
|
||||||
|
parseBuf []byte,
|
||||||
|
filePath string,
|
||||||
|
cachePath string,
|
||||||
|
) (parseRes *ParseResult, err error) {
|
||||||
|
tmpFile, err := aghrenameio.NewPendingFile(cachePath, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("creating temp file: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { err = aghrenameio.WithDeferredCleanup(err, tmpFile) }()
|
||||||
|
|
||||||
|
// #nosec G304 -- Assume that cachePath is always cacheDir joined with a
|
||||||
|
// uid using [filepath.Join].
|
||||||
|
f, err := os.Open(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("opening src file: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { err = errors.WithDeferred(err, f.Close()) }()
|
||||||
|
|
||||||
|
parser := NewParser()
|
||||||
|
parseRes, err = parser.Parse(tmpFile, f, parseBuf)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("copying src file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseRes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close closes the underlying rule list.
|
||||||
|
func (f *Filter) Close() (err error) {
|
||||||
|
if f.ruleList == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.ruleList.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterUpdate represents a single ongoing rule-list filter update.
|
||||||
|
//
|
||||||
|
//lint:ignore U1000 TODO(a.garipov): Use.
|
||||||
|
type filterUpdate struct {
|
||||||
|
httpCli *http.Client
|
||||||
|
cacheDir string
|
||||||
|
name string
|
||||||
|
parseBuf []byte
|
||||||
|
maxSize datasize.ByteSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// process runs an update of a single rule-list.
|
||||||
|
func (u *filterUpdate) process(ctx context.Context, f *Filter) (err error) {
|
||||||
|
prevChecksum := f.checksum
|
||||||
|
parseRes, err := f.Refresh(ctx, u.parseBuf, u.httpCli, u.cacheDir, u.maxSize)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("updating %s: %w", f.uid, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if prevChecksum == parseRes.Checksum {
|
||||||
|
log.Info("filtering: filter %q: filter %q: no change", u.name, f.uid)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info(
|
||||||
|
"filtering: updated filter %q: filter %q: %d bytes, %d rules",
|
||||||
|
u.name,
|
||||||
|
f.uid,
|
||||||
|
parseRes.BytesWritten,
|
||||||
|
parseRes.RulesCount,
|
||||||
|
)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -0,0 +1,107 @@
|
||||||
|
package rulelist_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
|
||||||
|
"github.com/AdguardTeam/golibs/testutil"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFilter_Refresh(t *testing.T) {
|
||||||
|
cacheDir := t.TempDir()
|
||||||
|
uid := rulelist.MustNewUID()
|
||||||
|
|
||||||
|
initialFile := filepath.Join(cacheDir, "initial.txt")
|
||||||
|
initialData := []byte(
|
||||||
|
testRuleTextTitle +
|
||||||
|
testRuleTextBlocked,
|
||||||
|
)
|
||||||
|
writeErr := os.WriteFile(initialFile, initialData, 0o644)
|
||||||
|
require.NoError(t, writeErr)
|
||||||
|
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
pt := testutil.PanicT{}
|
||||||
|
|
||||||
|
_, err := io.WriteString(w, testRuleTextTitle+testRuleTextBlocked)
|
||||||
|
require.NoError(pt, err)
|
||||||
|
}))
|
||||||
|
|
||||||
|
srvURL, urlErr := url.Parse(srv.URL)
|
||||||
|
require.NoError(t, urlErr)
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
url *url.URL
|
||||||
|
name string
|
||||||
|
wantNewErrMsg string
|
||||||
|
}{{
|
||||||
|
url: nil,
|
||||||
|
name: "nil_url",
|
||||||
|
wantNewErrMsg: "no url",
|
||||||
|
}, {
|
||||||
|
url: &url.URL{
|
||||||
|
Scheme: "ftp",
|
||||||
|
},
|
||||||
|
name: "bad_scheme",
|
||||||
|
wantNewErrMsg: `bad url scheme: "ftp"`,
|
||||||
|
}, {
|
||||||
|
name: "file",
|
||||||
|
url: &url.URL{
|
||||||
|
Scheme: "file",
|
||||||
|
Path: initialFile,
|
||||||
|
},
|
||||||
|
wantNewErrMsg: "",
|
||||||
|
}, {
|
||||||
|
name: "http",
|
||||||
|
url: srvURL,
|
||||||
|
wantNewErrMsg: "",
|
||||||
|
}}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
f, err := rulelist.NewFilter(&rulelist.FilterConfig{
|
||||||
|
URL: tc.url,
|
||||||
|
Name: tc.name,
|
||||||
|
UID: uid,
|
||||||
|
URLFilterID: testURLFilterID,
|
||||||
|
Enabled: true,
|
||||||
|
})
|
||||||
|
if tc.wantNewErrMsg != "" {
|
||||||
|
assert.EqualError(t, err, tc.wantNewErrMsg)
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
testutil.CleanupAndRequireSuccess(t, f.Close)
|
||||||
|
|
||||||
|
require.NotNil(t, f)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
|
||||||
|
buf := make([]byte, rulelist.DefaultRuleBufSize)
|
||||||
|
cli := &http.Client{
|
||||||
|
Timeout: testTimeout,
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := f.Refresh(ctx, buf, cli, cacheDir, rulelist.DefaultMaxRuleListSize)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, testTitle, res.Title)
|
||||||
|
assert.Equal(t, len(testRuleTextBlocked), res.BytesWritten)
|
||||||
|
assert.Equal(t, 1, res.RulesCount)
|
||||||
|
|
||||||
|
// Check that the cached file exists.
|
||||||
|
_, err = os.Stat(filepath.Join(cacheDir, uid.String()+".txt"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -69,12 +69,12 @@ func TestParser_Parse(t *testing.T) {
|
||||||
wantWritten: len(testRuleTextBlocked) + len(testRuleTextHTML),
|
wantWritten: len(testRuleTextBlocked) + len(testRuleTextHTML),
|
||||||
}, {
|
}, {
|
||||||
name: "title",
|
name: "title",
|
||||||
in: "! Title: Test Title \n" +
|
in: testRuleTextTitle +
|
||||||
"! Title: Bad, Ignored Title\n" +
|
"! Title: Bad, Ignored Title\n" +
|
||||||
testRuleTextBlocked,
|
testRuleTextBlocked,
|
||||||
wantDst: testRuleTextBlocked,
|
wantDst: testRuleTextBlocked,
|
||||||
wantErrMsg: "",
|
wantErrMsg: "",
|
||||||
wantTitle: "Test Title",
|
wantTitle: testTitle,
|
||||||
wantRulesNum: 1,
|
wantRulesNum: 1,
|
||||||
wantWritten: len(testRuleTextBlocked),
|
wantWritten: len(testRuleTextBlocked),
|
||||||
}, {
|
}, {
|
||||||
|
@ -87,14 +87,14 @@ func TestParser_Parse(t *testing.T) {
|
||||||
wantWritten: len(testRuleTextCosmetic),
|
wantWritten: len(testRuleTextCosmetic),
|
||||||
}, {
|
}, {
|
||||||
name: "bad_char",
|
name: "bad_char",
|
||||||
in: "! Title: Test Title \n" +
|
in: testRuleTextTitle +
|
||||||
testRuleTextBlocked +
|
testRuleTextBlocked +
|
||||||
">>>\x7F<<<",
|
">>>\x7F<<<",
|
||||||
wantDst: testRuleTextBlocked,
|
wantDst: testRuleTextBlocked,
|
||||||
wantErrMsg: "line 3: " +
|
wantErrMsg: "line 3: " +
|
||||||
"character 4: " +
|
"character 4: " +
|
||||||
"likely binary character '\\x7f'",
|
"likely binary character '\\x7f'",
|
||||||
wantTitle: "Test Title",
|
wantTitle: testTitle,
|
||||||
wantRulesNum: 1,
|
wantRulesNum: 1,
|
||||||
wantWritten: len(testRuleTextBlocked),
|
wantWritten: len(testRuleTextBlocked),
|
||||||
}, {
|
}, {
|
||||||
|
|
|
@ -1,9 +1,55 @@
|
||||||
// Package rulelist contains the implementation of the standard rule-list
|
// Package rulelist contains the implementation of the standard rule-list
|
||||||
// filter that wraps an urlfilter filtering-engine.
|
// filter that wraps an urlfilter filtering-engine.
|
||||||
//
|
//
|
||||||
// TODO(a.garipov): Expand.
|
// TODO(a.garipov): Add a new update worker.
|
||||||
package rulelist
|
package rulelist
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/c2h5oh/datasize"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
// DefaultRuleBufSize is the default length of a buffer used to read a line with
|
// DefaultRuleBufSize is the default length of a buffer used to read a line with
|
||||||
// a filtering rule, in bytes.
|
// a filtering rule, in bytes.
|
||||||
|
//
|
||||||
|
// TODO(a.garipov): Consider using [datasize.ByteSize]. It is currently only
|
||||||
|
// used as an int.
|
||||||
const DefaultRuleBufSize = 1024
|
const DefaultRuleBufSize = 1024
|
||||||
|
|
||||||
|
// DefaultMaxRuleListSize is the default maximum filtering-rule list size.
|
||||||
|
const DefaultMaxRuleListSize = 64 * datasize.MB
|
||||||
|
|
||||||
|
// URLFilterID is a semantic type-alias for IDs used for working with package
|
||||||
|
// urlfilter.
|
||||||
|
type URLFilterID = int
|
||||||
|
|
||||||
|
// UID is the type for the unique IDs of filtering-rule lists.
|
||||||
|
type UID uuid.UUID
|
||||||
|
|
||||||
|
// NewUID returns a new filtering-rule list UID. Any error returned is an error
|
||||||
|
// from the cryptographic randomness reader.
|
||||||
|
func NewUID() (uid UID, err error) {
|
||||||
|
uuidv7, err := uuid.NewV7()
|
||||||
|
|
||||||
|
return UID(uuidv7), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustNewUID is a wrapper around [NewUID] that panics if there is an error.
|
||||||
|
func MustNewUID() (uid UID) {
|
||||||
|
uid, err := NewUID()
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("unexpected uuidv7 error: %w", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
return uid
|
||||||
|
}
|
||||||
|
|
||||||
|
// type check
|
||||||
|
var _ fmt.Stringer = UID{}
|
||||||
|
|
||||||
|
// String implements the [fmt.Stringer] interface for UID.
|
||||||
|
func (id UID) String() (s string) {
|
||||||
|
return uuid.UUID(id).String()
|
||||||
|
}
|
||||||
|
|
|
@ -1,16 +1,34 @@
|
||||||
package rulelist_test
|
package rulelist_test
|
||||||
|
|
||||||
import "time"
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
|
||||||
|
"github.com/AdguardTeam/golibs/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
testutil.DiscardLogOutput(m)
|
||||||
|
}
|
||||||
|
|
||||||
// testTimeout is the common timeout for tests.
|
// testTimeout is the common timeout for tests.
|
||||||
const testTimeout = 1 * time.Second
|
const testTimeout = 1 * time.Second
|
||||||
|
|
||||||
// Common texts for tests.
|
// testURLFilterID is the common [rulelist.URLFilterID] for tests.
|
||||||
|
const testURLFilterID rulelist.URLFilterID = 1
|
||||||
|
|
||||||
|
// testTitle is the common title for tests.
|
||||||
|
const testTitle = "Test Title"
|
||||||
|
|
||||||
|
// Common rule texts for tests.
|
||||||
const (
|
const (
|
||||||
testRuleTextBadTab = "||bad-tab-and-comment.example^\t# A comment.\n"
|
testRuleTextBadTab = "||bad-tab-and-comment.example^\t# A comment.\n"
|
||||||
testRuleTextBlocked = "||blocked.example^\n"
|
testRuleTextBlocked = "||blocked.example^\n"
|
||||||
|
testRuleTextBlocked2 = "||blocked-2.example^\n"
|
||||||
testRuleTextEtcHostsTab = "0.0.0.0 tab..example^\t# A comment.\n"
|
testRuleTextEtcHostsTab = "0.0.0.0 tab..example^\t# A comment.\n"
|
||||||
testRuleTextHTML = "<!DOCTYPE html>\n"
|
testRuleTextHTML = "<!DOCTYPE html>\n"
|
||||||
|
testRuleTextTitle = "! Title: " + testTitle + " \n"
|
||||||
|
|
||||||
// testRuleTextCosmetic is a cosmetic rule with a zero-width non-joiner.
|
// testRuleTextCosmetic is a cosmetic rule with a zero-width non-joiner.
|
||||||
//
|
//
|
||||||
|
|
Loading…
Reference in New Issue