dnsfilter -- avoid using regexps when simple suffix match is enough.
This covers 96.98% of all adguard dns rules.
This commit is contained in:
parent
9e939e5754
commit
cb97a254a5
|
@ -67,6 +67,10 @@ type rule struct {
|
||||||
// user-supplied data
|
// user-supplied data
|
||||||
listID uint32
|
listID uint32
|
||||||
|
|
||||||
|
// suffix matching
|
||||||
|
isSuffix bool
|
||||||
|
suffix string
|
||||||
|
|
||||||
// compiled regexp
|
// compiled regexp
|
||||||
compiled *regexp.Regexp
|
compiled *regexp.Regexp
|
||||||
|
|
||||||
|
@ -387,12 +391,21 @@ func (rule *rule) extractShortcut() {
|
||||||
|
|
||||||
func (rule *rule) compile() error {
|
func (rule *rule) compile() error {
|
||||||
rule.RLock()
|
rule.RLock()
|
||||||
isCompiled := rule.compiled != nil
|
isCompiled := rule.isSuffix || rule.compiled != nil
|
||||||
rule.RUnlock()
|
rule.RUnlock()
|
||||||
if isCompiled {
|
if isCompiled {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isSuffix, suffix := getSuffix(rule.text)
|
||||||
|
if isSuffix {
|
||||||
|
rule.Lock()
|
||||||
|
rule.isSuffix = isSuffix
|
||||||
|
rule.suffix = suffix
|
||||||
|
rule.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
expr, err := ruleToRegexp(rule.text)
|
expr, err := ruleToRegexp(rule.text)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -417,7 +430,16 @@ func (rule *rule) match(host string) (Result, error) {
|
||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
rule.RLock()
|
rule.RLock()
|
||||||
matched := rule.compiled.MatchString(host)
|
matched := false
|
||||||
|
if rule.isSuffix {
|
||||||
|
if host == rule.suffix {
|
||||||
|
matched = true
|
||||||
|
} else if strings.HasSuffix(host, "."+rule.suffix) {
|
||||||
|
matched = true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
matched = rule.compiled.MatchString(host)
|
||||||
|
}
|
||||||
rule.RUnlock()
|
rule.RUnlock()
|
||||||
if matched {
|
if matched {
|
||||||
res.Reason = FilteredBlackList
|
res.Reason = FilteredBlackList
|
||||||
|
|
|
@ -195,7 +195,7 @@ func TestRuleToRegexp(t *testing.T) {
|
||||||
{"/doubleclick/", "doubleclick", nil},
|
{"/doubleclick/", "doubleclick", nil},
|
||||||
{"/", "", ErrInvalidSyntax},
|
{"/", "", ErrInvalidSyntax},
|
||||||
{`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil},
|
{`|double*?.+[]|(){}#$\|`, `^double.*\?\.\+\[\]\|\(\)\{\}\#\$\\$`, nil},
|
||||||
{`||doubleclick.net^`, `^([a-z0-9-_.]+\.)?doubleclick\.net([^ a-zA-Z0-9.%]|$)`, nil},
|
{`||doubleclick.net^`, `(?:^|\.)doubleclick\.net$`, nil},
|
||||||
}
|
}
|
||||||
for _, testcase := range tests {
|
for _, testcase := range tests {
|
||||||
converted, err := ruleToRegexp(testcase.rule)
|
converted, err := ruleToRegexp(testcase.rule)
|
||||||
|
@ -208,6 +208,38 @@ func TestRuleToRegexp(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSuffixRule(t *testing.T) {
|
||||||
|
for _, testcase := range []struct {
|
||||||
|
rule string
|
||||||
|
isSuffix bool
|
||||||
|
suffix string
|
||||||
|
}{
|
||||||
|
{`||doubleclick.net^`, true, `doubleclick.net`}, // entire string or subdomain match
|
||||||
|
{`||doubleclick.net|`, true, `doubleclick.net`}, // entire string or subdomain match
|
||||||
|
{`|doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`|*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`||*doubleclick.net^`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`||*doubleclick.net|`, false, ``}, // TODO: ends with doubleclick.net
|
||||||
|
{`||*doublec*lick.net^`, false, ``}, // has a wildcard inside, has to be regexp
|
||||||
|
{`||*doublec|lick.net^`, false, ``}, // has a special symbol inside, has to be regexp
|
||||||
|
{`/abracadabra/`, false, ``}, // regexp, not anchored
|
||||||
|
{`/abracadabra$/`, false, ``}, // TODO: simplify simple suffix regexes
|
||||||
|
} {
|
||||||
|
isSuffix, suffix := getSuffix(testcase.rule)
|
||||||
|
if testcase.isSuffix != isSuffix {
|
||||||
|
t.Errorf("Results do not match for \"%s\": got %v expected %v", testcase.rule, isSuffix, testcase.isSuffix)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if testcase.isSuffix && testcase.suffix != suffix {
|
||||||
|
t.Errorf("Result suffix does not match for \"%s\": got \"%s\" expected \"%s\"", testcase.rule, suffix, testcase.suffix)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// trace("\"%s\": %v: %s", testcase.rule, isSuffix, suffix)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// helper functions
|
// helper functions
|
||||||
//
|
//
|
||||||
|
|
|
@ -5,8 +5,8 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func ruleToRegexp(rule string) (string, error) {
|
func ruleToRegexp(rule string) (string, error) {
|
||||||
const hostStart = "^([a-z0-9-_.]+\\.)?"
|
const hostStart = `(?:^|\.)`
|
||||||
const hostEnd = "([^ a-zA-Z0-9.%]|$)"
|
const hostEnd = `$`
|
||||||
|
|
||||||
// empty or short rule -- do nothing
|
// empty or short rule -- do nothing
|
||||||
if !isValidRule(rule) {
|
if !isValidRule(rule) {
|
||||||
|
@ -49,3 +49,38 @@ func ruleToRegexp(rule string) (string, error) {
|
||||||
|
|
||||||
return sb.String(), nil
|
return sb.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle suffix rule ||example.com^ -- either entire string is example.com or *.example.com
|
||||||
|
func getSuffix(rule string) (bool, string) {
|
||||||
|
// if starts with / and ends with /, it's already a regexp
|
||||||
|
// TODO: if a regexp is simple `/abracadabra$/`, then simplify it maybe?
|
||||||
|
if rule[0] == '/' && rule[len(rule)-1] == '/' {
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// must start with ||
|
||||||
|
if rule[0] != '|' || rule[1] != '|' {
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
rule = rule[2:]
|
||||||
|
|
||||||
|
// suffix rule must end with ^ or |
|
||||||
|
lastChar := rule[len(rule)-1]
|
||||||
|
if lastChar != '^' && lastChar != '|' {
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
// last char was checked, eat it
|
||||||
|
rule = rule[:len(rule)-1]
|
||||||
|
|
||||||
|
// check that it doesn't have any special characters inside
|
||||||
|
for _, r := range rule {
|
||||||
|
switch r {
|
||||||
|
case '|':
|
||||||
|
return false, ""
|
||||||
|
case '*':
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, rule
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue