diff --git a/.gitignore b/.gitignore index 86a6ee69..a863c779 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,13 @@ debug /Corefile /dnsfilter.txt /querylog.json -/querylog.json.1 \ No newline at end of file +/querylog.json.1 + +# Test output +dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage1.pprof +dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage2.pprof +dnsfilter/dnsfilter.TestLotsOfRulesLotsOfHostsMemoryUsage3.pprof +dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage1.pprof +dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage2.pprof +dnsfilter/dnsfilter.TestLotsOfRulesMemoryUsage3.pprof +tests/top-1m.csv diff --git a/coredns_plugin/coredns_plugin.go b/coredns_plugin/coredns_plugin.go index 8bd8b74c..d12031ae 100644 --- a/coredns_plugin/coredns_plugin.go +++ b/coredns_plugin/coredns_plugin.go @@ -62,7 +62,6 @@ type plug struct { d *dnsfilter.Dnsfilter Next plugin.Handler upstream upstream.Upstream - hosts map[string]net.IP settings plugSettings sync.RWMutex @@ -82,7 +81,6 @@ func setupPlugin(c *caddy.Controller) (*plug, error) { p := &plug{ settings: defaultPluginSettings, d: dnsfilter.New(), - hosts: make(map[string]net.IP), } filterFileNames := []string{} @@ -149,9 +147,7 @@ func setupPlugin(c *caddy.Controller) (*plug, error) { scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() - if p.parseEtcHosts(text) { - continue - } + err = p.d.AddRule(text, uint32(i)) if err == dnsfilter.ErrInvalidSyntax { continue @@ -231,28 +227,6 @@ func setup(c *caddy.Controller) error { return nil } -func (p *plug) parseEtcHosts(text string) bool { - if pos := strings.IndexByte(text, '#'); pos != -1 { - text = text[0:pos] - } - fields := strings.Fields(text) - if len(fields) < 2 { - return false - } - addr := net.ParseIP(fields[0]) - if addr == nil { - return false - } - for _, host := range fields[1:] { - // debug logging for duplicate values, pretty common if you subscribe to many hosts files - // if val, ok := p.hosts[host]; ok { - // log.Printf("warning: host %s already has value %s, will overwrite it with %s", host, val, addr) - // } - p.hosts[host] = addr - } - return true -} - func (p *plug) onShutdown() error { p.Lock() p.d.Destroy() @@ -432,27 +406,6 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn } p.RUnlock() - // is it in hosts? - if val, ok := p.hosts[host]; ok { - // it is, if it's a loopback host, reply with NXDOMAIN - // TODO: research if it's better than 127.0.0.1 - if false && val.IsLoopback() { - rcode, err := p.writeNXdomain(ctx, w, r) - if err != nil { - return rcode, dnsfilter.Result{}, err - } - return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredInvalid}, err - } - // it's not a loopback host, replace it with value specified - rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, val.String(), question) - if err != nil { - return rcode, dnsfilter.Result{}, err - } - // TODO: This must be handled in the dnsfilter and not here! - rule := val.String() + " " + host - return rcode, dnsfilter.Result{Reason: dnsfilter.FilteredBlackList, Rule: rule}, err - } - // needs to be filtered instead p.RLock() result, err := p.d.CheckHost(host) @@ -482,12 +435,22 @@ func (p *plug) serveDNSInternal(ctx context.Context, w dns.ResponseWriter, r *dn } return rcode, result, err case dnsfilter.FilteredBlackList: - // return NXdomain - rcode, err := p.writeNXdomain(ctx, w, r) - if err != nil { - return rcode, dnsfilter.Result{}, err + + if result.Ip == nil { + // return NXDomain + rcode, err := p.writeNXdomain(ctx, w, r) + if err != nil { + return rcode, dnsfilter.Result{}, err + } + return rcode, result, err + } else { + // This is a hosts-syntax rule + rcode, err := p.replaceHostWithValAndReply(ctx, w, r, host, result.Ip.String(), question) + if err != nil { + return rcode, dnsfilter.Result{}, err + } + return rcode, result, err } - return rcode, result, err case dnsfilter.FilteredInvalid: // return NXdomain rcode, err := p.writeNXdomain(ctx, w, r) diff --git a/coredns_plugin/coredns_plugin_test.go b/coredns_plugin/coredns_plugin_test.go index 17210b63..2f65cf9a 100644 --- a/coredns_plugin/coredns_plugin_test.go +++ b/coredns_plugin/coredns_plugin_test.go @@ -40,41 +40,6 @@ func TestSetup(t *testing.T) { } } -func TestEtcHostsParse(t *testing.T) { - addr := "216.239.38.120" - text := []byte(fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr)) - tmpfile, err := ioutil.TempFile("", "") - if err != nil { - t.Fatal(err) - } - if _, err = tmpfile.Write(text); err != nil { - t.Fatal(err) - } - if err = tmpfile.Close(); err != nil { - t.Fatal(err) - } - - defer os.Remove(tmpfile.Name()) - - c := caddy.NewTestController("dns", fmt.Sprintf("dnsfilter %s", tmpfile.Name())) - p, err := setupPlugin(c) - if err != nil { - t.Fatal(err) - } - - if len(p.hosts) != 2 { - t.Fatal("Expected p.hosts to have two keys") - } - - val, ok := p.hosts["google.com"] - if !ok { - t.Fatal("Expected google.com to be set in p.hosts") - } - if !val.Equal(net.ParseIP(addr)) { - t.Fatalf("Expected google.com's value %s to match %s", val, addr) - } -} - func TestEtcHostsFilter(t *testing.T) { text := []byte("127.0.0.1 doubleclick.net\n" + "127.0.0.1 example.org example.net www.example.org www.example.net") tmpfile, err := ioutil.TempFile("", "") diff --git a/dnsfilter/dnsfilter.go b/dnsfilter/dnsfilter.go index 3b3627f8..49ee721c 100644 --- a/dnsfilter/dnsfilter.go +++ b/dnsfilter/dnsfilter.go @@ -9,6 +9,7 @@ import ( "fmt" "io/ioutil" "log" + "net" "net/http" "regexp" "strings" @@ -56,6 +57,7 @@ type rule struct { text string // text without @@ decorators or $ options shortcut string // for speeding up lookup originalText string // original text for reporting back to applications + ip net.IP // IP address (for the case when we're matching a hosts file) // options options []string // optional options after $ @@ -94,7 +96,7 @@ type Stats struct { // Dnsfilter holds added rules and performs hostname matches against the rules type Dnsfilter struct { - storage map[string]*rule // rule storage, not used for matching, needs to be key->value + storage map[string]bool // rule storage, not used for matching, just for filtering out duplicates storageMutex sync.RWMutex // rules are checked against these lists in the order defined here @@ -121,11 +123,11 @@ const ( NotFilteredError // there was a transitive error during check // reasons for filtering - FilteredBlackList // the host was matched to be advertising host - FilteredSafeBrowsing // the host was matched to be malicious/phishing - FilteredParental // the host was matched to be outside of parental control settings - FilteredInvalid // the request was invalid and was not processed - FilteredSafeSearch // the host was replaced with safesearch variant + FilteredBlackList // the host was matched to be advertising host + FilteredSafeBrowsing // the host was matched to be malicious/phishing + FilteredParental // the host was matched to be outside of parental control settings + FilteredInvalid // the request was invalid and was not processed + FilteredSafeSearch // the host was replaced with safesearch variant ) // these variables need to survive coredns reload @@ -137,9 +139,11 @@ var ( // Result holds state of hostname check type Result struct { - IsFiltered bool `json:",omitempty"` - Reason Reason `json:",omitempty"` - Rule string `json:",omitempty"` + IsFiltered bool `json:",omitempty"` // True if the host name is filtered + Reason Reason `json:",omitempty"` // Reason for blocking / unblocking + Rule string `json:",omitempty"` // Original rule text + Ip net.IP `json:",omitempty"` // Not nil only in the case of a hosts file syntax + FilterID uint32 `json:",omitempty"` // Filter ID the rule belongs to } // Matched can be used to see if any match at all was found, no matter filtered or not @@ -199,6 +203,7 @@ func (d *Dnsfilter) CheckHost(host string) (Result, error) { // type rulesTable struct { + rulesByHost map[string]*rule rulesByShortcut map[string][]*rule rulesLeftovers []*rule sync.RWMutex @@ -206,6 +211,7 @@ type rulesTable struct { func newRulesTable() *rulesTable { return &rulesTable{ + rulesByHost: make(map[string]*rule), rulesByShortcut: make(map[string][]*rule), rulesLeftovers: make([]*rule, 0), } @@ -213,16 +219,27 @@ func newRulesTable() *rulesTable { func (r *rulesTable) Add(rule *rule) { r.Lock() - if len(rule.shortcut) == shortcutLength && enableFastLookup { + + if rule.ip != nil { + // Hosts syntax + r.rulesByHost[rule.text] = rule + } else if //noinspection GoBoolExpressions + len(rule.shortcut) == shortcutLength && enableFastLookup { + + // Adblock syntax with a shortcut r.rulesByShortcut[rule.shortcut] = append(r.rulesByShortcut[rule.shortcut], rule) } else { + + // Adblock syntax -- too short to have a shortcut r.rulesLeftovers = append(r.rulesLeftovers, rule) } r.Unlock() } func (r *rulesTable) matchByHost(host string) (Result, error) { - res, err := r.searchShortcuts(host) + + // First: examine the hosts-syntax rules + res, err := r.searchByHost(host) if err != nil { return res, err } @@ -230,6 +247,16 @@ func (r *rulesTable) matchByHost(host string) (Result, error) { return res, nil } + // Second: examine the adblock-syntax rules with shortcuts + res, err = r.searchShortcuts(host) + if err != nil { + return res, err + } + if res.Reason.Matched() { + return res, nil + } + + // Third: examine the others res, err = r.searchLeftovers(host) if err != nil { return res, err @@ -241,6 +268,17 @@ func (r *rulesTable) matchByHost(host string) (Result, error) { return Result{}, nil } +func (r *rulesTable) searchByHost(host string) (Result, error) { + + rule, ok := r.rulesByHost[host] + + if ok { + return rule.match(host) + } + + return Result{}, nil +} + func (r *rulesTable) searchShortcuts(host string) (Result, error) { // check in shortcuts first for i := 0; i < len(host); i++ { @@ -424,8 +462,21 @@ func (rule *rule) compile() error { return nil } +// Checks if the rule matches the specified host and returns a corresponding Result object func (rule *rule) match(host string) (Result, error) { res := Result{} + + if rule.ip != nil && rule.text == host { + // This is a hosts-syntax rule -- just check that the hostname matches and return the result + return Result{ + IsFiltered: true, + Reason: FilteredBlackList, + Rule: rule.originalText, + Ip: rule.ip, + FilterID: rule.listID, + }, nil + } + err := rule.compile() if err != nil { return res, err @@ -686,6 +737,7 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { d.storageMutex.RUnlock() if exists { // already added + // TODO: Change the error type return ErrInvalidSyntax } @@ -693,13 +745,20 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { return ErrInvalidSyntax } + // First, check if this is a hosts-syntax rule + if d.parseEtcHosts(input, filterListID) { + // This is a valid hosts-syntax rule, no need for further parsing + return nil + } + + // Start parsing the rule rule := rule{ text: input, // will be modified originalText: input, listID: filterListID, } - // mark rule as whitelist if it starts with @@ + // Mark rule as whitelist if it starts with @@ if strings.HasPrefix(rule.text, "@@") { rule.isWhitelist = true rule.text = rule.text[2:] @@ -712,6 +771,7 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { rule.extractShortcut() + //noinspection GoBoolExpressions if !enableDelayedCompilation { err := rule.compile() if err != nil { @@ -727,12 +787,44 @@ func (d *Dnsfilter) AddRule(input string, filterListID uint32) error { } d.storageMutex.Lock() - d.storage[input] = &rule + d.storage[input] = true d.storageMutex.Unlock() destination.Add(&rule) return nil } +// Parses the hosts-syntax rules. Returns false if the input string is not of hosts-syntax. +func (d *Dnsfilter) parseEtcHosts(input string, filterListID uint32) bool { + // Strip the trailing comment + ruleText := input + if pos := strings.IndexByte(ruleText, '#'); pos != -1 { + ruleText = ruleText[0:pos] + } + fields := strings.Fields(ruleText) + if len(fields) < 2 { + return false + } + addr := net.ParseIP(fields[0]) + if addr == nil { + return false + } + + d.storageMutex.Lock() + d.storage[input] = true + d.storageMutex.Unlock() + + for _, host := range fields[1:] { + rule := rule{ + text: host, + originalText: input, + listID: filterListID, + ip: addr, + } + d.blackList.Add(&rule) + } + return true +} + // matchHost is a low-level way to check only if hostname is filtered by rules, skipping expensive safebrowsing and parental lookups func (d *Dnsfilter) matchHost(host string) (Result, error) { lists := []*rulesTable{ @@ -761,7 +853,7 @@ func (d *Dnsfilter) matchHost(host string) (Result, error) { func New() *Dnsfilter { d := new(Dnsfilter) - d.storage = make(map[string]*rule) + d.storage = make(map[string]bool) d.important = newRulesTable() d.whiteList = newRulesTable() d.blackList = newRulesTable() diff --git a/dnsfilter/dnsfilter_test.go b/dnsfilter/dnsfilter_test.go index 19ae1c77..735d086e 100644 --- a/dnsfilter/dnsfilter_test.go +++ b/dnsfilter/dnsfilter_test.go @@ -281,6 +281,20 @@ func (d *Dnsfilter) checkMatch(t *testing.T, hostname string) { } } +func (d *Dnsfilter) checkMatchIp(t *testing.T, hostname string, ip string) { + t.Helper() + ret, err := d.CheckHost(hostname) + if err != nil { + t.Errorf("Error while matching host %s: %s", hostname, err) + } + if !ret.IsFiltered { + t.Errorf("Expected hostname %s to match", hostname) + } + if ret.Ip == nil || ret.Ip.String() != ip { + t.Errorf("Expected ip %s to match, actual: %v", ip, ret.Ip) + } +} + func (d *Dnsfilter) checkMatchEmpty(t *testing.T, hostname string) { t.Helper() ret, err := d.CheckHost(hostname) @@ -345,6 +359,20 @@ func TestSanityCheck(t *testing.T) { d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384") } +func TestEtcHostsMatching(t *testing.T) { + d := NewForTest() + defer d.Destroy() + + addr := "216.239.38.120" + text := fmt.Sprintf(" %s google.com www.google.com # enforce google's safesearch ", addr) + + d.checkAddRule(t, text) + d.checkMatchIp(t, "google.com", addr) + d.checkMatchIp(t, "www.google.com", addr) + d.checkMatchEmpty(t, "subdomain.google.com") + d.checkMatchEmpty(t, "example.org") +} + func TestSuffixMatching1(t *testing.T) { d := NewForTest() defer d.Destroy() diff --git a/dnsfilter/helpers.go b/dnsfilter/helpers.go index fb34cff9..68d4ba26 100644 --- a/dnsfilter/helpers.go +++ b/dnsfilter/helpers.go @@ -19,11 +19,17 @@ func isValidRule(rule string) bool { return false } + // Filter out all sorts of cosmetic rules: + // https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#cosmetic-rules masks := []string{ "##", "#@#", + "#?#", + "#@?#", "#$#", "#@$#", + "#?$#", + "#@?$#", "$$", "$@$", "#%#",