From 116bedd72716c9e5ef2f3b13673bcc74fc7c11b6 Mon Sep 17 00:00:00 2001 From: Eugene Burkov Date: Wed, 30 Jun 2021 11:04:48 +0300 Subject: [PATCH] Pull request: 3012 idna search Merge in DNS/adguard-home from 3012-idna-search to master Closes #3012. Squashed commit of the following: commit 6a9fbfe16860df5db5982a70cfbf040967b6e6ae Author: Eugene Burkov Date: Tue Jun 29 21:28:10 2021 +0300 querylog: add todo commit 31292ba1aeb9e91ff4f6abae7ffdf806a87cae66 Author: Eugene Burkov Date: Tue Jun 29 21:21:46 2021 +0300 querylog: imp docs, code commit 35757f76837cb8034f6079a351d01aa4706bfea7 Author: Eugene Burkov Date: Tue Jun 29 21:01:08 2021 +0300 querylog: fix idn case match commit eecfc98b6449c5c7c5a23602e80e47002034bc25 Author: Eugene Burkov Date: Tue Jun 29 20:32:00 2021 +0300 querylog: imp code, docs commit 8aa6242fe92a9c2daa674b976595b13be96b0cf7 Author: Eugene Burkov Date: Tue Jun 29 20:00:54 2021 +0300 querylog: sup idn search --- CHANGELOG.md | 2 + internal/querylog/http.go | 72 ++++++++++++++++++++-------- internal/querylog/searchcriterion.go | 39 ++++++++++----- 3 files changed, 83 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2025e26f..e5e8dff7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ and this project adheres to ### Changed +- Query log search now supports internationalized domains ([#3012]). - Internationalized domains are now shown decoded in the query log with the original encoded version shown in request details. ([#3013]). - When /etc/hosts-type rules have several IPs for one host, all IPs are now @@ -83,6 +84,7 @@ released by then. 
[#2443]: https://github.com/AdguardTeam/AdGuardHome/issues/2443 [#2624]: https://github.com/AdguardTeam/AdGuardHome/issues/2624 [#2763]: https://github.com/AdguardTeam/AdGuardHome/issues/2763 +[#3012]: https://github.com/AdguardTeam/AdGuardHome/issues/3012 [#3013]: https://github.com/AdguardTeam/AdGuardHome/issues/3013 [#3136]: https://github.com/AdguardTeam/AdGuardHome/issues/3136 [#3162]: https://github.com/AdguardTeam/AdGuardHome/issues/3162 diff --git a/internal/querylog/http.go b/internal/querylog/http.go index 8bf5ee27..f7f5edb4 100644 --- a/internal/querylog/http.go +++ b/internal/querylog/http.go @@ -6,11 +6,13 @@ import ( "net/http" "net/url" "strconv" + "strings" "time" "github.com/AdguardTeam/AdGuardHome/internal/aghstrings" "github.com/AdguardTeam/golibs/jsonutil" "github.com/AdguardTeam/golibs/log" + "golang.org/x/net/idna" ) type qlogConfig struct { @@ -127,25 +129,53 @@ func getDoubleQuotesEnclosedValue(s *string) bool { } // parseSearchCriterion parses a search criterion from the query parameter. -func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (ok bool, sc searchCriterion, err error) { +func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) ( + ok bool, + sc searchCriterion, + err error, +) { val := q.Get(name) - if len(val) == 0 { - return false, searchCriterion{}, nil + if val == "" { + return false, sc, nil } - c := searchCriterion{ + strict := getDoubleQuotesEnclosedValue(&val) + + var asciiVal string + switch ct { + case ctTerm: + // Encode lowercased value to punycode to make EqualFold and + // friends work properly with IDNAs. + // + // TODO(e.burkov): Make it work with parts of IDNAs somehow. + loweredVal := strings.ToLower(val) + if asciiVal, err = idna.ToASCII(loweredVal); err != nil { + log.Debug("can't convert %q to ascii: %s", val, err) + } else if asciiVal == loweredVal { + // Purge asciiVal to prevent checking the same value + // twice. 
+ asciiVal = "" + } + case ctFilteringStatus: + if !aghstrings.InSlice(filteringStatusValues, val) { + return false, sc, fmt.Errorf("invalid value %s", val) + } + default: + return false, sc, fmt.Errorf( + "invalid criterion type %v: should be one of %v", + ct, + []criterionType{ctTerm, ctFilteringStatus}, + ) + } + + sc = searchCriterion{ criterionType: ct, value: val, - } - if getDoubleQuotesEnclosedValue(&c.value) { - c.strict = true + asciiVal: asciiVal, + strict: strict, } - if ct == ctFilteringStatus && !aghstrings.InSlice(filteringStatusValues, c.value) { - return false, c, fmt.Errorf("invalid value %s", c.value) - } - - return true, c, nil + return true, sc, nil } // parseSearchParams - parses "searchParams" from the HTTP request's query string @@ -175,15 +205,19 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro p.maxFileScanEntries = 0 } - paramNames := map[string]criterionType{ - "search": ctTerm, - "response_status": ctFilteringStatus, - } - - for k, v := range paramNames { + for _, v := range []struct { + urlField string + ct criterionType + }{{ + urlField: "search", + ct: ctTerm, + }, { + urlField: "response_status", + ct: ctFilteringStatus, + }} { var ok bool var c searchCriterion - ok, c, err = l.parseSearchCriterion(q, k, v) + ok, c, err = l.parseSearchCriterion(q, v.urlField, v.ct) if err != nil { return nil, err } diff --git a/internal/querylog/searchcriterion.go b/internal/querylog/searchcriterion.go index d726b5bc..25ffc216 100644 --- a/internal/querylog/searchcriterion.go +++ b/internal/querylog/searchcriterion.go @@ -11,10 +11,9 @@ import ( type criterionType int const ( - // ctTerm is for searching by the domain name, the client's IP - // address, the client's ID or the client's name. - // - // TODO(e.burkov): Make it support IDNA while #3012. + // ctTerm is for searching by the domain name, the client's IP address, + // the client's ID or the client's name. The domain name search + // supports IDNAs. 
ctTerm criterionType = iota // ctFilteringStatus is for searching by the filtering status. // @@ -47,6 +46,7 @@ var filteringStatusValues = []string{ // searchCriterion is a search criterion that is used to match a record. type searchCriterion struct { value string + asciiVal string criterionType criterionType // strict, if true, means that the criterion must be applied to the // whole value rather than the part of it. That is, equality and not @@ -54,14 +54,16 @@ type searchCriterion struct { strict bool } -func (c *searchCriterion) ctDomainOrClientCaseStrict( +func ctDomainOrClientCaseStrict( term string, + asciiTerm string, clientID string, name string, host string, ip string, ) (ok bool) { return strings.EqualFold(host, term) || + (asciiTerm != "" && strings.EqualFold(host, asciiTerm)) || strings.EqualFold(clientID, term) || strings.EqualFold(ip, term) || strings.EqualFold(name, term) @@ -98,8 +100,9 @@ func containsFold(s, substr string) (ok bool) { return false } -func (c *searchCriterion) ctDomainOrClientCaseNonStrict( +func ctDomainOrClientCaseNonStrict( term string, + asciiTerm string, clientID string, name string, host string, @@ -107,6 +110,7 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict( ) (ok bool) { return containsFold(clientID, term) || containsFold(host, term) || + (asciiTerm != "" && containsFold(host, asciiTerm)) || containsFold(ip, term) || containsFold(name, term) } @@ -127,10 +131,24 @@ func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFun } if c.strict { - return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip) + return ctDomainOrClientCaseStrict( + c.value, + c.asciiVal, + clientID, + name, + host, + ip, + ) } - return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip) + return ctDomainOrClientCaseNonStrict( + c.value, + c.asciiVal, + clientID, + name, + host, + ip, + ) case ctFilteringStatus: // Go on, as we currently don't do quick matches against // filtering statuses. 
@@ -162,12 +180,11 @@ func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool { } ip := e.IP.String() - term := strings.ToLower(c.value) if c.strict { - return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip) + return ctDomainOrClientCaseStrict(c.value, c.asciiVal, clientID, name, host, ip) } - return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip) + return ctDomainOrClientCaseNonStrict(c.value, c.asciiVal, clientID, name, host, ip) } func (c *searchCriterion) ctFilteringStatusCase(res filtering.Result) bool {