From 7c6557b05e1cde58f82a4c8d55037263a7bc16ff Mon Sep 17 00:00:00 2001 From: Ainar Garipov Date: Mon, 12 Apr 2021 18:22:11 +0300 Subject: [PATCH] Pull request: querylog: more opt Updates 1273. Squashed commit of the following: commit 167c0b5acaab8a2676de2cea556c861dc0efbc72 Author: Ainar Garipov Date: Mon Apr 12 18:12:43 2021 +0300 querylog: imp naming commit 5010ad113e46335011a721cbcc9fc9b1fc623722 Author: Ainar Garipov Date: Mon Apr 12 17:53:41 2021 +0300 querylog: more opt --- HACKING.md | 27 ++++ internal/querylog/decode.go | 15 --- internal/querylog/http.go | 18 +-- internal/querylog/qlog_test.go | 36 +++--- internal/querylog/qlogfile.go | 23 +++- internal/querylog/querylogfile.go | 2 +- internal/querylog/search.go | 35 ++++++ .../{searchcriteria.go => searchcriterion.go} | 115 ++++++++++++------ internal/querylog/searchparams.go | 18 ++- 9 files changed, 209 insertions(+), 80 deletions(-) rename internal/querylog/{searchcriteria.go => searchcriterion.go} (62%) diff --git a/HACKING.md b/HACKING.md index c0c35abd..561f07e7 100644 --- a/HACKING.md +++ b/HACKING.md @@ -243,6 +243,33 @@ on GitHub and most other Markdown renderers. --> * Use named returns to improve readability of function signatures. + * When naming a file which defines an enitity, use singular nouns, unless the + entity is some form of a container for other entities: + + ```go + // File: client.go + + package foo + + type Client struct { + // … + } + ``` + + ```go + // File: clients.go + + package foo + + type Clients []*Client + + // … + + type ClientsWithCache struct { + // … + } + ``` + ### Testing * Use `assert.NoError` and `require.NoError` instead of `assert.Nil` and diff --git a/internal/querylog/decode.go b/internal/querylog/decode.go index d4f34c40..cd62dde1 100644 --- a/internal/querylog/decode.go +++ b/internal/querylog/decode.go @@ -575,18 +575,3 @@ func decodeLogEntry(ent *logEntry, str string) { } } } - -// Get value from "key":"value" -func readJSONValue(s, name string) string { - i := strings.Index(s, "\""+name+"\":\"") - if i == -1 { - return "" - } - start := i + 1 + len(name) + 3 - i = strings.IndexByte(s[start:], '"') - if i == -1 { - return "" - } - end := start + i - return s[start:end] -} diff --git a/internal/querylog/http.go b/internal/querylog/http.go index 9fdf3a3d..cc26f8ab 100644 --- a/internal/querylog/http.go +++ b/internal/querylog/http.go @@ -126,16 +126,16 @@ func getDoubleQuotesEnclosedValue(s *string) bool { return false } -// parseSearchCriteria - parses "searchCriteria" from the specified query parameter -func (l *queryLog) parseSearchCriteria(q url.Values, name string, ct criteriaType) (bool, searchCriteria, error) { +// parseSearchCriterion parses a search criterion from the query parameter. +func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (bool, searchCriterion, error) { val := q.Get(name) if len(val) == 0 { - return false, searchCriteria{}, nil + return false, searchCriterion{}, nil } - c := searchCriteria{ - criteriaType: ct, - value: val, + c := searchCriterion{ + criterionType: ct, + value: val, } if getDoubleQuotesEnclosedValue(&c.value) { c.strict = true @@ -175,15 +175,15 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro p.maxFileScanEntries = 0 } - paramNames := map[string]criteriaType{ + paramNames := map[string]criterionType{ "search": ctDomainOrClient, "response_status": ctFilteringStatus, } for k, v := range paramNames { var ok bool - var c searchCriteria - ok, c, err = l.parseSearchCriteria(q, k, v) + var c searchCriterion + ok, c, err = l.parseSearchCriterion(q, k, v) if err != nil { return nil, err } diff --git a/internal/querylog/qlog_test.go b/internal/querylog/qlog_test.go index 880da49e..af5e8813 100644 --- a/internal/querylog/qlog_test.go +++ b/internal/querylog/qlog_test.go @@ -53,11 +53,11 @@ func TestQueryLog(t *testing.T) { testCases := []struct { name string - sCr []searchCriteria + sCr []searchCriterion want []tcAssertion }{{ name: "all", - sCr: []searchCriteria{}, + sCr: []searchCriterion{}, want: []tcAssertion{ {num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)}, {num: 1, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)}, @@ -66,20 +66,20 @@ func TestQueryLog(t *testing.T) { }, }, { name: "by_domain_strict", - sCr: []searchCriteria{{ - criteriaType: ctDomainOrClient, - strict: true, - value: "TEST.example.org", + sCr: []searchCriterion{{ + criterionType: ctDomainOrClient, + strict: true, + value: "TEST.example.org", }}, want: []tcAssertion{{ num: 0, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3), }}, }, { name: "by_domain_non-strict", - sCr: []searchCriteria{{ - criteriaType: ctDomainOrClient, - strict: false, - value: "example.ORG", + sCr: []searchCriterion{{ + criterionType: ctDomainOrClient, + strict: false, + value: "example.ORG", }}, want: []tcAssertion{ {num: 0, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)}, @@ -88,20 +88,20 @@ func TestQueryLog(t *testing.T) { }, }, { name: "by_client_ip_strict", - sCr: []searchCriteria{{ - criteriaType: ctDomainOrClient, - strict: true, - value: "2.2.2.2", + sCr: []searchCriterion{{ + criterionType: ctDomainOrClient, + strict: true, + value: "2.2.2.2", }}, want: []tcAssertion{{ num: 0, host: "example.org", answer: net.IPv4(1, 1, 1, 2), client: net.IPv4(2, 2, 2, 2), }}, }, { name: "by_client_ip_non-strict", - sCr: []searchCriteria{{ - criteriaType: ctDomainOrClient, - strict: false, - value: "2.2.2", + sCr: []searchCriterion{{ + criterionType: ctDomainOrClient, + strict: false, + value: "2.2.2", }}, want: []tcAssertion{ {num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)}, diff --git a/internal/querylog/qlogfile.go b/internal/querylog/qlogfile.go index 3aa56f6f..cf8d5f8c 100644 --- a/internal/querylog/qlogfile.go +++ b/internal/querylog/qlogfile.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "os" + "strings" "sync" "time" @@ -321,11 +322,29 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) { return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil } +// readJSONvalue reads a JSON string in form of '"key":"value"'. prefix must be +// of the form '"key":"' to generate less garbage. +func readJSONValue(s, prefix string) string { + i := strings.Index(s, prefix) + if i == -1 { + return "" + } + + start := i + len(prefix) + i = strings.IndexByte(s[start:], '"') + if i == -1 { + return "" + } + + end := start + i + return s[start:end] +} + // readQLogTimestamp reads the timestamp field from the query log line func readQLogTimestamp(str string) int64 { - val := readJSONValue(str, "T") + val := readJSONValue(str, `"T":"`) if len(val) == 0 { - val = readJSONValue(str, "Time") + val = readJSONValue(str, `"Time":"`) } if len(val) == 0 { diff --git a/internal/querylog/querylogfile.go b/internal/querylog/querylogfile.go index 6ba859bb..7a170706 100644 --- a/internal/querylog/querylogfile.go +++ b/internal/querylog/querylogfile.go @@ -118,7 +118,7 @@ func (l *queryLog) readFileFirstTimeValue() int64 { } buf = buf[:r] - val := readJSONValue(string(buf), "T") + val := readJSONValue(string(buf), `"T":"`) t, err := time.Parse(time.RFC3339Nano, val) if err != nil { return -1 diff --git a/internal/querylog/search.go b/internal/querylog/search.go index f3b1d6b6..820e4376 100644 --- a/internal/querylog/search.go +++ b/internal/querylog/search.go @@ -171,6 +171,7 @@ func (l *queryLog) searchFiles( for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 { var e *logEntry var ts int64 + e, ts, err = l.readNextEntry(r, params, cache) if err != nil { if err == io.EOF { @@ -198,6 +199,29 @@ func (l *queryLog) searchFiles( return entries, oldest, total } +// quickMatchClientFinder is a wrapper around the usual client finding function +// to make it easier to use with quick matches. +type quickMatchClientFinder struct { + client func(clientID, ip string, cache clientCache) (c *Client, err error) + cache clientCache +} + +// findClient is a method that can be used as a quickMatchClientFinder. +func (f quickMatchClientFinder) findClient(clientID, ip string) (c *Client) { + var err error + c, err = f.client(clientID, ip, f.cache) + if err != nil { + log.Error("querylog: enriching file record for quick search:"+ + " for client %q (client id %q): %s", + ip, + clientID, + err, + ) + } + + return c +} + // readNextEntry reads the next log entry and checks if it matches the search // criteria. It optionally uses the client cache, if provided. e is nil if the // entry doesn't match the search criteria. ts is the timestamp of the @@ -213,6 +237,17 @@ func (l *queryLog) readNextEntry( return nil, 0, err } + clientFinder := quickMatchClientFinder{ + client: l.client, + cache: cache, + } + + if !params.quickMatch(line, clientFinder.findClient) { + ts = readQLogTimestamp(line) + + return nil, ts, nil + } + e = &logEntry{} decodeLogEntry(e, line) diff --git a/internal/querylog/searchcriteria.go b/internal/querylog/searchcriterion.go similarity index 62% rename from internal/querylog/searchcriteria.go rename to internal/querylog/searchcriterion.go index dd67f6b6..c0aca44f 100644 --- a/internal/querylog/searchcriteria.go +++ b/internal/querylog/searchcriterion.go @@ -6,15 +6,15 @@ import ( "github.com/AdguardTeam/AdGuardHome/internal/dnsfilter" ) -type criteriaType int +type criterionType int const ( // ctDomainOrClient is for searching by the domain name, the client's IP // address, or the clinet's ID. - ctDomainOrClient criteriaType = iota + ctDomainOrClient criterionType = iota // ctFilteringStatus is for searching by the filtering status. // - // See (*searchCriteria).ctFilteringStatusCase for details. + // See (*searchCriterion).ctFilteringStatusCase for details. ctFilteringStatus ) @@ -40,17 +40,83 @@ var filteringStatusValues = []string{ filteringStatusProcessed, } -// searchCriteria - every search request may contain a list of different search criteria -// we use each of them to match the query -type searchCriteria struct { - value string // search criteria value - criteriaType criteriaType // type of the criteria - strict bool // should we strictly match (equality) or not (indexOf) +// searchCriterion is a search criterion that is used to match a record. +type searchCriterion struct { + value string + criterionType criterionType + // strict, if true, means that the criterion must be applied to the + // whole value rather than the part of it. That is, equality and not + // containment. + strict bool } -// match - checks if the log entry matches this search criteria -func (c *searchCriteria) match(entry *logEntry) bool { - switch c.criteriaType { +func (c *searchCriterion) ctDomainOrClientCaseStrict( + term string, + clientID string, + name string, + host string, + ip string, +) (ok bool) { + return strings.EqualFold(host, term) || + strings.EqualFold(clientID, term) || + strings.EqualFold(ip, term) || + strings.EqualFold(name, term) +} + +func (c *searchCriterion) ctDomainOrClientCaseNonStrict( + term string, + clientID string, + name string, + host string, + ip string, +) (ok bool) { + // TODO(a.garipov): Write a performant, case-insensitive version of + // strings.Contains instead of generating garbage. Or, perhaps in the + // future, use a locale-appropriate matcher from golang.org/x/text. + clientID = strings.ToLower(clientID) + host = strings.ToLower(host) + ip = strings.ToLower(ip) + name = strings.ToLower(name) + term = strings.ToLower(term) + + return strings.Contains(clientID, term) || + strings.Contains(host, term) || + strings.Contains(ip, term) || + strings.Contains(name, term) +} + +// quickMatch quickly checks if the line matches the given search criterion. +// It returns false if the like doesn't match. This method is only here for +// optimisation purposes. +func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) { + switch c.criterionType { + case ctDomainOrClient: + host := readJSONValue(line, `"QH":"`) + ip := readJSONValue(line, `"IP":"`) + clientID := readJSONValue(line, `"CID":"`) + + var name string + if cli := findClient(clientID, ip); cli != nil { + name = cli.Name + } + + if c.strict { + return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip) + } + + return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip) + case ctFilteringStatus: + // Go on, as we currently don't do quick matches against + // filtering statuses. + return true + default: + return true + } +} + +// match checks if the log entry matches this search criterion. +func (c *searchCriterion) match(entry *logEntry) bool { + switch c.criterionType { case ctDomainOrClient: return c.ctDomainOrClientCase(entry) case ctFilteringStatus: @@ -60,14 +126,7 @@ func (c *searchCriteria) match(entry *logEntry) bool { return false } -func (c *searchCriteria) ctDomainOrClientCaseStrict(term, clientID, name, host, ip string) bool { - return strings.EqualFold(host, term) || - strings.EqualFold(clientID, term) || - strings.EqualFold(ip, term) || - strings.EqualFold(name, term) -} - -func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool { +func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool { clientID := e.ClientID host := e.QHost @@ -82,22 +141,10 @@ func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool { return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip) } - // TODO(a.garipov): Write a case-insensitive version of strings.Contains - // instead of generating garbage. Or, perhaps in the future, use - // a locale-appropriate matcher from golang.org/x/text. - clientID = strings.ToLower(clientID) - host = strings.ToLower(host) - ip = strings.ToLower(ip) - name = strings.ToLower(name) - term = strings.ToLower(term) - - return strings.Contains(clientID, term) || - strings.Contains(host, term) || - strings.Contains(ip, term) || - strings.Contains(name, term) + return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip) } -func (c *searchCriteria) ctFilteringStatusCase(res dnsfilter.Result) bool { +func (c *searchCriterion) ctFilteringStatusCase(res dnsfilter.Result) bool { switch c.value { case filteringStatusAll: return true diff --git a/internal/querylog/searchparams.go b/internal/querylog/searchparams.go index b4ec6f06..193956e6 100644 --- a/internal/querylog/searchparams.go +++ b/internal/querylog/searchparams.go @@ -5,7 +5,7 @@ import "time" // searchParams represent the search query sent by the client type searchParams struct { // searchCriteria - list of search criteria that we use to get filter results - searchCriteria []searchCriteria + searchCriteria []searchCriterion // olderThen - return entries that are older than this value // if not set - disregard it and return any value @@ -27,6 +27,22 @@ func newSearchParams() *searchParams { } } +// quickMatchClientFunc is a simplified client finder for quick matches. +type quickMatchClientFunc = func(clientID, ip string) (c *Client) + +// quickMatch quickly checks if the line matches the given search parameters. +// It returns false if the line doesn't match. This method is only here for +// optimisation purposes. +func (s *searchParams) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) { + for _, c := range s.searchCriteria { + if !c.quickMatch(line, findClient) { + return false + } + } + + return true +} + // match - checks if the logEntry matches the searchParams func (s *searchParams) match(entry *logEntry) bool { if !s.olderThan.IsZero() && entry.Time.UnixNano() >= s.olderThan.UnixNano() {