Pull request: querylog: more opt

Updates 1273.

Squashed commit of the following:

commit 167c0b5acaab8a2676de2cea556c861dc0efbc72
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Mon Apr 12 18:12:43 2021 +0300

    querylog: imp naming

commit 5010ad113e46335011a721cbcc9fc9b1fc623722
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Mon Apr 12 17:53:41 2021 +0300

    querylog: more opt
This commit is contained in:
Ainar Garipov 2021-04-12 18:22:11 +03:00
parent 279350e4a3
commit 7c6557b05e
9 changed files with 209 additions and 80 deletions

View File

@ -243,6 +243,33 @@ on GitHub and most other Markdown renderers. -->
* Use named returns to improve readability of function signatures.
* When naming a file which defines an enitity, use singular nouns, unless the
entity is some form of a container for other entities:
```go
// File: client.go
package foo
type Client struct {
// …
}
```
```go
// File: clients.go
package foo
type Clients []*Client
// …
type ClientsWithCache struct {
// …
}
```
### <a id="testing" href="#testing">Testing</a>
* Use `assert.NoError` and `require.NoError` instead of `assert.Nil` and

View File

@ -575,18 +575,3 @@ func decodeLogEntry(ent *logEntry, str string) {
}
}
}
// Get value from "key":"value"
func readJSONValue(s, name string) string {
i := strings.Index(s, "\""+name+"\":\"")
if i == -1 {
return ""
}
start := i + 1 + len(name) + 3
i = strings.IndexByte(s[start:], '"')
if i == -1 {
return ""
}
end := start + i
return s[start:end]
}

View File

@ -126,16 +126,16 @@ func getDoubleQuotesEnclosedValue(s *string) bool {
return false
}
// parseSearchCriteria - parses "searchCriteria" from the specified query parameter
func (l *queryLog) parseSearchCriteria(q url.Values, name string, ct criteriaType) (bool, searchCriteria, error) {
// parseSearchCriterion parses a search criterion from the query parameter.
func (l *queryLog) parseSearchCriterion(q url.Values, name string, ct criterionType) (bool, searchCriterion, error) {
val := q.Get(name)
if len(val) == 0 {
return false, searchCriteria{}, nil
return false, searchCriterion{}, nil
}
c := searchCriteria{
criteriaType: ct,
value: val,
c := searchCriterion{
criterionType: ct,
value: val,
}
if getDoubleQuotesEnclosedValue(&c.value) {
c.strict = true
@ -175,15 +175,15 @@ func (l *queryLog) parseSearchParams(r *http.Request) (p *searchParams, err erro
p.maxFileScanEntries = 0
}
paramNames := map[string]criteriaType{
paramNames := map[string]criterionType{
"search": ctDomainOrClient,
"response_status": ctFilteringStatus,
}
for k, v := range paramNames {
var ok bool
var c searchCriteria
ok, c, err = l.parseSearchCriteria(q, k, v)
var c searchCriterion
ok, c, err = l.parseSearchCriterion(q, k, v)
if err != nil {
return nil, err
}

View File

@ -53,11 +53,11 @@ func TestQueryLog(t *testing.T) {
testCases := []struct {
name string
sCr []searchCriteria
sCr []searchCriterion
want []tcAssertion
}{{
name: "all",
sCr: []searchCriteria{},
sCr: []searchCriterion{},
want: []tcAssertion{
{num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)},
{num: 1, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)},
@ -66,20 +66,20 @@ func TestQueryLog(t *testing.T) {
},
}, {
name: "by_domain_strict",
sCr: []searchCriteria{{
criteriaType: ctDomainOrClient,
strict: true,
value: "TEST.example.org",
sCr: []searchCriterion{{
criterionType: ctDomainOrClient,
strict: true,
value: "TEST.example.org",
}},
want: []tcAssertion{{
num: 0, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3),
}},
}, {
name: "by_domain_non-strict",
sCr: []searchCriteria{{
criteriaType: ctDomainOrClient,
strict: false,
value: "example.ORG",
sCr: []searchCriterion{{
criterionType: ctDomainOrClient,
strict: false,
value: "example.ORG",
}},
want: []tcAssertion{
{num: 0, host: "test.example.org", answer: net.IPv4(1, 1, 1, 3), client: net.IPv4(2, 2, 2, 3)},
@ -88,20 +88,20 @@ func TestQueryLog(t *testing.T) {
},
}, {
name: "by_client_ip_strict",
sCr: []searchCriteria{{
criteriaType: ctDomainOrClient,
strict: true,
value: "2.2.2.2",
sCr: []searchCriterion{{
criterionType: ctDomainOrClient,
strict: true,
value: "2.2.2.2",
}},
want: []tcAssertion{{
num: 0, host: "example.org", answer: net.IPv4(1, 1, 1, 2), client: net.IPv4(2, 2, 2, 2),
}},
}, {
name: "by_client_ip_non-strict",
sCr: []searchCriteria{{
criteriaType: ctDomainOrClient,
strict: false,
value: "2.2.2",
sCr: []searchCriterion{{
criterionType: ctDomainOrClient,
strict: false,
value: "2.2.2",
}},
want: []tcAssertion{
{num: 0, host: "example.com", answer: net.IPv4(1, 1, 1, 4), client: net.IPv4(2, 2, 2, 4)},

View File

@ -4,6 +4,7 @@ import (
"fmt"
"io"
"os"
"strings"
"sync"
"time"
@ -321,11 +322,29 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, int64, error) {
return string(buffer[startLine:endLine]), lineIdx, lineEndIdx, nil
}
// readJSONvalue reads a JSON string in form of '"key":"value"'. prefix must be
// of the form '"key":"' to generate less garbage.
func readJSONValue(s, prefix string) string {
i := strings.Index(s, prefix)
if i == -1 {
return ""
}
start := i + len(prefix)
i = strings.IndexByte(s[start:], '"')
if i == -1 {
return ""
}
end := start + i
return s[start:end]
}
// readQLogTimestamp reads the timestamp field from the query log line
func readQLogTimestamp(str string) int64 {
val := readJSONValue(str, "T")
val := readJSONValue(str, `"T":"`)
if len(val) == 0 {
val = readJSONValue(str, "Time")
val = readJSONValue(str, `"Time":"`)
}
if len(val) == 0 {

View File

@ -118,7 +118,7 @@ func (l *queryLog) readFileFirstTimeValue() int64 {
}
buf = buf[:r]
val := readJSONValue(string(buf), "T")
val := readJSONValue(string(buf), `"T":"`)
t, err := time.Parse(time.RFC3339Nano, val)
if err != nil {
return -1

View File

@ -171,6 +171,7 @@ func (l *queryLog) searchFiles(
for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 {
var e *logEntry
var ts int64
e, ts, err = l.readNextEntry(r, params, cache)
if err != nil {
if err == io.EOF {
@ -198,6 +199,29 @@ func (l *queryLog) searchFiles(
return entries, oldest, total
}
// quickMatchClientFinder is a wrapper around the usual client finding function
// to make it easier to use with quick matches.
type quickMatchClientFinder struct {
client func(clientID, ip string, cache clientCache) (c *Client, err error)
cache clientCache
}
// findClient is a method that can be used as a quickMatchClientFinder.
func (f quickMatchClientFinder) findClient(clientID, ip string) (c *Client) {
var err error
c, err = f.client(clientID, ip, f.cache)
if err != nil {
log.Error("querylog: enriching file record for quick search:"+
" for client %q (client id %q): %s",
ip,
clientID,
err,
)
}
return c
}
// readNextEntry reads the next log entry and checks if it matches the search
// criteria. It optionally uses the client cache, if provided. e is nil if the
// entry doesn't match the search criteria. ts is the timestamp of the
@ -213,6 +237,17 @@ func (l *queryLog) readNextEntry(
return nil, 0, err
}
clientFinder := quickMatchClientFinder{
client: l.client,
cache: cache,
}
if !params.quickMatch(line, clientFinder.findClient) {
ts = readQLogTimestamp(line)
return nil, ts, nil
}
e = &logEntry{}
decodeLogEntry(e, line)

View File

@ -6,15 +6,15 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/dnsfilter"
)
type criteriaType int
type criterionType int
const (
// ctDomainOrClient is for searching by the domain name, the client's IP
// address, or the clinet's ID.
ctDomainOrClient criteriaType = iota
ctDomainOrClient criterionType = iota
// ctFilteringStatus is for searching by the filtering status.
//
// See (*searchCriteria).ctFilteringStatusCase for details.
// See (*searchCriterion).ctFilteringStatusCase for details.
ctFilteringStatus
)
@ -40,17 +40,83 @@ var filteringStatusValues = []string{
filteringStatusProcessed,
}
// searchCriteria - every search request may contain a list of different search criteria
// we use each of them to match the query
type searchCriteria struct {
value string // search criteria value
criteriaType criteriaType // type of the criteria
strict bool // should we strictly match (equality) or not (indexOf)
// searchCriterion is a search criterion that is used to match a record.
type searchCriterion struct {
value string
criterionType criterionType
// strict, if true, means that the criterion must be applied to the
// whole value rather than the part of it. That is, equality and not
// containment.
strict bool
}
// match - checks if the log entry matches this search criteria
func (c *searchCriteria) match(entry *logEntry) bool {
switch c.criteriaType {
func (c *searchCriterion) ctDomainOrClientCaseStrict(
term string,
clientID string,
name string,
host string,
ip string,
) (ok bool) {
return strings.EqualFold(host, term) ||
strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) ||
strings.EqualFold(name, term)
}
func (c *searchCriterion) ctDomainOrClientCaseNonStrict(
term string,
clientID string,
name string,
host string,
ip string,
) (ok bool) {
// TODO(a.garipov): Write a performant, case-insensitive version of
// strings.Contains instead of generating garbage. Or, perhaps in the
// future, use a locale-appropriate matcher from golang.org/x/text.
clientID = strings.ToLower(clientID)
host = strings.ToLower(host)
ip = strings.ToLower(ip)
name = strings.ToLower(name)
term = strings.ToLower(term)
return strings.Contains(clientID, term) ||
strings.Contains(host, term) ||
strings.Contains(ip, term) ||
strings.Contains(name, term)
}
// quickMatch quickly checks if the line matches the given search criterion.
// It returns false if the like doesn't match. This method is only here for
// optimisation purposes.
func (c *searchCriterion) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) {
switch c.criterionType {
case ctDomainOrClient:
host := readJSONValue(line, `"QH":"`)
ip := readJSONValue(line, `"IP":"`)
clientID := readJSONValue(line, `"CID":"`)
var name string
if cli := findClient(clientID, ip); cli != nil {
name = cli.Name
}
if c.strict {
return c.ctDomainOrClientCaseStrict(c.value, clientID, name, host, ip)
}
return c.ctDomainOrClientCaseNonStrict(c.value, clientID, name, host, ip)
case ctFilteringStatus:
// Go on, as we currently don't do quick matches against
// filtering statuses.
return true
default:
return true
}
}
// match checks if the log entry matches this search criterion.
func (c *searchCriterion) match(entry *logEntry) bool {
switch c.criterionType {
case ctDomainOrClient:
return c.ctDomainOrClientCase(entry)
case ctFilteringStatus:
@ -60,14 +126,7 @@ func (c *searchCriteria) match(entry *logEntry) bool {
return false
}
func (c *searchCriteria) ctDomainOrClientCaseStrict(term, clientID, name, host, ip string) bool {
return strings.EqualFold(host, term) ||
strings.EqualFold(clientID, term) ||
strings.EqualFold(ip, term) ||
strings.EqualFold(name, term)
}
func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool {
func (c *searchCriterion) ctDomainOrClientCase(e *logEntry) bool {
clientID := e.ClientID
host := e.QHost
@ -82,22 +141,10 @@ func (c *searchCriteria) ctDomainOrClientCase(e *logEntry) bool {
return c.ctDomainOrClientCaseStrict(term, clientID, name, host, ip)
}
// TODO(a.garipov): Write a case-insensitive version of strings.Contains
// instead of generating garbage. Or, perhaps in the future, use
// a locale-appropriate matcher from golang.org/x/text.
clientID = strings.ToLower(clientID)
host = strings.ToLower(host)
ip = strings.ToLower(ip)
name = strings.ToLower(name)
term = strings.ToLower(term)
return strings.Contains(clientID, term) ||
strings.Contains(host, term) ||
strings.Contains(ip, term) ||
strings.Contains(name, term)
return c.ctDomainOrClientCaseNonStrict(term, clientID, name, host, ip)
}
func (c *searchCriteria) ctFilteringStatusCase(res dnsfilter.Result) bool {
func (c *searchCriterion) ctFilteringStatusCase(res dnsfilter.Result) bool {
switch c.value {
case filteringStatusAll:
return true

View File

@ -5,7 +5,7 @@ import "time"
// searchParams represent the search query sent by the client
type searchParams struct {
// searchCriteria - list of search criteria that we use to get filter results
searchCriteria []searchCriteria
searchCriteria []searchCriterion
// olderThen - return entries that are older than this value
// if not set - disregard it and return any value
@ -27,6 +27,22 @@ func newSearchParams() *searchParams {
}
}
// quickMatchClientFunc is a simplified client finder for quick matches.
type quickMatchClientFunc = func(clientID, ip string) (c *Client)
// quickMatch quickly checks if the line matches the given search parameters.
// It returns false if the line doesn't match. This method is only here for
// optimisation purposes.
func (s *searchParams) quickMatch(line string, findClient quickMatchClientFunc) (ok bool) {
for _, c := range s.searchCriteria {
if !c.quickMatch(line, findClient) {
return false
}
}
return true
}
// match - checks if the logEntry matches the searchParams
func (s *searchParams) match(entry *logEntry) bool {
if !s.olderThan.IsZero() && entry.Time.UnixNano() >= s.olderThan.UnixNano() {