2020-02-20 22:07:30 +00:00
|
|
|
package querylog
|
|
|
|
|
|
|
|
import (
|
2024-11-15 12:57:07 +00:00
|
|
|
"context"
|
2023-05-24 14:33:15 +01:00
|
|
|
"fmt"
|
2020-02-20 22:07:30 +00:00
|
|
|
"io"
|
2024-11-15 12:57:07 +00:00
|
|
|
"log/slog"
|
2024-02-08 17:39:18 +00:00
|
|
|
"slices"
|
2020-02-20 22:07:30 +00:00
|
|
|
"time"
|
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
"github.com/AdguardTeam/golibs/errors"
|
2024-11-15 12:57:07 +00:00
|
|
|
"github.com/AdguardTeam/golibs/logutil/slogutil"
|
2020-02-20 22:07:30 +00:00
|
|
|
)
|
|
|
|
|
2022-02-10 12:42:59 +00:00
|
|
|
// client finds the client info, if any, by its ClientID and IP address,
|
2021-04-02 15:30:39 +01:00
|
|
|
// optionally checking the provided cache. It will use the IP address
|
|
|
|
// regardless of if the IP anonymization is enabled now, because the
|
|
|
|
// anonymization could have been disabled in the past, and client will try to
|
|
|
|
// find those records as well.
|
|
|
|
func (l *queryLog) client(clientID, ip string, cache clientCache) (c *Client, err error) {
|
|
|
|
cck := clientCacheKey{clientID: clientID, ip: ip}
|
2021-04-12 12:10:20 +01:00
|
|
|
|
|
|
|
var ok bool
|
|
|
|
if c, ok = cache[cck]; ok {
|
2021-04-02 15:30:39 +01:00
|
|
|
return c, nil
|
|
|
|
}
|
2020-05-26 13:37:37 +01:00
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
var ids []string
|
|
|
|
if clientID != "" {
|
|
|
|
ids = append(ids, clientID)
|
|
|
|
}
|
|
|
|
|
|
|
|
if ip != "" {
|
|
|
|
ids = append(ids, ip)
|
|
|
|
}
|
|
|
|
|
|
|
|
c, err = l.findClient(ids)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2020-05-26 13:37:37 +01:00
|
|
|
}
|
|
|
|
|
2021-04-12 12:10:20 +01:00
|
|
|
// Cache all results, including negative ones, to prevent excessive and
|
|
|
|
// expensive client searching.
|
|
|
|
cache[cck] = c
|
2021-04-02 15:30:39 +01:00
|
|
|
|
|
|
|
return c, nil
|
|
|
|
}
|
2020-05-26 13:37:37 +01:00
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
// searchMemory looks up log records which are currently in the in-memory
|
|
|
|
// buffer. It optionally uses the client cache, if provided. It also returns
|
|
|
|
// the total amount of records in the buffer at the moment of searching.
|
2024-01-11 15:38:30 +00:00
|
|
|
// l.confMu is expected to be locked.
|
2024-11-15 12:57:07 +00:00
|
|
|
func (l *queryLog) searchMemory(
|
|
|
|
ctx context.Context,
|
|
|
|
params *searchParams,
|
|
|
|
cache clientCache,
|
|
|
|
) (entries []*logEntry, total int) {
|
2024-02-12 15:45:51 +00:00
|
|
|
// Check memory size, as the buffer can contain a single log record. See
|
|
|
|
// [newQueryLog].
|
2024-01-11 15:38:30 +00:00
|
|
|
if l.conf.MemSize == 0 {
|
|
|
|
return nil, 0
|
|
|
|
}
|
|
|
|
|
2020-05-26 13:37:37 +01:00
|
|
|
l.bufferLock.Lock()
|
2021-04-02 15:30:39 +01:00
|
|
|
defer l.bufferLock.Unlock()
|
2020-05-26 13:37:37 +01:00
|
|
|
|
2023-10-03 18:50:43 +01:00
|
|
|
l.buffer.ReverseRange(func(entry *logEntry) (cont bool) {
|
2023-02-27 14:18:56 +00:00
|
|
|
// A shallow clone is enough, since the only thing that this loop
|
|
|
|
// modifies is the client field.
|
2023-10-03 18:50:43 +01:00
|
|
|
e := entry.shallowClone()
|
2021-04-02 15:30:39 +01:00
|
|
|
|
2023-10-03 18:50:43 +01:00
|
|
|
var err error
|
2021-04-02 15:30:39 +01:00
|
|
|
e.client, err = l.client(e.ClientID, e.IP.String(), cache)
|
|
|
|
if err != nil {
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.ErrorContext(
|
|
|
|
ctx,
|
|
|
|
"enriching memory record",
|
|
|
|
"time", e.Time,
|
|
|
|
"client_ip", e.IP,
|
|
|
|
"client_id", e.ClientID,
|
|
|
|
slogutil.KeyError, err,
|
|
|
|
)
|
2021-04-02 15:30:39 +01:00
|
|
|
|
|
|
|
// Go on and try to match anyway.
|
2020-05-26 13:37:37 +01:00
|
|
|
}
|
2021-04-02 15:30:39 +01:00
|
|
|
|
|
|
|
if params.match(e) {
|
|
|
|
entries = append(entries, e)
|
|
|
|
}
|
|
|
|
|
2023-10-03 18:50:43 +01:00
|
|
|
return true
|
|
|
|
})
|
|
|
|
|
2024-01-11 15:38:30 +00:00
|
|
|
return entries, int(l.buffer.Len())
|
2021-04-02 15:30:39 +01:00
|
|
|
}
|
|
|
|
|
2024-01-11 15:38:30 +00:00
|
|
|
// search searches log entries in memory buffer and log file using specified
|
|
|
|
// parameters and returns the list of entries found and the time of the oldest
|
|
|
|
// entry. l.confMu is expected to be locked.
|
2024-11-15 12:57:07 +00:00
|
|
|
func (l *queryLog) search(
|
|
|
|
ctx context.Context,
|
|
|
|
params *searchParams,
|
|
|
|
) (entries []*logEntry, oldest time.Time) {
|
2023-03-31 15:39:04 +01:00
|
|
|
start := time.Now()
|
2021-04-02 15:30:39 +01:00
|
|
|
|
|
|
|
if params.limit == 0 {
|
|
|
|
return []*logEntry{}, time.Time{}
|
2020-05-26 13:37:37 +01:00
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
cache := clientCache{}
|
2023-03-31 15:39:04 +01:00
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
memoryEntries, bufLen := l.searchMemory(ctx, params, cache)
|
|
|
|
l.logger.DebugContext(ctx, "got entries from memory", "count", len(memoryEntries))
|
2023-03-31 15:39:04 +01:00
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
fileEntries, oldest, total := l.searchFiles(ctx, params, cache)
|
|
|
|
l.logger.DebugContext(ctx, "got entries from files", "count", len(fileEntries))
|
2023-03-31 15:39:04 +01:00
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
total += bufLen
|
|
|
|
|
2020-05-26 13:37:37 +01:00
|
|
|
totalLimit := params.offset + params.limit
|
|
|
|
|
|
|
|
// now let's get a unified collection
|
2022-05-25 16:00:50 +01:00
|
|
|
entries = append(memoryEntries, fileEntries...)
|
2020-05-26 13:37:37 +01:00
|
|
|
if len(entries) > totalLimit {
|
|
|
|
// remove extra records
|
|
|
|
entries = entries[:totalLimit]
|
|
|
|
}
|
2020-11-10 16:00:55 +00:00
|
|
|
|
2020-11-30 10:32:58 +00:00
|
|
|
// Resort entries on start time to partially mitigate query log looking
|
|
|
|
// weird on the frontend.
|
|
|
|
//
|
|
|
|
// See https://github.com/AdguardTeam/AdGuardHome/issues/2293.
|
2023-08-10 18:00:17 +01:00
|
|
|
slices.SortStableFunc(entries, func(a, b *logEntry) (res int) {
|
|
|
|
return -a.Time.Compare(b.Time)
|
2020-11-30 10:32:58 +00:00
|
|
|
})
|
|
|
|
|
2020-05-26 13:37:37 +01:00
|
|
|
if params.offset > 0 {
|
|
|
|
if len(entries) > params.offset {
|
|
|
|
entries = entries[params.offset:]
|
|
|
|
} else {
|
|
|
|
entries = make([]*logEntry, 0)
|
|
|
|
oldest = time.Time{}
|
|
|
|
}
|
|
|
|
}
|
2020-11-10 16:00:55 +00:00
|
|
|
|
2022-05-25 16:00:50 +01:00
|
|
|
if len(entries) > 0 {
|
2020-11-10 16:00:55 +00:00
|
|
|
// Update oldest after merging in the memory buffer.
|
2020-05-26 13:37:37 +01:00
|
|
|
oldest = entries[len(entries)-1].Time
|
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.DebugContext(
|
|
|
|
ctx,
|
|
|
|
"prepared data",
|
|
|
|
"count", len(entries),
|
|
|
|
"total", total,
|
|
|
|
"older_than", params.olderThan,
|
|
|
|
"elapsed", time.Since(start),
|
2022-05-25 16:00:50 +01:00
|
|
|
)
|
2020-05-26 13:37:37 +01:00
|
|
|
|
|
|
|
return entries, oldest
|
|
|
|
}
|
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
// seekRecord changes the current position to the next record older than the
|
|
|
|
// provided parameter.
|
2024-11-15 12:57:07 +00:00
|
|
|
func (r *qLogReader) seekRecord(ctx context.Context, olderThan time.Time) (err error) {
|
2023-05-24 14:33:15 +01:00
|
|
|
if olderThan.IsZero() {
|
|
|
|
return r.SeekStart()
|
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
err = r.seekTS(ctx, olderThan.UnixNano())
|
2023-05-24 14:33:15 +01:00
|
|
|
if err == nil {
|
|
|
|
// Read to the next record, because we only need the one that goes
|
|
|
|
// after it.
|
|
|
|
_, err = r.ReadNext()
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// setQLogReader creates a reader with the specified files and sets the
|
|
|
|
// position to the next record older than the provided parameter.
|
2024-11-15 12:57:07 +00:00
|
|
|
func (l *queryLog) setQLogReader(
|
|
|
|
ctx context.Context,
|
|
|
|
olderThan time.Time,
|
|
|
|
) (qr *qLogReader, err error) {
|
2021-03-15 11:19:04 +00:00
|
|
|
files := []string{
|
|
|
|
l.logFile + ".1",
|
|
|
|
l.logFile,
|
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
r, err := newQLogReader(ctx, l.logger, files)
|
2020-02-20 22:07:30 +00:00
|
|
|
if err != nil {
|
2024-11-18 13:39:35 +00:00
|
|
|
return nil, fmt.Errorf("opening qlog reader: %w", err)
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
err = r.seekRecord(ctx, olderThan)
|
2020-02-20 22:07:30 +00:00
|
|
|
if err != nil {
|
2023-05-24 14:33:15 +01:00
|
|
|
defer func() { err = errors.WithDeferred(err, r.Close()) }()
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.DebugContext(ctx, "cannot seek", "older_than", olderThan, slogutil.KeyError, err)
|
2021-04-02 15:30:39 +01:00
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
return nil, nil
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
return r, nil
|
|
|
|
}
|
2021-04-02 15:30:39 +01:00
|
|
|
|
2024-02-12 15:45:51 +00:00
|
|
|
// readEntries reads entries from the reader to totalLimit. By default, we do
|
2023-05-24 14:33:15 +01:00
|
|
|
// not scan more than maxFileScanEntries at once. The idea is to make search
|
|
|
|
// calls faster so that the UI could handle it and show something quicker.
|
|
|
|
// This behavior can be overridden if maxFileScanEntries is set to 0.
|
|
|
|
func (l *queryLog) readEntries(
|
2024-11-15 12:57:07 +00:00
|
|
|
ctx context.Context,
|
2023-05-24 14:33:15 +01:00
|
|
|
r *qLogReader,
|
|
|
|
params *searchParams,
|
|
|
|
cache clientCache,
|
|
|
|
totalLimit int,
|
|
|
|
) (entries []*logEntry, oldestNano int64, total int) {
|
2020-05-26 13:37:37 +01:00
|
|
|
for total < params.maxFileScanEntries || params.maxFileScanEntries <= 0 {
|
2024-11-15 12:57:07 +00:00
|
|
|
ent, ts, rErr := l.readNextEntry(ctx, r, params, cache)
|
2023-05-24 14:33:15 +01:00
|
|
|
if rErr != nil {
|
|
|
|
if rErr == io.EOF {
|
2022-05-25 16:00:50 +01:00
|
|
|
oldestNano = 0
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.ErrorContext(ctx, "reading next entry", slogutil.KeyError, rErr)
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-27 11:02:05 +00:00
|
|
|
oldestNano = ts
|
|
|
|
total++
|
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
if ent == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
entries = append(entries, ent)
|
|
|
|
if len(entries) == totalLimit {
|
|
|
|
break
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-24 14:33:15 +01:00
|
|
|
return entries, oldestNano, total
|
|
|
|
}
|
|
|
|
|
|
|
|
// searchFiles looks up log records from all log files. It optionally uses the
|
|
|
|
// client cache, if provided. searchFiles does not scan more than
|
|
|
|
// maxFileScanEntries so callers may need to call it several times to get all
|
|
|
|
// the results. oldest and total are the time of the oldest processed entry
|
|
|
|
// and the total number of processed entries, including discarded ones,
|
|
|
|
// correspondingly.
|
|
|
|
func (l *queryLog) searchFiles(
|
2024-11-15 12:57:07 +00:00
|
|
|
ctx context.Context,
|
2023-05-24 14:33:15 +01:00
|
|
|
params *searchParams,
|
|
|
|
cache clientCache,
|
|
|
|
) (entries []*logEntry, oldest time.Time, total int) {
|
2024-11-15 12:57:07 +00:00
|
|
|
r, err := l.setQLogReader(ctx, params.olderThan)
|
2023-05-24 14:33:15 +01:00
|
|
|
if err != nil {
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.ErrorContext(ctx, "searching files", slogutil.KeyError, err)
|
2023-05-24 14:33:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if r == nil {
|
|
|
|
return entries, oldest, 0
|
|
|
|
}
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
if closeErr := r.Close(); closeErr != nil {
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.ErrorContext(ctx, "closing files", slogutil.KeyError, closeErr)
|
2023-05-24 14:33:15 +01:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
totalLimit := params.offset + params.limit
|
2024-11-15 12:57:07 +00:00
|
|
|
entries, oldestNano, total := l.readEntries(ctx, r, params, cache, totalLimit)
|
2020-06-16 10:10:17 +01:00
|
|
|
if oldestNano != 0 {
|
|
|
|
oldest = time.Unix(0, oldestNano)
|
|
|
|
}
|
2021-04-02 15:30:39 +01:00
|
|
|
|
2020-02-20 22:07:30 +00:00
|
|
|
return entries, oldest, total
|
|
|
|
}
|
|
|
|
|
2021-04-12 16:22:11 +01:00
|
|
|
// quickMatchClientFinder is a wrapper around the usual client finding function
|
|
|
|
// to make it easier to use with quick matches.
|
|
|
|
type quickMatchClientFinder struct {
|
|
|
|
client func(clientID, ip string, cache clientCache) (c *Client, err error)
|
|
|
|
cache clientCache
|
|
|
|
}
|
|
|
|
|
|
|
|
// findClient is a method that can be used as a quickMatchClientFinder.
|
2024-11-15 12:57:07 +00:00
|
|
|
func (f quickMatchClientFinder) findClient(
|
|
|
|
ctx context.Context,
|
|
|
|
logger *slog.Logger,
|
|
|
|
clientID string,
|
|
|
|
ip string,
|
|
|
|
) (c *Client) {
|
2021-04-12 16:22:11 +01:00
|
|
|
var err error
|
|
|
|
c, err = f.client(clientID, ip, f.cache)
|
|
|
|
if err != nil {
|
2024-11-15 12:57:07 +00:00
|
|
|
logger.ErrorContext(
|
|
|
|
ctx,
|
|
|
|
"enriching file record for quick search",
|
|
|
|
"client_ip", ip,
|
|
|
|
"client_id", clientID,
|
|
|
|
slogutil.KeyError, err,
|
2021-04-12 16:22:11 +01:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
// readNextEntry reads the next log entry and checks if it matches the search
|
2023-05-24 14:33:15 +01:00
|
|
|
// criteria. It optionally uses the client cache, if provided. e is nil if
|
|
|
|
// the entry doesn't match the search criteria. ts is the timestamp of the
|
2021-04-02 15:30:39 +01:00
|
|
|
// processed entry.
|
|
|
|
func (l *queryLog) readNextEntry(
|
2024-11-15 12:57:07 +00:00
|
|
|
ctx context.Context,
|
2023-05-24 14:33:15 +01:00
|
|
|
r *qLogReader,
|
2021-04-02 15:30:39 +01:00
|
|
|
params *searchParams,
|
|
|
|
cache clientCache,
|
|
|
|
) (e *logEntry, ts int64, err error) {
|
|
|
|
var line string
|
|
|
|
line, err = r.ReadNext()
|
2020-02-20 22:07:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, 0, err
|
|
|
|
}
|
|
|
|
|
2021-04-12 16:22:11 +01:00
|
|
|
clientFinder := quickMatchClientFinder{
|
|
|
|
client: l.client,
|
|
|
|
cache: cache,
|
|
|
|
}
|
|
|
|
|
2024-11-15 12:57:07 +00:00
|
|
|
if !params.quickMatch(ctx, l.logger, line, clientFinder.findClient) {
|
|
|
|
ts = readQLogTimestamp(ctx, l.logger, line)
|
2021-04-12 16:22:11 +01:00
|
|
|
|
|
|
|
return nil, ts, nil
|
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
e = &logEntry{}
|
2024-11-15 12:57:07 +00:00
|
|
|
l.decodeLogEntry(ctx, e, line)
|
2020-02-20 22:07:30 +00:00
|
|
|
|
2023-02-07 14:50:39 +00:00
|
|
|
if l.isIgnored(e.QHost) {
|
|
|
|
return nil, ts, nil
|
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
e.client, err = l.client(e.ClientID, e.IP.String(), cache)
|
|
|
|
if err != nil {
|
2024-11-15 12:57:07 +00:00
|
|
|
l.logger.ErrorContext(
|
|
|
|
ctx,
|
2024-11-18 13:39:35 +00:00
|
|
|
"enriching file record",
|
2024-11-18 11:03:42 +00:00
|
|
|
"at", e.Time,
|
2024-11-15 12:57:07 +00:00
|
|
|
"client_ip", e.IP,
|
|
|
|
"client_id", e.ClientID,
|
|
|
|
slogutil.KeyError, err,
|
2021-04-02 15:30:39 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// Go on and try to match anyway.
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
|
2023-04-14 13:25:04 +01:00
|
|
|
if e.client != nil && e.client.IgnoreQueryLog {
|
|
|
|
return nil, ts, nil
|
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
ts = e.Time.UnixNano()
|
|
|
|
if !params.match(e) {
|
|
|
|
return nil, ts, nil
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|
|
|
|
|
2021-04-02 15:30:39 +01:00
|
|
|
return e, ts, nil
|
2020-02-20 22:07:30 +00:00
|
|
|
}
|