diff --git a/AGHTechDoc.md b/AGHTechDoc.md index 60fc986f..c6e70391 100644 --- a/AGHTechDoc.md +++ b/AGHTechDoc.md @@ -43,6 +43,7 @@ Contents: * API: Set statistics parameters * API: Get statistics parameters * Query logs + * API: Get query log * API: Set querylog parameters * API: Get querylog parameters * Filtering @@ -1007,6 +1008,92 @@ Response: ## Query logs +When a new DNS request is received and processed, we store information about this event in "query log". It is a file on disk in JSON format: + + { + "Question":"...", + "Answer":"...", + "Result":{ + "IsFiltered":true, + "Reason":3, + "Rule":"...", + "FilterID":1 + }, + "Time":"...", + "Elapsed":12345, + "IP":"127.0.0.1" + } + + +### Adding new data + +First, new data is stored in a memory region. When this array is filled to a particular amount of entries (e.g. 5000), we flush this data to a file and clear the array. + + +### Getting data + +When UI asks for data from query log (see "API: Get query log"), server reads the newest entries from memory array and the file. The maximum number of items returned per one request is limited by configuration. + + +### Removing old data + +We store data for a limited amount of time - the log file is automatically rotated. + + +### API: Get query log + +Request: + + POST /control/querylog + + { + older_than: "2006-01-02T15:04:05.999999999Z07:00" // must be "" for the first request + + filter:{ + domain: "..." + client: "..." + question_type: "A" | "AAAA" + response_status: "" | "filtered" + } + } + +If `older_than` value is set, server returns the next chunk of entries that are older than this time stamp. This setting is used for paging. UI sets this value to `""` on the first request and gets the latest log entries. To get the older entries, UI sets this value to the timestamp of the last (the oldest) entry from the previous response from Server. + +If "filter" settings are set, server returns only entries that match the specified request. 
+ +For `filter.domain` and `filter.client` the server matches substrings by default: `adguard.com` matches `www.adguard.com`. Strict matching can be enabled by enclosing the value in double quotes: `"adguard.com"` matches `adguard.com` but doesn't match `www.adguard.com`. + +Response: + + [ + { + "answer":[ + { + "ttl":10, + "type":"AAAA", + "value":"::" + } + ... + ], + "client":"127.0.0.1", + "elapsedMs":"0.098403", + "filterId":1, + "question":{ + "class":"IN", + "host":"doubleclick.net", + "type":"AAAA" + }, + "reason":"FilteredBlackList", + "rule":"||doubleclick.net^", + "status":"NOERROR", + "time":"2006-01-02T15:04:05.999999999Z07:00" + } + ... + ] + +The most recent entries are at the top of list. + + ### API: Set querylog parameters Request: diff --git a/home/control_querylog.go b/home/control_querylog.go index 43ac3869..737b9f12 100644 --- a/home/control_querylog.go +++ b/home/control_querylog.go @@ -3,12 +3,81 @@ package home import ( "encoding/json" "net/http" + "time" "github.com/AdguardTeam/AdGuardHome/querylog" + "github.com/miekg/dns" ) +type qlogFilterJSON struct { + Domain string `json:"domain"` + Client string `json:"client"` + QuestionType string `json:"question_type"` + ResponseStatus string `json:"response_status"` +} + +type queryLogRequest struct { + OlderThan string `json:"older_than"` + Filter qlogFilterJSON `json:"filter"` +} + +// "value" -> value, return TRUE +func getDoubleQuotesEnclosedValue(s *string) bool { + t := *s + if len(t) >= 2 && t[0] == '"' && t[len(t)-1] == '"' { + *s = t[1 : len(t)-1] + return true + } + return false +} + func handleQueryLog(w http.ResponseWriter, r *http.Request) { - data := config.queryLog.GetData() + req := queryLogRequest{} + err := json.NewDecoder(r.Body).Decode(&req) + if err != nil { + httpError(w, http.StatusBadRequest, "json decode: %s", err) + return + } + + params := querylog.GetDataParams{ + Domain: req.Filter.Domain, + Client: req.Filter.Client, + } + if len(req.OlderThan) != 0 { + 
params.OlderThan, err = time.Parse(time.RFC3339Nano, req.OlderThan) + if err != nil { + httpError(w, http.StatusBadRequest, "invalid time stamp: %s", err) + return + } + } + + if getDoubleQuotesEnclosedValue(&params.Domain) { + params.StrictMatchDomain = true + } + if getDoubleQuotesEnclosedValue(&params.Client) { + params.StrictMatchClient = true + } + + if len(req.Filter.QuestionType) != 0 { + qtype, ok := dns.StringToType[req.Filter.QuestionType] + if !ok { + httpError(w, http.StatusBadRequest, "invalid question_type") + return + } + params.QuestionType = qtype + } + + if len(req.Filter.ResponseStatus) != 0 { + switch req.Filter.ResponseStatus { + case "filtered": + params.ResponseStatus = querylog.ResponseStatusFiltered + default: + httpError(w, http.StatusBadRequest, "invalid response_status") + return + } + } + + data := config.queryLog.GetData(params) jsonVal, err := json.Marshal(data) if err != nil { @@ -84,7 +153,7 @@ func checkQueryLogInterval(i uint32) bool { // RegisterQueryLogHandlers - register handlers func RegisterQueryLogHandlers() { - httpRegister(http.MethodGet, "/control/querylog", handleQueryLog) + httpRegister("POST", "/control/querylog", handleQueryLog) httpRegister(http.MethodGet, "/control/querylog_info", handleQueryLogInfo) httpRegister(http.MethodPost, "/control/querylog_clear", handleQueryLogClear) httpRegister(http.MethodPost, "/control/querylog_config", handleQueryLogConfig) diff --git a/querylog/qlog.go b/querylog/qlog.go index 690f4e21..b29c8dec 100644 --- a/querylog/qlog.go +++ b/querylog/qlog.go @@ -18,7 +18,10 @@ import ( const ( logBufferCap = 5000 // maximum capacity of logBuffer before it's flushed to disk queryLogFileName = "querylog.json" // .gz added during compression - queryLogSize = 5000 // maximum API response for /querylog + getDataLimit = 500 // GetData(): maximum log entries to return + + // maximum data chunks to parse when filtering entries + maxFilteringChunks = 10 ) // queryLog is a structure that writes and reads the DNS 
query log @@ -30,9 +33,6 @@ type queryLog struct { logBuffer []*logEntry fileFlushLock sync.Mutex // synchronize a file-flushing goroutine and main thread flushPending bool // don't start another goroutine while the previous one is still running - - cache []*logEntry - lock sync.RWMutex } // create a new instance of the query log @@ -41,7 +41,6 @@ func newQueryLog(conf Config) *queryLog { l.logFile = filepath.Join(conf.BaseDir, queryLogFileName) l.conf = conf go l.periodicQueryLogRotate() - go l.fillFromFile() return &l } @@ -62,10 +61,6 @@ func (l *queryLog) Clear() { l.flushPending = false l.logBufferLock.Unlock() - l.lock.Lock() - l.cache = nil - l.lock.Unlock() - err := os.Remove(l.logFile + ".1") if err != nil { log.Error("file remove: %s: %s", l.logFile+".1", err) @@ -147,13 +142,6 @@ func (l *queryLog) Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Res } } l.logBufferLock.Unlock() - l.lock.Lock() - l.cache = append(l.cache, &entry) - if len(l.cache) > queryLogSize { - toremove := len(l.cache) - queryLogSize - l.cache = l.cache[toremove:] - } - l.lock.Unlock() // if buffer needs to be flushed to disk, do it now if needFlush { @@ -163,20 +151,143 @@ func (l *queryLog) Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Res } } -func (l *queryLog) GetData() []map[string]interface{} { - l.lock.RLock() - values := make([]*logEntry, len(l.cache)) - copy(values, l.cache) - l.lock.RUnlock() - - // reverse it so that newest is first - for left, right := 0, len(values)-1; left < right; left, right = left+1, right-1 { - values[left], values[right] = values[right], values[left] +// Return TRUE if this entry is needed +func isNeeded(entry *logEntry, params GetDataParams) bool { + if params.ResponseStatus != 0 { + if params.ResponseStatus == ResponseStatusFiltered && !entry.Result.IsFiltered { + return false + } } - // iterate + if len(params.Domain) != 0 || params.QuestionType != 0 { + m := dns.Msg{} + _ = m.Unpack(entry.Question) + + if 
params.QuestionType != 0 { + if m.Question[0].Qtype != params.QuestionType { + return false + } + } + + if len(params.Domain) != 0 && params.StrictMatchDomain { + if m.Question[0].Name != params.Domain { + return false + } + } else if len(params.Domain) != 0 { + if strings.Index(m.Question[0].Name, params.Domain) == -1 { + return false + } + } + } + + if len(params.Client) != 0 && params.StrictMatchClient { + if entry.IP != params.Client { + return false + } + } else if len(params.Client) != 0 { + if strings.Index(entry.IP, params.Client) == -1 { + return false + } + } + + return true +} + +func (l *queryLog) readFromFile(params GetDataParams) ([]*logEntry, int) { + entries := []*logEntry{} + olderThan := params.OlderThan + totalChunks := 0 + total := 0 + + r := l.OpenReader() + if r == nil { + return entries, 0 + } + r.BeginRead(olderThan, getDataLimit) + for totalChunks < maxFilteringChunks { + first := true + newEntries := []*logEntry{} + for { + entry := r.Next() + if entry == nil { + break + } + total++ + + if first { + first = false + olderThan = entry.Time + } + + if !isNeeded(entry, params) { + continue + } + if len(newEntries) == getDataLimit { + newEntries = newEntries[1:] + } + newEntries = append(newEntries, entry) + } + + log.Debug("entries: +%d (%d) older-than:%s", len(newEntries), len(entries), olderThan) + + entries = append(newEntries, entries...) + if len(entries) > getDataLimit { + toremove := len(entries) - getDataLimit + entries = entries[toremove:] + break + } + if first || len(entries) == getDataLimit { + break + } + totalChunks++ + r.BeginReadPrev(olderThan, getDataLimit) + } + + r.Close() + return entries, total +} + +func (l *queryLog) GetData(params GetDataParams) []map[string]interface{} { var data = []map[string]interface{}{} - for _, entry := range values { + + if len(params.Domain) != 0 && params.StrictMatchDomain { + params.Domain = params.Domain + "." 
+ } + + now := time.Now() + entries := []*logEntry{} + total := 0 + + // add from file + entries, total = l.readFromFile(params) + + if params.OlderThan.IsZero() { + params.OlderThan = now + } + + // add from memory buffer + l.logBufferLock.Lock() + total += len(l.logBuffer) + for _, entry := range l.logBuffer { + + if !isNeeded(entry, params) { + continue + } + + if entry.Time.UnixNano() >= params.OlderThan.UnixNano() { + break + } + + if len(entries) == getDataLimit { + entries = entries[1:] + } + entries = append(entries, entry) + } + l.logBufferLock.Unlock() + + // process the elements from latest to oldest + for i := len(entries) - 1; i >= 0; i-- { + entry := entries[i] var q *dns.Msg var a *dns.Msg @@ -200,7 +311,7 @@ func (l *queryLog) GetData() []map[string]interface{} { jsonEntry := map[string]interface{}{ "reason": entry.Result.Reason.String(), "elapsedMs": strconv.FormatFloat(entry.Elapsed.Seconds()*1000, 'f', -1, 64), - "time": entry.Time.Format(time.RFC3339), + "time": entry.Time.Format(time.RFC3339Nano), "client": entry.IP, } if q != nil { @@ -231,6 +342,8 @@ func (l *queryLog) GetData() []map[string]interface{} { data = append(data, jsonEntry) } + log.Debug("QueryLog: prepared data (%d/%d) older than %s in %s", + len(entries), total, params.OlderThan, time.Since(now)) return data } diff --git a/querylog/querylog.go b/querylog/querylog.go index 3a18eb53..308830a9 100644 --- a/querylog/querylog.go +++ b/querylog/querylog.go @@ -21,7 +21,7 @@ type QueryLog interface { Add(question *dns.Msg, answer *dns.Msg, result *dnsfilter.Result, elapsed time.Duration, addr net.Addr, upstream string) // Get log entries - GetData() []map[string]interface{} + GetData(params GetDataParams) []map[string]interface{} // Clear memory buffer and remove log files Clear() @@ -37,3 +37,23 @@ type Config struct { func New(conf Config) QueryLog { return newQueryLog(conf) } + +// GetDataParams - parameters for GetData() +type GetDataParams struct { + OlderThan time.Time // return 
entries that are older than this value + Domain string // filter by domain name in question + Client string // filter by client IP + QuestionType uint16 // filter by question type + ResponseStatus ResponseStatusType // filter by response status + StrictMatchDomain bool // if Domain value must be matched strictly + StrictMatchClient bool // if Client value must be matched strictly +} + +// ResponseStatusType - response status +type ResponseStatusType int32 + +// Response status constants +const ( + ResponseStatusAll ResponseStatusType = iota + 1 + ResponseStatusFiltered +) diff --git a/querylog/querylog_file.go b/querylog/querylog_file.go index 6f6f887a..4f63a247 100644 --- a/querylog/querylog_file.go +++ b/querylog/querylog_file.go @@ -5,13 +5,13 @@ import ( "compress/gzip" "encoding/json" "fmt" + "io" "os" "sync" "time" "github.com/AdguardTeam/golibs/log" "github.com/go-test/deep" - "github.com/miekg/dns" ) var ( @@ -19,6 +19,7 @@ var ( ) const enableGzip = false +const maxEntrySize = 1000 // flushLogBuffer flushes the current buffer to file and resets the current buffer func (l *queryLog) flushLogBuffer(fullFlush bool) error { @@ -182,50 +183,232 @@ func (l *queryLog) periodicQueryLogRotate() { // Reader is the DB reader context type Reader struct { - f *os.File - jd *json.Decoder - now time.Time - ql *queryLog + ql *queryLog + + f *os.File + jd *json.Decoder + now time.Time + validFrom int64 // UNIX time (ns) + olderThan int64 // UNIX time (ns) files []string ifile int - count uint64 // returned elements counter + limit uint64 + count uint64 // counter for returned elements + latest bool // return the latest entries + filePrepared bool + + searching bool // we're searching for an entry with exact time stamp + fseeker fileSeeker // file seeker object + fpos uint64 // current file offset + nSeekRequests uint32 // number of Seek() requests made (finding a new line doesn't count) } -// OpenReader locks the file and returns reader object or nil on error +type 
fileSeeker struct { + target uint64 // target value + + pos uint64 // current offset, may be adjusted by user for increased accuracy + lastpos uint64 // the last offset returned + lo uint64 // low boundary offset + hi uint64 // high boundary offset +} + +// OpenReader - return reader object func (l *queryLog) OpenReader() *Reader { r := Reader{} r.ql = l r.now = time.Now() - + r.validFrom = r.now.Unix() - int64(l.conf.Interval*60*60) + r.validFrom *= 1000000000 + r.files = []string{ + r.ql.logFile, + r.ql.logFile + ".1", + } return &r } -// Close closes the reader +// Close - close the reader func (r *Reader) Close() { elapsed := time.Since(r.now) var perunit time.Duration if r.count > 0 { perunit = elapsed / time.Duration(r.count) } - log.Debug("querylog: read %d entries in %v, %v/entry", - r.count, elapsed, perunit) + log.Debug("querylog: read %d entries in %v, %v/entry, seek-reqs:%d", + r.count, elapsed, perunit, r.nSeekRequests) if r.f != nil { r.f.Close() } } -// BeginRead starts reading -func (r *Reader) BeginRead() { - r.files = []string{ - r.ql.logFile, - r.ql.logFile + ".1", +// BeginRead - start reading +// olderThan: stop returning entries when an entry with this time is reached +// count: minimum number of entries to return +func (r *Reader) BeginRead(olderThan time.Time, count uint64) { + r.olderThan = olderThan.UnixNano() + r.latest = olderThan.IsZero() + r.limit = count + if r.latest { + r.olderThan = r.now.UnixNano() } + r.filePrepared = false + r.searching = false + r.jd = nil } -// Next returns the next entry or nil if reading is finished +// BeginReadPrev - start reading the previous data chunk +func (r *Reader) BeginReadPrev(olderThan time.Time, count uint64) { + r.olderThan = olderThan.UnixNano() + r.latest = olderThan.IsZero() + r.limit = count + if r.latest { + r.olderThan = r.now.UnixNano() + } + + off := r.fpos - maxEntrySize*(r.limit+1) + if int64(off) < maxEntrySize { + off = 0 + } + r.fpos = uint64(off) + log.Debug("QueryLog: seek: %x", 
off) + _, err := r.f.Seek(int64(off), io.SeekStart) + if err != nil { + log.Error("file.Seek: %s: %s", r.files[r.ifile], err) + return + } + r.nSeekRequests++ + + r.seekToNewLine() + r.fseeker.pos = r.fpos + + r.filePrepared = true + r.searching = false + r.jd = nil +} + +// Perform binary seek +// Return 0: success; 1: seek required; -1: error +func (fs *fileSeeker) seekBinary(cur uint64) int32 { + log.Debug("QueryLog: seek: tgt=%x cur=%x, %x: [%x..%x]", fs.target, cur, fs.pos, fs.lo, fs.hi) + + off := uint64(0) + if fs.pos >= fs.lo && fs.pos < fs.hi { + if cur == fs.target { + return 0 + } else if cur < fs.target { + fs.lo = fs.pos + 1 + } else { + fs.hi = fs.pos + } + off = fs.lo + (fs.hi-fs.lo)/2 + } else { + // we didn't find another entry from the last file offset: now return the boundary beginning + off = fs.lo + } + + if off == fs.lastpos { + return -1 + } + + fs.lastpos = off + fs.pos = off + return 1 +} + +// Seek to a new line +func (r *Reader) seekToNewLine() bool { + b := make([]byte, maxEntrySize*2) + + _, err := r.f.Read(b) + if err != nil { + log.Error("QueryLog: file.Read: %s: %s", r.files[r.ifile], err) + return false + } + + off := bytes.IndexByte(b, '\n') + 1 + if off == 0 { + log.Error("QueryLog: Can't find a new line: %s", r.files[r.ifile]) + return false + } + + r.fpos += uint64(off) + log.Debug("QueryLog: seek: %x (+%d)", r.fpos, off) + _, err = r.f.Seek(int64(r.fpos), io.SeekStart) + if err != nil { + log.Error("QueryLog: file.Seek: %s: %s", r.files[r.ifile], err) + return false + } + return true +} + +// Open a file +func (r *Reader) openFile() bool { + var err error + fn := r.files[r.ifile] + + r.f, err = os.Open(fn) + if err != nil { + if !os.IsNotExist(err) { + log.Error("QueryLog: Failed to open file \"%s\": %s", fn, err) + } + return false + } + return true +} + +// Seek to the needed position +func (r *Reader) prepareRead() bool { + fn := r.files[r.ifile] + + fi, err := r.f.Stat() + if err != nil { + log.Error("QueryLog: file.Stat: 
%s: %s", fn, err) + return false + } + fsize := uint64(fi.Size()) + + off := uint64(0) + if r.latest { + // read data from the end of file + off = fsize - maxEntrySize*(r.limit+1) + if int64(off) < maxEntrySize { + off = 0 + } + r.fpos = uint64(off) + log.Debug("QueryLog: seek: %x", off) + _, err = r.f.Seek(int64(off), io.SeekStart) + if err != nil { + log.Error("QueryLog: file.Seek: %s: %s", fn, err) + return false + } + } else { + // start searching in file: we'll read the first chunk of data from the middle of file + r.searching = true + r.fseeker = fileSeeker{} + r.fseeker.target = uint64(r.olderThan) + r.fseeker.hi = fsize + rc := r.fseeker.seekBinary(0) + r.fpos = r.fseeker.pos + if rc == 1 { + _, err = r.f.Seek(int64(r.fpos), io.SeekStart) + if err != nil { + log.Error("QueryLog: file.Seek: %s: %s", fn, err) + return false + } + } + } + r.nSeekRequests++ + + if !r.seekToNewLine() { + return false + } + r.fseeker.pos = r.fpos + return true +} + +// Next - return the next entry or nil if reading is finished func (r *Reader) Next() *logEntry { // nolint var err error for { @@ -234,15 +417,19 @@ func (r *Reader) Next() *logEntry { // nolint if r.ifile == len(r.files) { return nil } - fn := r.files[r.ifile] - r.f, err = os.Open(fn) - if err != nil { - log.Error("Failed to open file \"%s\": %s", fn, err) + if !r.openFile() { r.ifile++ continue } } + if !r.filePrepared { + if !r.prepareRead() { + return nil + } + r.filePrepared = true + } + // open decoder if needed if r.jd == nil { r.jd = json.NewDecoder(r.f) @@ -251,20 +438,60 @@ func (r *Reader) Next() *logEntry { // nolint // check if there's data if !r.jd.More() { r.jd = nil - r.f.Close() - r.f = nil - r.ifile++ - continue + return nil } // read data var entry logEntry err = r.jd.Decode(&entry) if err != nil { - log.Error("Failed to decode: %s", err) - // next entry can be fine, try more + log.Error("QueryLog: Failed to decode: %s", err) + r.jd = nil + return nil + } + + t := entry.Time.UnixNano() + if 
r.searching { + r.jd = nil + + rr := r.fseeker.seekBinary(uint64(t)) + r.fpos = r.fseeker.pos + if rr < 0 { + log.Error("QueryLog: File seek error: can't find the target entry: %s", r.files[r.ifile]) + return nil + } else if rr == 0 { + // We found the target entry. + // We'll start reading the previous chunk of data. + r.searching = false + + off := r.fpos - (maxEntrySize * (r.limit + 1)) + if int64(off) < maxEntrySize { + off = 0 + } + r.fpos = off + } + + _, err = r.f.Seek(int64(r.fpos), io.SeekStart) + if err != nil { + log.Error("QueryLog: file.Seek: %s: %s", r.files[r.ifile], err) + return nil + } + r.nSeekRequests++ + + if !r.seekToNewLine() { + return nil + } + r.fseeker.pos = r.fpos continue } + + if t < r.validFrom { + continue + } + if t >= r.olderThan { + return nil + } + r.count++ return &entry } @@ -274,57 +501,3 @@ func (r *Reader) Next() *logEntry { // nolint func (r *Reader) Total() int { return 0 } - -// Fill cache from file -func (l *queryLog) fillFromFile() { - now := time.Now() - validFrom := now.Unix() - int64(l.conf.Interval*60*60) - r := l.OpenReader() - if r == nil { - return - } - - r.BeginRead() - - for { - entry := r.Next() - if entry == nil { - break - } - - if entry.Time.Unix() < validFrom { - continue - } - - if len(entry.Question) == 0 { - log.Printf("entry question is absent, skipping") - continue - } - - if entry.Time.After(now) { - log.Printf("t %v vs %v is in the future, ignoring", entry.Time, now) - continue - } - - q := new(dns.Msg) - if err := q.Unpack(entry.Question); err != nil { - log.Printf("failed to unpack dns message question: %s", err) - continue - } - - if len(q.Question) != 1 { - log.Printf("malformed dns message, has no questions, skipping") - continue - } - - l.lock.Lock() - l.cache = append(l.cache, entry) - if len(l.cache) > queryLogSize { - toremove := len(l.cache) - queryLogSize - l.cache = l.cache[toremove:] - } - l.lock.Unlock() - } - - r.Close() -} diff --git a/querylog/querylog_test.go 
b/querylog/querylog_test.go index 8da84183..d533fe4c 100644 --- a/querylog/querylog_test.go +++ b/querylog/querylog_test.go @@ -3,6 +3,7 @@ package querylog import ( "net" "testing" + "time" "github.com/AdguardTeam/AdGuardHome/dnsfilter" "github.com/miekg/dns" @@ -36,7 +37,10 @@ func TestQueryLog(t *testing.T) { res := dnsfilter.Result{} l.Add(&q, &a, &res, 0, nil, "upstream") - d := l.GetData() + params := GetDataParams{ + OlderThan: time.Now(), + } + d := l.GetData(params) m := d[0] mq := m["question"].(map[string]interface{}) assert.True(t, mq["host"].(string) == "example.org")