diff --git a/CHANGELOG.md b/CHANGELOG.md index bcd1cfc7..851caebd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,11 +23,18 @@ See also the [v0.107.35 GitHub milestone][ms-v0.107.35]. NOTE: Add new changes BELOW THIS COMMENT. --> +### Fixed + +- `bufio.Scanner: token too long` errors when trying to add filtering-rule lists + with lines over 1024 bytes long ([#6003]). + ### Removed - Default exposure of the non-standard ports 784 and 8853 for DNS-over-QUIC in the `Dockerfile`. +[#6003]: https://github.com/AdguardTeam/AdGuardHome/issues/6003 + diff --git a/internal/filtering/filtering.go b/internal/filtering/filtering.go index f80b2220..b6249cee 100644 --- a/internal/filtering/filtering.go +++ b/internal/filtering/filtering.go @@ -943,7 +943,7 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) { d = &DNSFilter{ bufPool: &sync.Pool{ New: func() (buf any) { - bufVal := make([]byte, rulelist.MaxRuleLen) + bufVal := make([]byte, rulelist.DefaultRuleBufSize) return &bufVal }, diff --git a/internal/filtering/rulelist/parser.go b/internal/filtering/rulelist/parser.go index 0bf5dba8..e98567d8 100644 --- a/internal/filtering/rulelist/parser.go +++ b/internal/filtering/rulelist/parser.go @@ -7,6 +7,7 @@ import ( "hash/crc32" "io" "unicode" + "unicode/utf8" "github.com/AdguardTeam/golibs/errors" ) @@ -48,19 +49,29 @@ type ParseResult struct { // nil. func (p *Parser) Parse(dst io.Writer, src io.Reader, buf []byte) (r *ParseResult, err error) { s := bufio.NewScanner(src) - s.Buffer(buf, MaxRuleLen) - lineIdx := 0 + // Don't use [DefaultRuleBufSize] as the maximum size, since some + // filtering-rule lists compressed by e.g. HostlistsCompiler can have very + // large lines. The buffer optimization still works for the more common + // case of reasonably-sized lines. + // + // See https://github.com/AdguardTeam/AdGuardHome/issues/6003. + s.Buffer(buf, bufio.MaxScanTokenSize) + + // Use a one-based index for lines and columns, since these errors end up in + // the frontend, and users are more familiar with one-based line and column + // indexes. + lineNum := 1 for s.Scan() { var n int - n, err = p.processLine(dst, s.Bytes(), lineIdx) + n, err = p.processLine(dst, s.Bytes(), lineNum) p.written += n if err != nil { // Don't wrap the error, because it's informative enough as is. return p.result(), err } - lineIdx++ + lineNum++ } r = p.result() @@ -81,7 +92,7 @@ func (p *Parser) result() (r *ParseResult) { // processLine processes a single line. It may write to dst, and if it does, n // is the number of bytes written. -func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, err error) { +func (p *Parser) processLine(dst io.Writer, line []byte, lineNum int) (n int, err error) { trimmed := bytes.TrimSpace(line) if p.written == 0 && isHTMLLine(trimmed) { return 0, ErrHTML @@ -94,10 +105,13 @@ func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, er badIdx, isRule = p.parseLineTitle(trimmed) } if badIdx != -1 { + badRune, _ := utf8.DecodeRune(trimmed[badIdx:]) + return 0, fmt.Errorf( - "line at index %d: character at index %d: non-printable character", - lineIdx, - badIdx+bytes.Index(line, trimmed), + "line %d: character %d: non-printable character %q", + lineNum, + badIdx+bytes.Index(line, trimmed)+1, + badRune, ) } diff --git a/internal/filtering/rulelist/parser_test.go b/internal/filtering/rulelist/parser_test.go index 5e912988..c04d67ca 100644 --- a/internal/filtering/rulelist/parser_test.go +++ b/internal/filtering/rulelist/parser_test.go @@ -17,6 +17,9 @@ import ( func TestParser_Parse(t *testing.T) { t.Parallel() + longRule := strings.Repeat("a", rulelist.DefaultRuleBufSize+1) + "\n" + tooLongRule := strings.Repeat("a", bufio.MaxScanTokenSize+1) + "\n" + testCases := []struct { name string in string @@ -80,20 +83,28 @@ func TestParser_Parse(t *testing.T) { testRuleTextBlocked + ">>>\x7F<<<", wantDst: testRuleTextBlocked, - wantErrMsg: "line at index 2: " + - "character at index 3: " + - "non-printable character", + wantErrMsg: "line 3: " + + "character 4: " + + "non-printable character '\\x7f'", wantTitle: "Test Title", wantRulesNum: 1, wantWritten: len(testRuleTextBlocked), }, { name: "too_long", - in: strings.Repeat("a", rulelist.MaxRuleLen+1), + in: tooLongRule, wantDst: "", - wantErrMsg: "scanning filter contents: " + bufio.ErrTooLong.Error(), + wantErrMsg: "scanning filter contents: bufio.Scanner: token too long", wantTitle: "", wantRulesNum: 0, wantWritten: 0, + }, { + name: "longer_than_default", + in: longRule, + wantDst: longRule, + wantErrMsg: "", + wantTitle: "", + wantRulesNum: 1, + wantWritten: len(longRule), }, { name: "bad_tab_and_comment", in: testRuleTextBadTab, @@ -118,7 +129,7 @@ func TestParser_Parse(t *testing.T) { t.Parallel() dst := &bytes.Buffer{} - buf := make([]byte, rulelist.MaxRuleLen) + buf := make([]byte, rulelist.DefaultRuleBufSize) p := rulelist.NewParser() r, err := p.Parse(dst, strings.NewReader(tc.in), buf) @@ -145,7 +156,7 @@ func TestParser_Parse_writeError(t *testing.T) { return 1, errors.Error("test error") }, } - buf := make([]byte, rulelist.MaxRuleLen) + buf := make([]byte, rulelist.DefaultRuleBufSize) p := rulelist.NewParser() r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf) @@ -165,7 +176,7 @@ func TestParser_Parse_checksums(t *testing.T) { "# Another comment.\n" ) - buf := make([]byte, rulelist.MaxRuleLen) + buf := make([]byte, rulelist.DefaultRuleBufSize) p := rulelist.NewParser() r, err := p.Parse(&bytes.Buffer{}, strings.NewReader(withoutComments), buf) @@ -192,7 +203,7 @@ var ( func BenchmarkParser_Parse(b *testing.B) { dst := &bytes.Buffer{} src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000)) - buf := make([]byte, rulelist.MaxRuleLen) + buf := make([]byte, rulelist.DefaultRuleBufSize) p := rulelist.NewParser() b.ReportAllocs() diff --git a/internal/filtering/rulelist/rulelist.go b/internal/filtering/rulelist/rulelist.go index 1a6236c5..464650a1 100644 --- a/internal/filtering/rulelist/rulelist.go +++ b/internal/filtering/rulelist/rulelist.go @@ -4,8 +4,6 @@ // TODO(a.garipov): Expand. package rulelist -// MaxRuleLen is the maximum length of a line with a filtering rule, in bytes. -// -// TODO(a.garipov): Consider changing this to a rune length, like AdGuardDNS -// does. -const MaxRuleLen = 1024 +// DefaultRuleBufSize is the default length of a buffer used to read a line with +// a filtering rule, in bytes. +const DefaultRuleBufSize = 1024