rulelist: fix tabs

2023-07-06 19:30:42 +03:00 · 2023-07-06 19:30:42 +03:00 · 7df637b003
parent 0598d0d435
commit 7df637b003
3 changed files with 51 additions and 27 deletions
--- a/internal/filtering/rulelist/parser.go
+++ b/internal/filtering/rulelist/parser.go
@ -141,13 +141,19 @@ func parseLine(line []byte) (nonPrintIdx int, isRule bool) {
 		return -1, false
 	}

-	nonPrintIdx = bytes.IndexFunc(line, func(r rune) (ok bool) {
-		return !unicode.IsGraphic(r)
-	})
+	nonPrintIdx = bytes.IndexFunc(line, isNotPrintable)

 	return nonPrintIdx, nonPrintIdx == -1
 }

+// isNotPrintable returns true if r is not a printable character that can be
+// contained in a filtering rule.
+func isNotPrintable(r rune) (ok bool) {
+	// Tab isn't included in Unicode's graphic characters, so treat it as
+	// printable here explicitly.
+	return r != '\t' && !unicode.IsGraphic(r)
+}
+
 // parseLineTitle is like [parseLine] but additionally looks for a title.  line
 // is assumed to be trimmed of whitespace characters.
 func (p *Parser) parseLineTitle(line []byte) (nonPrintIdx int, isRule bool) {
@ -156,9 +162,7 @@ func (p *Parser) parseLineTitle(line []byte) (nonPrintIdx int, isRule bool) {
 	}

 	if line[0] != '!' {
-		nonPrintIdx = bytes.IndexFunc(line, func(r rune) (ok bool) {
-			return !unicode.IsGraphic(r)
-		})
+		nonPrintIdx = bytes.IndexFunc(line, isNotPrintable)

 		return nonPrintIdx, nonPrintIdx == -1
 	}
--- a/internal/filtering/rulelist/parser_test.go
+++ b/internal/filtering/rulelist/parser_test.go
@ -35,7 +35,7 @@ func TestParser_Parse(t *testing.T) {
 		wantWritten:  0,
 	}, {
 		name:         "html",
-		in:           testHTMLText,
+		in:           testRuleTextHTML,
 		wantErrMsg:   rulelist.ErrHTML.Error(),
 		wantTitle:    "",
 		wantRulesNum: 0,
@ -50,42 +50,42 @@ func TestParser_Parse(t *testing.T) {
 		wantWritten:  0,
 	}, {}, {
 		name:         "rule",
-		in:           testRuleText,
-		wantDst:      testRuleText,
+		in:           testRuleTextBlocked,
+		wantDst:      testRuleTextBlocked,
 		wantErrMsg:   "",
 		wantRulesNum: 1,
 		wantTitle:    "",
-		wantWritten:  len(testRuleText),
+		wantWritten:  len(testRuleTextBlocked),
 	}, {
 		name:         "html_in_rule",
-		in:           testRuleText + testHTMLText,
-		wantDst:      testRuleText + testHTMLText,
+		in:           testRuleTextBlocked + testRuleTextHTML,
+		wantDst:      testRuleTextBlocked + testRuleTextHTML,
 		wantErrMsg:   "",
 		wantTitle:    "",
 		wantRulesNum: 2,
-		wantWritten:  len(testRuleText) + len(testHTMLText),
+		wantWritten:  len(testRuleTextBlocked) + len(testRuleTextHTML),
 	}, {
 		name: "title",
 		in: "! Title:  Test Title \n" +
 			"! Title: Bad, Ignored Title\n" +
-			testRuleText,
-		wantDst:      testRuleText,
+			testRuleTextBlocked,
+		wantDst:      testRuleTextBlocked,
 		wantErrMsg:   "",
 		wantTitle:    "Test Title",
 		wantRulesNum: 1,
-		wantWritten:  len(testRuleText),
+		wantWritten:  len(testRuleTextBlocked),
 	}, {
 		name: "bad_char",
 		in: "! Title:  Test Title \n" +
-			testRuleText +
+			testRuleTextBlocked +
 			">>>\x7F<<<",
-		wantDst: testRuleText,
+		wantDst: testRuleTextBlocked,
 		wantErrMsg: "line at index 2: " +
 			"character at index 3: " +
 			"non-printable character",
 		wantTitle:    "Test Title",
 		wantRulesNum: 1,
-		wantWritten:  len(testRuleText),
+		wantWritten:  len(testRuleTextBlocked),
 	}, {
 		name:         "too_long",
 		in:           strings.Repeat("a", rulelist.MaxRuleLen+1),
@ -94,6 +94,22 @@ func TestParser_Parse(t *testing.T) {
 		wantTitle:    "",
 		wantRulesNum: 0,
 		wantWritten:  0,
+	}, {
+		name:         "bad_tab_and_comment",
+		in:           testRuleTextBadTab,
+		wantDst:      testRuleTextBadTab,
+		wantErrMsg:   "",
+		wantTitle:    "",
+		wantRulesNum: 1,
+		wantWritten:  len(testRuleTextBadTab),
+	}, {
+		name:         "etc_hosts_tab_and_comment",
+		in:           testRuleTextEtcHostsTab,
+		wantDst:      testRuleTextEtcHostsTab,
+		wantErrMsg:   "",
+		wantTitle:    "",
+		wantRulesNum: 1,
+		wantWritten:  len(testRuleTextEtcHostsTab),
 	}}

 	for _, tc := range testCases {
@ -132,7 +148,7 @@ func TestParser_Parse_writeError(t *testing.T) {
 	buf := make([]byte, rulelist.MaxRuleLen)

 	p := rulelist.NewParser()
-	r, err := p.Parse(dst, strings.NewReader(testRuleText), buf)
+	r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
 	require.NotNil(t, r)

 	testutil.AssertErrorMsg(t, "writing rule line: test error", err)
@ -143,9 +159,9 @@ func TestParser_Parse_checksums(t *testing.T) {
 	t.Parallel()

 	const (
-		withoutComments = testRuleText
+		withoutComments = testRuleTextBlocked
 		withComments    = "! Some comment.\n" +
-			"  " + testRuleText +
+			"  " + testRuleTextBlocked +
 			"# Another comment.\n"
 	)

@ -175,7 +191,7 @@ var (

 func BenchmarkParser_Parse(b *testing.B) {
 	dst := &bytes.Buffer{}
-	src := strings.NewReader(strings.Repeat(testRuleText, 1000))
+	src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
 	buf := make([]byte, rulelist.MaxRuleLen)
 	p := rulelist.NewParser()

@ -202,8 +218,10 @@ func FuzzParser_Parse(f *testing.F) {
 		"! Comment",
 		"! Title ",
 		"! Title XXX",
-		testHTMLText,
-		testRuleText,
+		testRuleTextEtcHostsTab,
+		testRuleTextHTML,
+		testRuleTextBlocked,
+		testRuleTextBadTab,
 		"1.2.3.4",
 		"1.2.3.4 etc-hosts.example",
 		">>>\x00<<<",
--- a/internal/filtering/rulelist/rulelist_test.go
+++ b/internal/filtering/rulelist/rulelist_test.go
@ -7,6 +7,8 @@ const testTimeout = 1 * time.Second

 // Common texts for tests.
 const (
-	testHTMLText = "<!DOCTYPE html>\n"
-	testRuleText = "||blocked.example^\n"
+	testRuleTextHTML        = "<!DOCTYPE html>\n"
+	testRuleTextBlocked     = "||blocked.example^\n"
+	testRuleTextBadTab      = "||bad-tab-and-comment.example^\t# A comment.\n"
+	testRuleTextEtcHostsTab = "0.0.0.0 tab..example^\t# A comment.\n"
 )