rulelist: fix tabs

This commit is contained in:
Ainar Garipov 2023-07-06 19:30:42 +03:00
parent 0598d0d435
commit 7df637b003
3 changed files with 51 additions and 27 deletions

View File

@ -141,13 +141,19 @@ func parseLine(line []byte) (nonPrintIdx int, isRule bool) {
return -1, false
}
nonPrintIdx = bytes.IndexFunc(line, func(r rune) (ok bool) {
return !unicode.IsGraphic(r)
})
nonPrintIdx = bytes.IndexFunc(line, isNotPrintable)
return nonPrintIdx, nonPrintIdx == -1
}
// isNotPrintable reports whether r is a character that must not appear in a
// filtering rule.  A rune is considered acceptable when it is either a
// horizontal tab or a graphic character as defined by [unicode.IsGraphic].
func isNotPrintable(r rune) (ok bool) {
	if r == '\t' {
		// Unicode does not classify the tab as a graphic character, so it
		// has to be allowed here explicitly.
		return false
	}

	return !unicode.IsGraphic(r)
}
// parseLineTitle is like [parseLine] but additionally looks for a title. line
// is assumed to be trimmed of whitespace characters.
func (p *Parser) parseLineTitle(line []byte) (nonPrintIdx int, isRule bool) {
@ -156,9 +162,7 @@ func (p *Parser) parseLineTitle(line []byte) (nonPrintIdx int, isRule bool) {
}
if line[0] != '!' {
nonPrintIdx = bytes.IndexFunc(line, func(r rune) (ok bool) {
return !unicode.IsGraphic(r)
})
nonPrintIdx = bytes.IndexFunc(line, isNotPrintable)
return nonPrintIdx, nonPrintIdx == -1
}

View File

@ -35,7 +35,7 @@ func TestParser_Parse(t *testing.T) {
wantWritten: 0,
}, {
name: "html",
in: testHTMLText,
in: testRuleTextHTML,
wantErrMsg: rulelist.ErrHTML.Error(),
wantTitle: "",
wantRulesNum: 0,
@ -50,42 +50,42 @@ func TestParser_Parse(t *testing.T) {
wantWritten: 0,
}, {}, {
name: "rule",
in: testRuleText,
wantDst: testRuleText,
in: testRuleTextBlocked,
wantDst: testRuleTextBlocked,
wantErrMsg: "",
wantRulesNum: 1,
wantTitle: "",
wantWritten: len(testRuleText),
wantWritten: len(testRuleTextBlocked),
}, {
name: "html_in_rule",
in: testRuleText + testHTMLText,
wantDst: testRuleText + testHTMLText,
in: testRuleTextBlocked + testRuleTextHTML,
wantDst: testRuleTextBlocked + testRuleTextHTML,
wantErrMsg: "",
wantTitle: "",
wantRulesNum: 2,
wantWritten: len(testRuleText) + len(testHTMLText),
wantWritten: len(testRuleTextBlocked) + len(testRuleTextHTML),
}, {
name: "title",
in: "! Title: Test Title \n" +
"! Title: Bad, Ignored Title\n" +
testRuleText,
wantDst: testRuleText,
testRuleTextBlocked,
wantDst: testRuleTextBlocked,
wantErrMsg: "",
wantTitle: "Test Title",
wantRulesNum: 1,
wantWritten: len(testRuleText),
wantWritten: len(testRuleTextBlocked),
}, {
name: "bad_char",
in: "! Title: Test Title \n" +
testRuleText +
testRuleTextBlocked +
">>>\x7F<<<",
wantDst: testRuleText,
wantDst: testRuleTextBlocked,
wantErrMsg: "line at index 2: " +
"character at index 3: " +
"non-printable character",
wantTitle: "Test Title",
wantRulesNum: 1,
wantWritten: len(testRuleText),
wantWritten: len(testRuleTextBlocked),
}, {
name: "too_long",
in: strings.Repeat("a", rulelist.MaxRuleLen+1),
@ -94,6 +94,22 @@ func TestParser_Parse(t *testing.T) {
wantTitle: "",
wantRulesNum: 0,
wantWritten: 0,
}, {
name: "bad_tab_and_comment",
in: testRuleTextBadTab,
wantDst: testRuleTextBadTab,
wantErrMsg: "",
wantTitle: "",
wantRulesNum: 1,
wantWritten: len(testRuleTextBadTab),
}, {
name: "etc_hosts_tab_and_comment",
in: testRuleTextEtcHostsTab,
wantDst: testRuleTextEtcHostsTab,
wantErrMsg: "",
wantTitle: "",
wantRulesNum: 1,
wantWritten: len(testRuleTextEtcHostsTab),
}}
for _, tc := range testCases {
@ -132,7 +148,7 @@ func TestParser_Parse_writeError(t *testing.T) {
buf := make([]byte, rulelist.MaxRuleLen)
p := rulelist.NewParser()
r, err := p.Parse(dst, strings.NewReader(testRuleText), buf)
r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
require.NotNil(t, r)
testutil.AssertErrorMsg(t, "writing rule line: test error", err)
@ -143,9 +159,9 @@ func TestParser_Parse_checksums(t *testing.T) {
t.Parallel()
const (
withoutComments = testRuleText
withoutComments = testRuleTextBlocked
withComments = "! Some comment.\n" +
" " + testRuleText +
" " + testRuleTextBlocked +
"# Another comment.\n"
)
@ -175,7 +191,7 @@ var (
func BenchmarkParser_Parse(b *testing.B) {
dst := &bytes.Buffer{}
src := strings.NewReader(strings.Repeat(testRuleText, 1000))
src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
buf := make([]byte, rulelist.MaxRuleLen)
p := rulelist.NewParser()
@ -202,8 +218,10 @@ func FuzzParser_Parse(f *testing.F) {
"! Comment",
"! Title ",
"! Title XXX",
testHTMLText,
testRuleText,
testRuleTextEtcHostsTab,
testRuleTextHTML,
testRuleTextBlocked,
testRuleTextBadTab,
"1.2.3.4",
"1.2.3.4 etc-hosts.example",
">>>\x00<<<",

View File

@ -7,6 +7,8 @@ const testTimeout = 1 * time.Second
// Common texts for tests.  Each text is a single line terminated by a
// newline.
const (
// testHTMLText is an HTML doctype line; it has the same value as
// testRuleTextHTML.
testHTMLText = "<!DOCTYPE html>\n"
// testRuleText is a single blocking rule; it has the same value as
// testRuleTextBlocked.
testRuleText = "||blocked.example^\n"
// testRuleTextHTML is an HTML doctype line.  The parser tests expect
// rulelist.ErrHTML when it is the first line of the input, and expect it to
// be counted as a rule when it follows another rule.
testRuleTextHTML = "<!DOCTYPE html>\n"
// testRuleTextBlocked is a single blocking rule without any trailing
// content.
testRuleTextBlocked = "||blocked.example^\n"
// testRuleTextBadTab is a blocking rule followed by a tab character and a
// trailing "#" comment.  The parser tests expect it to be accepted as one
// rule and written to the destination unchanged.
testRuleTextBadTab = "||bad-tab-and-comment.example^\t# A comment.\n"
// testRuleTextEtcHostsTab is a line starting with the 0.0.0.0 address,
// followed by a tab character and a trailing "#" comment.  The parser tests
// expect it to be accepted as one rule and written to the destination
// unchanged.
//
// NOTE(review): the hostname contains two consecutive dots
// ("tab..example"); confirm that this is intentional.
testRuleTextEtcHostsTab = "0.0.0.0 tab..example^\t# A comment.\n"
)