remove duplicate rules

This commit is contained in:
nkartyshov 2016-11-22 17:41:21 +03:00
parent 6815f67d95
commit aca809cbd3
2 changed files with 37 additions and 553 deletions

File diff suppressed because it is too large. [Load Diff]

View File

@@ -1,5 +1,9 @@
import urllib2, datetime, mmap, re
## GLOBAL VAR ##
# Rules already written to the output file; lets write_rule() skip duplicates.
processed_rules = set()
# One exclusion pattern per line of exclusions.txt; rules containing any
# non-empty pattern are dropped. Use a context manager so the handle is
# closed deterministically (the original bare open() leaked it).
with open('exclusions.txt', 'r') as _exclusions_file:
    exclusions = _exclusions_file.read().split('\n')
## FUNCTION ##
def is_domain_rule(rule):
point_idx = rule.find('.')
@@ -26,20 +30,24 @@ def save_comment(comment, f):
comment = comment[:idx] + date_now() + '\n'
f.writelines(comment)
def is_rule_not_exclusion(rule, patterns=None):
    """Return True when *rule* matches none of the exclusion patterns.

    patterns -- iterable of substring patterns; defaults to the
    module-level ``exclusions`` list (backward compatible with the
    original one-argument call sites).

    Empty pattern lines are ignored so the trailing newline produced by
    splitting exclusions.txt does not exclude every rule.
    """
    if patterns is None:
        patterns = exclusions
    for line in patterns:
        # Substring match, as in the original: any non-empty pattern
        # occurring anywhere in the rule excludes it.
        if line in rule and line != '':
            return False
    return True
def is_not_duplication(rule):
    """True when *rule* has not been written to the output yet."""
    already_seen = rule in processed_rules
    return not already_seen
def write_rule(rule, f):
    """Emit *rule* to *f*, newline-terminated, if it is a domain rule
    that has not been written before; record it as processed."""
    if not is_domain_rule(rule):
        return
    if not is_not_duplication(rule):
        return
    f.writelines(rule + '\n')
    processed_rules.add(rule)
# NOTE(review): this span is a unified-diff hunk, not runnable source —
# adjacent lines interleave the pre-commit and post-commit versions of
# save_url_rule, and the hunk is truncated before the function body ends
# (presumably an else-branch follows — cannot confirm from this view).
def save_url_rule(line, exclusions, f):
def save_url_rule(line, f):
# Strip the leading 'url' keyword to obtain the filter-list URL.
url = line.replace('url', '').strip()
for rule in get_content(url):
if is_rule_not_exclusion(rule, exclusions):
if is_rule_not_exclusion(rule):
# '$' introduces filter options; keep only the part before it.
if rule.find('$') != -1:
idx = rule.find('$');
write_rule(rule[:idx], f)
@@ -50,17 +58,16 @@ def save_file_rule(line, f):
# NOTE(review): truncated diff hunk — the 'def save_file_rule(line, f):'
# header sits inside the hunk marker above, and the last two lines below
# are the pre-commit (raw writelines) and post-commit (deduplicating
# write_rule) versions of the same statement.
# Strip the leading 'file' keyword to obtain the local rules filename.
file_name = line.replace('file', '').strip()
with open(file_name, 'r') as rf:
for rule in rf:
f.writelines(rule)
write_rule(rule.rstrip(), f)
## MAIN ##
# Walk the template line by line: '!' comment lines are refreshed with the
# current date, 'url' lines pull remote filter lists, and 'file' lines
# splice in local rule files. Output goes to filter.txt.
with open('filter.template', 'r') as tmpl:
    with open('filter.txt', 'w') as f:
        for line in tmpl:
            if line.startswith('!'):
                save_comment(line, f)
            if line.startswith('url'):
                save_url_rule(line, f)
            if line.startswith('file'):
                save_file_rule(line, f)