From a9415abd7659bb6017919788a72bfc02781a955a Mon Sep 17 00:00:00 2001 From: nkartyshov Date: Fri, 18 Nov 2016 16:09:22 +0300 Subject: [PATCH] added exclusions list --- Filters/parser.py | 54 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/Filters/parser.py b/Filters/parser.py index 95e9df6d..7647de5b 100644 --- a/Filters/parser.py +++ b/Filters/parser.py @@ -1,5 +1,17 @@ -import urllib2 -import datetime +import urllib2, datetime, mmap, re + +## FUNCTION ## +def is_domain_rule(rule): + point_idx = rule.find('.') + if point_idx == -1: + return False + question_idx = rule.find('?', point_idx); + slash_idx = rule.find('/', point_idx) + if slash_idx == -1 and question_idx == -1: + return True + replace_idx = slash_idx if slash_idx != -1 else question_idx + tail = rule[replace_idx:] + return len(tail) <= 2 def date_now(): return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -14,18 +26,25 @@ def save_comment(comment, f): comment = comment[:idx] + date_now() + '\n' f.writelines(comment) -def save_url_rule(url, f): +def is_rule_not_exclusion(rule, exclusions): + for line in exclusions: + if line in rule and line != '': + return False + return True + +def write_rule(rule, f): + if is_domain_rule(rule): + f.writelines(rule + '\n') + +def save_url_rule(line, exclusions, f): url = line.replace('url', '').strip() - for rule in get_content(url): -# if rule.find('^') != -1: -# idx = rule.find('^') -# f.writelines(rule[:idx] + '\n') -# elif - if rule.find('$') != -1: - idx = rule.find('$'); - f.writelines(rule[:idx] + '\n') - else: - f.writelines(rule + '\n') + for rule in get_content(url): + if is_rule_not_exclusion(rule, exclusions): + if rule.find('$') != -1: + idx = rule.find('$'); + write_rule(rule[:idx], f) + else: + write_rule(rule, f) def save_file_rule(line, f): file_name = line.replace('file', '').strip() @@ -33,14 +52,15 @@ def save_file_rule(line, f): for rule in rf: f.writelines(rule) +## MAIN ## +exclusions = open('exclusions.txt', 'r').read().split('\n') with open('filter.template', 'r') as tmpl: - with open('filter.txt', 'w') as f: + with open('filter.txt', 'w') as f: for line in tmpl: if line.startswith('!'): save_comment(line, f) if line.startswith('url'): - save_url_rule(line, f) + save_url_rule(line, exclusions, f) if line.startswith('file'): - save_file_rule(line, f) - + save_file_rule(line, f)