# AdGuardHome/Filters/parser.py

2017-07-11 12:55:48 +01:00
import urllib2, datetime, mmap, re, os, sys
2016-11-18 13:09:22 +00:00
2016-11-22 14:41:21 +00:00
## GLOBAL VAR ##
2017-07-11 12:55:48 +01:00
# Directory containing this script; every data file is resolved relative to it.
# NOTE: the name shadows the built-in dir(), but it is kept because the rest
# of the file reads it under this name.
dir = os.path.dirname(__file__)
# Rules that have already been written to the output (used to skip duplicates).
processed_rules = set()
# Raw lines of exclusions.txt; the handle is closed as soon as it is read.
with open(os.path.join(dir, 'exclusions.txt'), 'r') as _exclusions_handle:
    exclusions_file = _exclusions_handle.read().split('\n')
# Remove comments
# A real list (not a lazy filter object) so it can be iterated once per rule.
exclusions = [line for line in exclusions_file if not line.startswith('!')]
2016-11-18 13:09:22 +00:00
## FUNCTION ##
def is_domain_rule(rule):
    """Tell whether *rule* is a bare-domain rule without a path or query.

    A rule qualifies when it contains a '.' and, looking only at the text
    after the first '.', either has no '/' or '?' at all or has at most one
    character following the first such separator (e.g. ``||example.com/^``).

    Returns False for rules without any '.' (including the empty string).
    """
    point_idx = rule.find('.')
    if point_idx == -1:
        return False
    # Positions of the separators that actually occur after the first dot.
    found = [
        idx
        for idx in (rule.find('/', point_idx), rule.find('?', point_idx))
        if idx != -1
    ]
    if not found:
        return True
    # Measure the tail from the EARLIEST separator: picking '/' whenever it
    # exists would let a long query such as "?foo=bar/" slip through.
    tail_length = len(rule) - min(found)
    return tail_length <= 2
2016-07-06 15:02:14 +01:00
def date_now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    current = datetime.datetime.now()
    return current.strftime(timestamp_format)
def get_content(url):
    """Download *url* and return its body split on '\\n'.

    The response object is always closed, even if reading fails.
    Errors raised by ``urllib2.urlopen`` / ``read`` propagate to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read().split('\n')
    finally:
        # Release the underlying socket instead of waiting for garbage collection.
        response.close()
def save_comment(comment, f):
    """Write a comment line to *f*, expanding the ``%timestamp%`` marker.

    If *comment* contains ``%timestamp%``, everything from the marker onward
    is replaced by the current timestamp followed by a newline; otherwise the
    comment is written unchanged.

    comment -- the comment text to write.
    f       -- writable file-like object.
    """
    prefix, marker, _ = comment.partition('%timestamp%')
    if marker:
        comment = prefix + date_now() + '\n'
    # write(), not writelines(): the latter would iterate the string
    # character by character.
    f.write(comment)
2016-11-22 14:41:21 +00:00
def is_rule_not_exclusion(rule):
    """Return True when no non-empty entry of ``exclusions`` occurs in *rule*.

    Matching is a plain substring test; empty exclusion entries are ignored.
    """
    return not any(
        pattern != '' and pattern in rule
        for pattern in exclusions
    )
2016-11-22 14:41:21 +00:00
def is_not_duplication(rule):
    """Return True when *rule* is not yet present in ``processed_rules``."""
    already_seen = rule in processed_rules
    return not already_seen
2016-11-18 13:09:22 +00:00
def write_rule(rule, f):
    """Write *rule* to *f* if it is a comment or a new domain rule.

    A rule is written (followed by a newline) and recorded in
    ``processed_rules`` when it starts with '!' or when it is a domain rule
    that has not been recorded before. Anything else is silently skipped.

    rule -- the rule text, without a trailing newline.
    f    -- writable file-like object.
    """
    new_domain_rule = is_domain_rule(rule) and is_not_duplication(rule)
    if not (new_domain_rule or rule.startswith('!')):
        return
    # write(), not writelines(): the latter would iterate the string
    # character by character.
    f.write(rule + '\n')
    processed_rules.add(rule)
2016-11-18 13:09:22 +00:00
2016-11-22 14:41:21 +00:00
def save_url_rule(line, f):
    """Process a ``url <address>`` template line.

    Downloads the list at the address, skips rules matched by the exclusion
    list, drops everything from the first '$' onward in each remaining rule
    and hands the result to ``write_rule``.

    line -- the template line, starting with the ``url`` keyword.
    f    -- writable file-like object receiving the rules.
    """
    url = line.strip()
    if url.startswith('url'):
        # Remove only the leading keyword. str.replace('url', '') would also
        # delete every later "url" that is part of the address itself.
        url = url[len('url'):].strip()
    for rule in get_content(url):
        if not is_rule_not_exclusion(rule):
            continue
        # partition() yields the whole rule when there is no '$'.
        write_rule(rule.partition('$')[0], f)
2016-07-06 15:02:14 +01:00
2016-07-07 08:28:04 +01:00
def save_file_rule(line, f):
    """Process a ``file <name>`` template line.

    Opens the named file relative to the script directory and passes each of
    its lines, with trailing whitespace removed, to ``write_rule``.

    line -- the template line, starting with the ``file`` keyword.
    f    -- writable file-like object receiving the rules.
    """
    file_name = line.strip()
    if file_name.startswith('file'):
        # Remove only the leading keyword. str.replace('file', '') would also
        # mangle names that contain "file" (e.g. "my_file_rules.txt").
        file_name = file_name[len('file'):].strip()
    with open(os.path.join(dir, file_name), 'r') as rf:
        for rule in rf:
            write_rule(rule.rstrip(), f)
2016-07-07 08:28:04 +01:00
2016-11-18 13:09:22 +00:00
## MAIN ##
2017-07-11 12:55:48 +01:00
# Build filter.txt from filter.template: each template line is dispatched to
# the handler registered for its leading keyword; other lines are ignored.
_handlers = (
    ('!', save_comment),
    ('url', save_url_rule),
    ('file', save_file_rule),
)
with open(os.path.join(dir, 'filter.template'), 'r') as template, \
        open(os.path.join(dir, 'filter.txt'), 'w') as output:
    for entry in template:
        for keyword, handler in _handlers:
            if entry.startswith(keyword):
                handler(entry, output)
                # The keywords are mutually exclusive, so stop at the first match.
                break
sys.exit(0)
2017-07-11 12:55:48 +01:00