mirror of https://github.com/arendst/Tasmota.git
165 lines
5.0 KiB
Plaintext
165 lines
5.0 KiB
Plaintext
|
#! /usr/bin/env python3
|
||
|
|
||
|
# Path of the regex executable under test; main() runs it once per test case.
RE_EXEC = "./re"
|
||
|
|
||
|
# Test cases as (kind, regex, string) tuples. `kind` is "match" or "search"
# and selects re.match vs. re.search for the reference result (and the "-m"
# flag for the executable under test). Expected results are not stored here:
# main() computes them with Python's `re` module at run time.
test_suite = [
    # basics: literals and the * / + quantifiers, found anywhere in the string
    ("search", r"abc", "abcdef"),
    ("search", r"cde", "abcdef"),
    ("search", r"abc*", "abdef"),
    ("search", r"abc*", "abcccdef"),
    ("search", r"abc+", "abdef"),
    ("search", r"abc+", "abcccdef"),

    # match: same kind of patterns, but anchored at the start of the string
    ("match", r"abc", "abcdef"),
    ("match", r"abc*", "abdef"),

    # search vs match distinction: identical regex and string, different kind
    ("match", r"a*", "baa"),
    ("search", r"a*", "baa"),

    # nested group matching
    ("match", r"(([0-9]*)([a-z]*)[0-9]*)", "1234hello567"),
    ("match", r"([0-9]*)(([a-z]*)([0-9]*))", "1234hello567"),

    # non-capturing groups: (?:...) mixed with capturing groups
    ("match", r"(([0-9]*)(?:[a-z]*)[0-9]*)", "1234hello568"),
    ("match", r"(?:[0-9]*)(([a-z]*)(?:[0-9]*))", "1234hello568"),
    ("match", r"([0-9]*)(?:([a-z]*)(?:[0-9]*))", "1234hello568"),
    ("match", r"(?:)", "1234hello568"),
    # "?:" outside of a group: an optional "1" followed by a literal ":"
    ("match", r"1?:", "1:"),

    # named character classes: \d \s \w and their negations \S \D
    ("match", r"\d+", "123abc456"),
    ("match", r"\s+", " \t123abc456"),
    ("match", r"\w+", "123abc_456 abc"),
    ("match", r"(\w+)\s+(\w+)", "ABC \t123hello456 abc"),
    ("match", r"(\S+)\s+(\D+)", "ABC \thello abc456 abc"),
    ("match", r"(([0-9]*)([a-z]*)\d*)", "123hello456"),

    # classes: bracket expressions, including "-" at the start or end of a set
    ("match", r"[a]*", "a"),
    ("search", r"([yab]*)(e*)([cd])", "xyac"),
    ("search", r"([yab]*)(e*)([^y]?)$", "xyac"),
    ("match", r"[-]*", "--"),
    ("match", r"[-a]*", "-a-b"),
    ("match", r"[-ab]*", "-a-b"),
    ("match", r"[-a-c]*", "-a-b-d-"),
    ("match", r"[a-]*", "-a-b"),
    ("match", r"[ab-]*", "-a-b"),
    ("match", r"[a-c-]*", "-a-b-d-"),

    # escaped metacharacters: "\?" and "\(" are literals, so "?:" is not a
    # non-capturing group marker here
    ("match", r"(\?:)", ":"),
    ("match", r"\(?:", "(:"),

    # non-greedy quantifiers: ?? +? *?
    ("match", r"a(b??)(b*)c", "abbc"),
    ("match", r"a(b+?)(b*)c", "abbbc"),
    ("match", r"a(b*?)(b*)c", "abbbbc"),

    # greedy counterparts of the three cases above
    ("match", r"a(b?)(b*)c", "abbc"),
    ("match", r"a(b+)(b*)c", "abbbc"),
    ("match", r"a(b*)(b*)c", "abbbbc"),

    # errors: malformed or borderline patterns run against an empty string.
    # Whether a given pattern counts as an error is decided by Python's `re`
    # at run time (main() turns a compile error into b"REGEX ERROR").
    ("search", r"?", ""),
    ("search", r"*", ""),
    ("search", r"+", ""),
    ("search", r"[", ""),
    ("search", r"(", ""),
    ("search", r")", ""),
    ("search", "\\", ""),
    ("search", "|+", ""),
    ("search", "|*", ""),
    ("search", "|?", ""),
    ("search", "^*", ""),
    ("search", "$*", ""),
    ("search", "a*+", ""),
    ("search", "a*?", ""),
    ("search", "a**", ""),
]
|
||
|
|
||
|
import re
|
||
|
import sre_constants
|
||
|
import subprocess
|
||
|
from collections import OrderedDict
|
||
|
|
||
|
def parse_result(string, res):
    """Decode one engine output line into ``(engine_name, outcome)``.

    Args:
        string: the subject string the regex was run against; match offsets
            in ``res`` are used to slice it.
        res: one output line as bytes, ``b"<engine> <status>"``.

    Returns:
        A tuple ``(name, outcome)`` where ``name`` is the engine name (bytes)
        and ``outcome`` is

        * ``None`` if the status is ``-no match-``;
        * ``b"REGEX ERROR"`` if the status is ``REGEX ERROR``;
        * otherwise a tuple with one substring of ``string`` per
          whitespace-separated ``(start,end)`` pair after the ``match `` prefix.

    Raises:
        ValueError: if ``res`` contains no space, if the status is none of the
            three recognised forms, or if an offset token is not a plain
            Python literal.
        SyntaxError: if an offset token cannot be parsed at all.
    """
    # Local import keeps this block self-contained; literal_eval only accepts
    # literals, so text coming back from the subprocess is never executed.
    import ast

    name, rest = res.split(b" ", 1)
    if rest == b"-no match-":
        return name, None
    if rest == b"REGEX ERROR":
        return name, rest
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not rest.startswith(b"match "):
        raise ValueError("unexpected engine output: %r" % (res,))
    rest = rest[6:]  # drop the b"match " prefix
    # literal_eval() needs str, not bytes, hence the decode.
    spans = [ast.literal_eval(token.decode()) for token in rest.split()]
    matches = tuple(string[span[0]:span[1]] for span in spans)
    return name, matches
|
||
|
|
||
|
def fit_str(string, width):
    """Return ``string`` shortened for display in a column of ``width``.

    A string of at most ``width`` characters is returned unchanged; a longer
    one is replaced by ``string[:width - 2]`` followed by ``".."``.
    """
    overflows = len(string) > width
    return string[:width - 2] + ".." if overflows else string
|
||
|
|
||
|
def main():
    """Run every case in ``test_suite`` and print a pass/FAIL report.

    For each ``(kind, regex, string)`` case:

    1. Python's ``re`` module produces the reference result: ``None`` for no
       match, a tuple ``(group(0), *groups())`` for a match, or
       ``b"REGEX ERROR"`` if the pattern does not compile.
    2. ``RE_EXEC`` is run with the regex and string (plus ``-m`` for "match"
       cases); each retained output line is decoded with ``parse_result``.
    3. One ``pass``/``FAIL`` marker is printed per engine, followed by a short
       description of the case.

    A per-engine pass/fail summary is printed at the end.

    Raises:
        subprocess.CalledProcessError: if ``RE_EXEC`` exits non-zero for any
            reason other than the recognised "Error in regexp" failure.
    """
    regex_error = b"REGEX ERROR"
    engine_stats = OrderedDict()

    for kind, regex, string in test_suite:
        # Reference result from Python's re module.
        try:
            matcher = re.match if kind == "match" else re.search
            found = matcher(regex, string)
            if found is None:
                expected = None
            else:
                expected = (found.group(0),) + found.groups()
        except re.error:
            # re.error is the documented name of the compile-error exception;
            # the sre_constants module that also exposes it is deprecated.
            expected = regex_error

        # Result from the executable under test.
        args = (["-m"] if kind == "match" else []) + [regex, string]
        try:
            output = subprocess.check_output([RE_EXEC] + args, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            if e.returncode == 2 and e.output == b"fatal error: Error in regexp\n":
                # The executable rejected the pattern before running any engine:
                # synthesise one REGEX ERROR line per engine so each one is scored.
                engine_lines = [b"recursive REGEX ERROR", b"recursiveloop REGEX ERROR", b"backtrack REGEX ERROR", b"thompson REGEX ERROR", b"pike REGEX ERROR"]
            else:
                raise
        else:
            # Split into lines and drop the first and the last element.
            engine_lines = output.split(b'\n')[1:-1]

        # Score each engine against the reference.
        for line in engine_lines:
            engine_name, actual = parse_result(string, line)
            stats = engine_stats.setdefault(engine_name, [0, 0])  # [passes, failures]

            expected_cur = expected
            actual_cur = actual
            if engine_name == b"thompson":
                # The Thompson engine only reports match / no match, so a
                # successful match on either side is reduced to True. None and
                # the error sentinel are kept as they are, so that an error on
                # one side and a match on the other is not counted as a pass.
                if expected is not None and expected != regex_error:
                    expected_cur = True
                if actual is not None and actual != regex_error:
                    actual_cur = True

            if expected_cur == actual_cur:
                print("pass ", end="")
                stats[0] += 1
            else:
                print("FAIL ", end="")
                stats[1] += 1

        # Terminates the line of per-engine markers printed above.
        print("%s %-25s %-20s" % (kind[0], fit_str(regex, 25), fit_str(string, 20)))

    print("Ran %d tests, results:" % len(test_suite))
    for name, stats in engine_stats.items():
        print("%15s %2d pass %2d fail" % (str(name, encoding='utf8'), stats[0], stats[1]))
|
||
|
|
||
|
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
|