From d09a5b51c2c96e0315ec000589fdf81ca41c3fec Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 21 Feb 2015 10:33:20 +0000 Subject: [PATCH] extmod: Pull in upstream changes to re1.5; fixes bug, adds named class. --- extmod/re1.5/charclass.c | 20 ++++++++++++++++++++ extmod/re1.5/compilecode.c | 15 ++++++++++++--- extmod/re1.5/re1.5.h | 2 ++ extmod/re1.5/recursiveloop.c | 6 ++++++ 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/extmod/re1.5/charclass.c b/extmod/re1.5/charclass.c index c9df403750..7f6388c93d 100644 --- a/extmod/re1.5/charclass.c +++ b/extmod/re1.5/charclass.c @@ -11,3 +11,23 @@ int _re1_5_classmatch(const char *pc, const char *sp) } return !is_positive; } + +int _re1_5_namedclassmatch(const char *pc, const char *sp) +{ + // pc points to name of class + int off = (*pc >> 5) & 1; + if ((*pc | 0x20) == 'd') { + if (!(*sp >= '0' && *sp <= '9')) { + off ^= 1; + } + } else if ((*pc | 0x20) == 's') { + if (!(*sp == ' ' || (*sp >= '\t' && *sp <= '\r'))) { + off ^= 1; + } + } else { // w + if (!((*sp >= 'A' && *sp <= 'Z') || (*sp >= 'a' && *sp <= 'z') || (*sp >= '0' && *sp <= '9') || *sp == '_')) { + off ^= 1; + } + } + return off; +} diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c index 2ed38d02c3..42672a756b 100644 --- a/extmod/re1.5/compilecode.c +++ b/extmod/re1.5/compilecode.c @@ -78,6 +78,13 @@ const char *_compilecode(const char *re, ByteProg *prog) switch (*re) { case '\\': re++; + if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') { + term = pc; + EMIT(pc++, NamedClass); + EMIT(pc++, *re); + prog->len++; + break; + } default: term = pc; EMIT(pc++, Char); @@ -112,11 +119,12 @@ const char *_compilecode(const char *re, ByteProg *prog) EMIT(term + 1, cnt); break; } - case '(': + case '(': { term = pc; + int sub = ++prog->sub; EMIT(pc++, Save); - EMIT(pc++, 2 * ++prog->sub); + EMIT(pc++, 2 * sub); prog->len++; prog->bytelen = pc; @@ -124,10 +132,11 @@ const char *_compilecode(const char *re, ByteProg *prog) pc = prog->bytelen; EMIT(pc++, Save); - EMIT(pc++, 2 * prog->sub + 1); + EMIT(pc++, 2 * sub + 1); prog->len++; break; + } case '?': insert_code(code, term, 2, &pc); EMIT(term, Split); diff --git a/extmod/re1.5/re1.5.h b/extmod/re1.5/re1.5.h index d8c1cf3e54..815c5d33d1 100644 --- a/extmod/re1.5/re1.5.h +++ b/extmod/re1.5/re1.5.h @@ -82,6 +82,7 @@ enum /* Inst.opcode */ Any, Class, ClassNot, + NamedClass, ASSERTS = 0x50, Bol = ASSERTS, @@ -145,5 +146,6 @@ int re1_5_compilecode(ByteProg *prog, const char *re); void re1_5_dumpcode(ByteProg *prog); void cleanmarks(ByteProg *prog); int _re1_5_classmatch(const char *pc, const char *sp); +int _re1_5_namedclassmatch(const char *pc, const char *sp); #endif /*_RE1_5_REGEXP__H*/ diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c index f133b5d9b1..e8fef03049 100644 --- a/extmod/re1.5/recursiveloop.c +++ b/extmod/re1.5/recursiveloop.c @@ -30,6 +30,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n pc += *(unsigned char*)pc * 2 + 1; sp++; continue; + case NamedClass: + if (!_re1_5_namedclassmatch(pc, sp)) + return 0; + pc++; + sp++; + continue; case Match: return 1; case Jmp: