extmod/ure: Use single function for match/search/sub.

Saves about 500 bytes on unix x64 and enables the CPython-compatible
usage of passing a compiled re object to these functions.
This commit is contained in:
stijn 2020-06-03 10:18:49 +02:00
parent bd06c698f0
commit 51fd6c9777
4 changed files with 43 additions and 39 deletions

View File

@ -71,8 +71,8 @@ mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *a
re_type.locals_dict = (void*)&re_locals_dict; re_type.locals_dict = (void*)&re_locals_dict;
mp_store_global(MP_QSTR_compile, MP_OBJ_FROM_PTR(&mod_re_compile_obj)); mp_store_global(MP_QSTR_compile, MP_OBJ_FROM_PTR(&mod_re_compile_obj));
mp_store_global(MP_QSTR_match, MP_OBJ_FROM_PTR(&mod_re_match_obj)); mp_store_global(MP_QSTR_match, MP_OBJ_FROM_PTR(&re_match_obj));
mp_store_global(MP_QSTR_search, MP_OBJ_FROM_PTR(&mod_re_search_obj)); mp_store_global(MP_QSTR_search, MP_OBJ_FROM_PTR(&re_search_obj));
MP_DYNRUNTIME_INIT_EXIT MP_DYNRUNTIME_INIT_EXIT
} }

View File

@ -53,6 +53,10 @@ typedef struct _mp_obj_match_t {
const char *caps[0]; const char *caps[0];
} mp_obj_match_t; } mp_obj_match_t;
STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args);
#if !MICROPY_ENABLE_DYNRUNTIME
STATIC const mp_obj_type_t re_type;
#endif
STATIC void match_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) { STATIC void match_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
(void)kind; (void)kind;
@ -175,7 +179,12 @@ STATIC void re_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t
STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) { STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
(void)n_args; (void)n_args;
mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]); mp_obj_re_t *self;
if (mp_obj_is_type(args[0], &re_type)) {
self = MP_OBJ_TO_PTR(args[0]);
} else {
self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
}
Subject subj; Subject subj;
size_t len; size_t len;
subj.begin = mp_obj_str_get_data(args[1], &len); subj.begin = mp_obj_str_get_data(args[1], &len);
@ -253,8 +262,13 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_split_obj, 2, 3, re_split);
#if MICROPY_PY_URE_SUB #if MICROPY_PY_URE_SUB
STATIC mp_obj_t re_sub_helper(mp_obj_t self_in, size_t n_args, const mp_obj_t *args) { STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
mp_obj_re_t *self = MP_OBJ_TO_PTR(self_in); mp_obj_re_t *self;
if (mp_obj_is_type(args[0], &re_type)) {
self = MP_OBJ_TO_PTR(args[0]);
} else {
self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
}
mp_obj_t replace = args[1]; mp_obj_t replace = args[1];
mp_obj_t where = args[2]; mp_obj_t where = args[2];
mp_int_t count = 0; mp_int_t count = 0;
@ -358,10 +372,7 @@ STATIC mp_obj_t re_sub_helper(mp_obj_t self_in, size_t n_args, const mp_obj_t *a
return mp_obj_new_str_from_vstr(mp_obj_get_type(where), &vstr_return); return mp_obj_new_str_from_vstr(mp_obj_get_type(where), &vstr_return);
} }
STATIC mp_obj_t re_sub(size_t n_args, const mp_obj_t *args) { MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub_helper);
return re_sub_helper(args[0], n_args, args);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub);
#endif #endif
@ -414,41 +425,14 @@ STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args) {
} }
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_compile_obj, 1, 2, mod_re_compile); MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_compile_obj, 1, 2, mod_re_compile);
STATIC mp_obj_t mod_re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
(void)n_args;
mp_obj_t self = mod_re_compile(1, args);
const mp_obj_t args2[] = {self, args[1]};
mp_obj_t match = ure_exec(is_anchored, 2, args2);
return match;
}
STATIC mp_obj_t mod_re_match(size_t n_args, const mp_obj_t *args) {
return mod_re_exec(true, n_args, args);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_match_obj, 2, 4, mod_re_match);
STATIC mp_obj_t mod_re_search(size_t n_args, const mp_obj_t *args) {
return mod_re_exec(false, n_args, args);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_search_obj, 2, 4, mod_re_search);
#if MICROPY_PY_URE_SUB
STATIC mp_obj_t mod_re_sub(size_t n_args, const mp_obj_t *args) {
mp_obj_t self = mod_re_compile(1, args);
return re_sub_helper(self, n_args, args);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_sub_obj, 3, 5, mod_re_sub);
#endif
#if !MICROPY_ENABLE_DYNRUNTIME #if !MICROPY_ENABLE_DYNRUNTIME
STATIC const mp_rom_map_elem_t mp_module_re_globals_table[] = { STATIC const mp_rom_map_elem_t mp_module_re_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ure) }, { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ure) },
{ MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) }, { MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&mod_re_match_obj) }, { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&mod_re_search_obj) }, { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
#if MICROPY_PY_URE_SUB #if MICROPY_PY_URE_SUB
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&mod_re_sub_obj) }, { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
#endif #endif
#if MICROPY_PY_URE_DEBUG #if MICROPY_PY_URE_DEBUG
{ MP_ROM_QSTR(MP_QSTR_DEBUG), MP_ROM_INT(FLAG_DEBUG) }, { MP_ROM_QSTR(MP_QSTR_DEBUG), MP_ROM_INT(FLAG_DEBUG) },

View File

@ -125,3 +125,14 @@ print(re.compile(r"[ax\-]").split("foo-bar"))
print(re.compile(r"[a\-x]").split("foo-bar")) print(re.compile(r"[a\-x]").split("foo-bar"))
print(re.compile(r"[\-ax]").split("foo-bar")) print(re.compile(r"[\-ax]").split("foo-bar"))
print("===") print("===")
# Module functions take str/bytes/re.
for f in (re.match, re.search):
print(f(".", "foo").group(0))
print(f(b".", b"foo").group(0))
print(f(re.compile("."), "foo").group(0))
try:
f(123, "a")
except TypeError:
print("TypeError")
print("===")

View File

@ -60,3 +60,12 @@ try:
re.sub("(a)", "b\\199999999999999999999999999999999999999", "a") re.sub("(a)", "b\\199999999999999999999999999999999999999", "a")
except: except:
print("invalid group") print("invalid group")
# Module function takes str/bytes/re.
print(re.sub("a", "a", "a"))
print(re.sub(b".", b"a", b"a"))
print(re.sub(re.compile("a"), "a", "a"))
try:
re.sub(123, "a", "a")
except TypeError:
print("TypeError")