extmod/modure: If the input string is bytes, return bytes results too.

This applies to match.group() and split().

On ARM Thumb2, this increased code size by 12 bytes.
This commit is contained in:
Paul Sokolovsky 2017-07-01 01:25:23 +03:00
parent 871a45dd0c
commit 58b7b01cb5
3 changed files with 14 additions and 3 deletions

View File

@ -31,6 +31,7 @@
#include "py/nlr.h"
#include "py/runtime.h"
#include "py/binary.h"
#include "py/objstr.h"
#if MICROPY_PY_URE
@ -69,7 +70,8 @@ STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
// no match for this group
return mp_const_none;
}
return mp_obj_new_str(start, self->caps[no * 2 + 1] - start, false);
return mp_obj_new_str_of_type(mp_obj_get_type(self->str),
(const byte*)start, self->caps[no * 2 + 1] - start);
}
MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
@ -129,6 +131,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
Subject subj;
size_t len;
const mp_obj_type_t *str_type = mp_obj_get_type(args[1]);
subj.begin = mp_obj_str_get_data(args[1], &len);
subj.end = subj.begin + len;
int caps_num = (self->re.sub + 1) * 2;
@ -150,7 +153,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
break;
}
mp_obj_t s = mp_obj_new_str(subj.begin, caps[0] - subj.begin, false);
mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, caps[0] - subj.begin);
mp_obj_list_append(retval, s);
if (self->re.sub > 0) {
mp_not_implemented("Splitting with sub-captures");
@ -161,7 +164,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
}
}
mp_obj_t s = mp_obj_new_str(subj.begin, subj.end - subj.begin, false);
mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, subj.end - subj.begin);
mp_obj_list_append(retval, s);
return retval;
}

View File

@ -80,3 +80,6 @@ try:
re.compile("*")
except:
print("Caught invalid regex")
# bytes objects
m = re.match(rb'a+?', b'ab'); print(m.group(0))

View File

@ -26,3 +26,8 @@ print(s)
r = re.compile("[a-f]+")
s = r.split("0a3b9")
print(s)
# bytes objects
r = re.compile(b"x")
s = r.split(b"fooxbar")
print(s)