Merge pull request #13817 from s-hadinger/re_split

Berry improved re.split
This commit is contained in:
s-hadinger 2021-11-26 19:37:15 +01:00 committed by GitHub
commit 187ff280cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 62 additions and 26 deletions

View File

@ -26,6 +26,7 @@ re = module("re")
re.compile = def (regex_str) end # native
re.match = def (regex_str, str) end # native
re.search = def (regex_str, str) end # native
re.split = def (regex_str, str) end # native
*******************************************************************/
@ -145,39 +146,73 @@ int re_pattern_match(bvm *vm) {
be_raise(vm, "type_error", NULL);
}
// Berry: `re_pattern.split(s:string) -> list(string)`
int re_pattern_split_run(bvm *vm, ByteProg *code, const char *hay, int split_limit) {
Subject subj = {hay, hay + strlen(hay)};
int sub_els = (code->sub + 1) * 2;
const char *sub[sub_els];
be_newobject(vm, "list");
while (1) {
if (split_limit == 0 || !re1_5_recursiveloopprog(code, &subj, sub, sub_els, bfalse)) {
be_pushnstring(vm, subj.begin, subj.end - subj.begin);
be_data_push(vm, -2);
be_pop(vm, 1);
break;
}
if (sub[0] == NULL || sub[1] == NULL || sub[0] == sub[1]) {
be_raise(vm, "internal_error", "can't match");
}
be_pushnstring(vm, subj.begin, sub[0] - subj.begin);
be_data_push(vm, -2);
be_pop(vm, 1);
subj.begin = sub[1];
split_limit--;
}
be_pop(vm, 1); // remove list
be_return(vm); // return list object
}
// Berry: `re_pattern.split(s:string [, split_limit:int]) -> list(string)`
int re_pattern_split(bvm *vm) {
int32_t argc = be_top(vm); // Get the number of arguments
if (argc >= 2 && be_isstring(vm, 2)) {
int split_limit = -1;
if (argc >= 3) {
split_limit = be_toint(vm, 3);
}
const char * hay = be_tostring(vm, 2);
be_getmember(vm, 1, "_p");
ByteProg * code = (ByteProg*) be_tocomptr(vm, -1);
Subject subj = {hay, hay + strlen(hay)};
int sub_els = (code->sub + 1) * 2;
const char *sub[sub_els];
return re_pattern_split_run(vm, code, hay, split_limit);
}
be_raise(vm, "type_error", NULL);
}
be_newobject(vm, "list");
while (1) {
if (!re1_5_recursiveloopprog(code, &subj, sub, sub_els, bfalse)) {
be_pushnstring(vm, subj.begin, subj.end - subj.begin);
be_data_push(vm, -2);
be_pop(vm, 1);
break;
}
if (sub[0] == NULL || sub[1] == NULL || sub[0] == sub[1]) {
be_raise(vm, "internal_error", "can't match");
}
be_pushnstring(vm, subj.begin, sub[0] - subj.begin);
be_data_push(vm, -2);
be_pop(vm, 1);
subj.begin = sub[1];
// Berry: `re.split(pattern:string, s:string [, split_limit:int]) -> list(string)`
int be_re_split(bvm *vm) {
int32_t argc = be_top(vm); // Get the number of arguments
if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
const char * regex_str = be_tostring(vm, 1);
const char * hay = be_tostring(vm, 2);
int split_limit = -1;
if (argc >= 3) {
split_limit = be_toint(vm, 3);
}
be_pop(vm, 1); // remove list
be_return(vm); // return list object
int sz = re1_5_sizecode(regex_str);
if (sz < 0) {
be_raise(vm, "internal_error", "error in regex");
}
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
int ret = re1_5_compilecode(code, regex_str);
if (ret != 0) {
be_raise(vm, "internal_error", "error in regex");
}
return re_pattern_split_run(vm, code, hay, split_limit);
}
be_raise(vm, "type_error", NULL);
}
@ -187,11 +222,12 @@ int re_pattern_split(bvm *vm) {
********************************************************************/
be_local_module(re,
"re",
be_nested_map(3,
be_nested_map(4,
( (struct bmapnode*) &(const bmapnode[]) {
{ be_nested_key("compile", 1000265118, 7, -1), be_const_func(be_re_compile) },
{ be_nested_key("search", -2144130903, 6, 2), be_const_func(be_re_search) },
{ be_nested_key("match", 2116038550, 5, -1), be_const_func(be_re_match) },
{ be_nested_key("search", -2144130903, 6, -1), be_const_func(be_re_search) },
{ be_nested_key("match", 2116038550, 5, 0), be_const_func(be_re_match) },
{ be_nested_key("split", -2017972765, 5, -1), be_const_func(be_re_split) },
}))
);
BE_EXPORT_VARIABLE be_define_const_native_module(re);