2021-11-25 21:57:37 +00:00
|
|
|
/********************************************************************
|
|
|
|
* Tasmota lib
|
|
|
|
*
|
|
|
|
* To use: `import re`
|
|
|
|
*
|
|
|
|
* Regex using re1.5
|
|
|
|
*******************************************************************/
|
|
|
|
#include "be_constobj.h"
|
|
|
|
#include "be_mem.h"
|
2022-06-16 21:37:33 +01:00
|
|
|
#include "be_object.h"
|
2022-09-12 21:02:25 +01:00
|
|
|
#include "../../re1.5/re1.5.h"
|
2021-11-25 21:57:37 +00:00
|
|
|
|
|
|
|
/********************************************************************
|
|
|
|
# Berry skeleton for `re` module
|
|
|
|
#
|
|
|
|
|
|
|
|
class re_pattern
|
|
|
|
var _p # comobj containing the compiled bytecode for the pattern
|
|
|
|
|
|
|
|
def search() end
|
|
|
|
def match() end
|
|
|
|
def split() end
|
|
|
|
end
|
|
|
|
|
|
|
|
re = module("re")
|
|
|
|
|
|
|
|
re.compile = def (regex_str) end # native
|
|
|
|
re.match = def (regex_str, str) end # native
|
|
|
|
re.search = def (regex_str, str) end # native
|
2021-11-26 18:03:21 +00:00
|
|
|
re.split = def (regex_str, str) end # native
|
2021-11-25 21:57:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
*******************************************************************/
|
|
|
|
|
|
|
|
extern const bclass be_class_re_pattern;
|
|
|
|
|
|
|
|
// Native functions be_const_func()
|
|
|
|
// Berry: `re.compile(pattern:string) -> instance(be_pattern)`
|
|
|
|
int be_re_compile(bvm *vm) {
|
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 1 && be_isstring(vm, 1)) {
|
|
|
|
const char * regex_str = be_tostring(vm, 1);
|
2023-05-09 21:10:16 +01:00
|
|
|
int sz = re1_5_sizecode(regex_str);
|
|
|
|
if (sz < 0) {
|
2021-11-25 21:57:37 +00:00
|
|
|
be_raise(vm, "internal_error", "error in regex");
|
2023-05-09 21:10:16 +01:00
|
|
|
}
|
2021-11-25 21:57:37 +00:00
|
|
|
|
|
|
|
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
|
|
|
|
int ret = re1_5_compilecode(code, regex_str);
|
|
|
|
if (ret != 0) {
|
|
|
|
be_raise(vm, "internal_error", "error in regex");
|
|
|
|
}
|
|
|
|
be_pushntvclass(vm, &be_class_re_pattern);
|
|
|
|
be_call(vm, 0);
|
2022-06-16 21:37:33 +01:00
|
|
|
be_newcomobj(vm, code, &be_commonobj_destroy_generic);
|
2021-11-25 21:57:37 +00:00
|
|
|
be_setmember(vm, -2, "_p");
|
|
|
|
be_pop(vm, 1);
|
|
|
|
be_return(vm);
|
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
2022-09-12 21:02:25 +01:00
|
|
|
// pushes either a list if matched, else `nil`
|
|
|
|
// return index of next offset, or -1 if not found
|
2023-05-07 15:18:34 +01:00
|
|
|
const char *be_re_match_search_run(bvm *vm, ByteProg *code, const char *hay, bbool is_anchored, bbool size_only) {
|
2021-11-25 21:57:37 +00:00
|
|
|
Subject subj = {hay, hay + strlen(hay)};
|
|
|
|
|
|
|
|
int sub_els = (code->sub + 1) * 2;
|
|
|
|
const char *sub[sub_els];
|
2022-10-10 13:20:30 +01:00
|
|
|
memset(sub, 0, sub_els * sizeof sub[0]);
|
2021-11-25 21:57:37 +00:00
|
|
|
|
|
|
|
if (!re1_5_recursiveloopprog(code, &subj, sub, sub_els, is_anchored)) {
|
2022-09-12 21:02:25 +01:00
|
|
|
be_pushnil(vm);
|
|
|
|
return NULL; // no match
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
be_newobject(vm, "list");
|
2022-10-10 13:20:30 +01:00
|
|
|
int k = sub_els;
|
2021-11-25 21:57:37 +00:00
|
|
|
for (int i = 0; i < k; i += 2) {
|
2022-10-10 13:20:30 +01:00
|
|
|
if (sub[i] == nil || sub[i+1] == nil) {
|
|
|
|
be_pushnil(vm);
|
|
|
|
} else {
|
2023-05-07 15:18:34 +01:00
|
|
|
if (size_only && i==0) {
|
|
|
|
be_pushint(vm, sub[i+1] - sub[i]);
|
|
|
|
} else {
|
|
|
|
be_pushnstring(vm, sub[i], sub[i+1] - sub[i]);
|
|
|
|
}
|
2022-10-10 13:20:30 +01:00
|
|
|
}
|
2021-11-25 21:57:37 +00:00
|
|
|
be_data_push(vm, -2);
|
|
|
|
be_pop(vm, 1);
|
|
|
|
}
|
2022-09-12 21:02:25 +01:00
|
|
|
be_pop(vm, 1); // remove list
|
|
|
|
return sub[1];
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
|
2023-05-07 15:18:34 +01:00
|
|
|
// Shared implementation of the one-shot `re.match`, `re.match2` and
// `re.search`: compiles the pattern (arg 1), runs it once against the string
// (arg 2) starting at the optional integer offset (arg 3), and returns the
// value pushed by `be_re_match_search_run` (a list, or `nil` if no match).
//
// A negative offset is treated as 0; an offset at or past the end of the
// string (which includes any empty string) returns `nil` without matching.
//
// Raises `internal_error` on an invalid pattern or allocation failure, and
// `type_error` when the first two arguments are not strings.
int be_re_match_search(bvm *vm, bbool is_anchored, bbool size_only) {
  int32_t argc = be_top(vm); // Get the number of arguments
  if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
    const char * regex_str = be_tostring(vm, 1);
    const char * hay = be_tostring(vm, 2);
    int32_t offset = 0;
    if (argc >= 3 && be_isint(vm, 3)) {
      offset = be_toint(vm, 3);
    }
    int32_t hay_len = strlen(hay);
    if (offset < 0) { offset = 0; }
    if (offset >= hay_len) { be_return_nil(vm); }   // any match of empty string returns nil, this catches implicitly when hay_len == 0
    hay += offset;    // shift to offset

    int sz = re1_5_sizecode(regex_str);
    if (sz < 0) {
      be_raise(vm, "internal_error", "error in regex");
    }

    ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
    if (code == NULL) {
      be_raise(vm, "internal_error", "out of memory");
    }
    // The bytecode is only needed for this call. Wrapping it in a comobj (as
    // be_re_compile does) lets the VM release it, including when a call below
    // raises; previously the buffer was never freed. The comobj stays on the
    // stack underneath the result, which does not affect the returned value.
    be_newcomobj(vm, code, &be_commonobj_destroy_generic);

    int ret = re1_5_compilecode(code, regex_str);
    if (ret != 0) {
      be_raise(vm, "internal_error", "error in regex");
    }
    be_re_match_search_run(vm, code, hay, is_anchored, size_only);
    be_return(vm);
  }
  be_raise(vm, "type_error", NULL);
}
|
|
|
|
|
|
|
|
// Shared implementation of the one-shot `re.matchall` and `re.searchall`:
// compiles the pattern (arg 1) and applies it repeatedly to the string
// (arg 2), each time resuming where the previous match ended. Returns a list
// with one sub-list per match (see `be_re_match_search_run`).
//
// Optional arg 3 is the maximum number of attempts; the default of -1 means
// no limit and 0 returns an empty list.
//
// Raises `internal_error` on an invalid pattern or allocation failure, and
// `type_error` when the first two arguments are not strings.
int be_re_match_search_all(bvm *vm, bbool is_anchored) {
  int32_t argc = be_top(vm); // Get the number of arguments
  if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
    const char * regex_str = be_tostring(vm, 1);
    const char * hay = be_tostring(vm, 2);
    int limit = -1;
    if (argc >= 3) {
      limit = be_toint(vm, 3);
    }

    int sz = re1_5_sizecode(regex_str);
    if (sz < 0) {
      be_raise(vm, "internal_error", "error in regex");
    }

    ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
    if (code == NULL) {
      be_raise(vm, "internal_error", "out of memory");
    }
    // Let a comobj own the temporary bytecode so it is released by the VM
    // (previously it was never freed). It sits below the result list on the
    // stack; all stack accesses below are relative to the top.
    be_newcomobj(vm, code, &be_commonobj_destroy_generic);

    int ret = re1_5_compilecode(code, regex_str);
    if (ret != 0) {
      be_raise(vm, "internal_error", "error in regex");
    }

    be_newobject(vm, "list");
    for (int i = limit; i != 0 && hay != NULL; i--) {
      const char *next = be_re_match_search_run(vm, code, hay, is_anchored, bfalse);
      if (next != NULL) {
        be_data_push(vm, -2);   // add sub list to list
      }
      be_pop(vm, 1);
      if (next == hay) {
        // Empty match: the matcher consumed nothing, so resuming from `next`
        // would repeat the same match forever. Stop at the end of the string,
        // otherwise step over one byte before trying again.
        if (*hay == '\0') { break; }
        next = hay + 1;
      }
      hay = next;
    }
    be_pop(vm, 1);
    be_return(vm);
  }
  be_raise(vm, "type_error", NULL);
}
|
|
|
|
|
2023-05-07 15:18:34 +01:00
|
|
|
// Berry: `re.match(s:string [, offset:int]) -> nil`
|
2021-11-25 21:57:37 +00:00
|
|
|
int be_re_match(bvm *vm) {
|
2023-05-07 15:18:34 +01:00
|
|
|
return be_re_match_search(vm, btrue, bfalse);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
2023-05-07 15:18:34 +01:00
|
|
|
// Berry: `re.match2(s:string [, offset:int]) -> nil`
|
|
|
|
int be_re_match2(bvm *vm) {
|
|
|
|
return be_re_match_search(vm, btrue, btrue);
|
|
|
|
}
|
|
|
|
// Berry: `re.search(s:string [, offset:int]) -> nil`
|
2021-11-25 21:57:37 +00:00
|
|
|
int be_re_search(bvm *vm) {
|
2023-05-07 15:18:34 +01:00
|
|
|
return be_re_match_search(vm, bfalse, bfalse);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
|
2022-09-12 21:02:25 +01:00
|
|
|
// Berry: `re.search_all`
|
|
|
|
int be_re_match_all(bvm *vm) {
|
|
|
|
return be_re_match_search_all(vm, btrue);
|
|
|
|
}
|
|
|
|
// Berry: `re.search_all`
|
|
|
|
int be_re_search_all(bvm *vm) {
|
|
|
|
return be_re_match_search_all(vm, bfalse);
|
|
|
|
}
|
|
|
|
|
2023-05-07 15:18:34 +01:00
|
|
|
// Berry: `re_pattern.search(s:string [, offset:int]) -> list(string)`
|
2021-11-25 21:57:37 +00:00
|
|
|
int re_pattern_search(bvm *vm) {
|
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 2 && be_isstring(vm, 2)) {
|
|
|
|
const char * hay = be_tostring(vm, 2);
|
2023-05-07 15:18:34 +01:00
|
|
|
int32_t offset = 0;
|
|
|
|
if (argc >= 3 && be_isint(vm, 3)) {
|
|
|
|
offset = be_toint(vm, 3);
|
|
|
|
}
|
|
|
|
int32_t hay_len = strlen(hay);
|
|
|
|
if (offset < 0) { offset = 0; }
|
|
|
|
if (offset >= hay_len) { be_return_nil(vm); } // any match of empty string returns nil, this catches implicitly when hay_len == 0
|
|
|
|
hay += offset; // shift to offset
|
2021-11-25 21:57:37 +00:00
|
|
|
be_getmember(vm, 1, "_p");
|
|
|
|
ByteProg * code = (ByteProg*) be_tocomptr(vm, -1);
|
2023-05-07 15:18:34 +01:00
|
|
|
be_re_match_search_run(vm, code, hay, bfalse, bfalse);
|
2022-09-12 21:02:25 +01:00
|
|
|
be_return(vm);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
2023-04-16 16:43:49 +01:00
|
|
|
// Berry: `re_pattern.searchall(s:string) -> list(list(string))`
|
|
|
|
int re_pattern_match_search_all(bvm *vm, bbool is_anchored) {
|
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 2 && be_isstring(vm, 2)) {
|
|
|
|
const char * hay = be_tostring(vm, 2);
|
|
|
|
be_getmember(vm, 1, "_p");
|
|
|
|
ByteProg * code = (ByteProg*) be_tocomptr(vm, -1);
|
|
|
|
int limit = -1;
|
|
|
|
if (argc >= 3) {
|
|
|
|
limit = be_toint(vm, 3);
|
|
|
|
}
|
|
|
|
|
|
|
|
be_newobject(vm, "list");
|
|
|
|
for (int i = limit; i != 0 && hay != NULL; i--) {
|
2023-05-07 15:18:34 +01:00
|
|
|
hay = be_re_match_search_run(vm, code, hay, is_anchored, bfalse);
|
2023-04-16 16:43:49 +01:00
|
|
|
if (hay != NULL) {
|
|
|
|
be_data_push(vm, -2); // add sub list to list
|
|
|
|
}
|
|
|
|
be_pop(vm, 1);
|
|
|
|
}
|
|
|
|
be_pop(vm, 1);
|
|
|
|
be_return(vm);
|
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Berry: `re_pattern.searchall(s:string) -> list(list(string))`
|
|
|
|
int re_pattern_search_all(bvm *vm) {
|
|
|
|
return re_pattern_match_search_all(vm, bfalse);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Berry: `re_pattern.matchall(s:string) -> list(list(string))`
|
|
|
|
int re_pattern_match_all(bvm *vm) {
|
|
|
|
return re_pattern_match_search_all(vm, btrue);
|
|
|
|
}
|
|
|
|
|
2023-05-07 15:18:34 +01:00
|
|
|
// Berry: `re_pattern.match(s:string [, offset:int]) -> list(string)`
|
|
|
|
int re_pattern_match_size(bvm *vm, bbool size_only) {
|
2021-11-25 21:57:37 +00:00
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 2 && be_isstring(vm, 2)) {
|
|
|
|
const char * hay = be_tostring(vm, 2);
|
2023-05-07 15:18:34 +01:00
|
|
|
int32_t offset = 0;
|
|
|
|
if (argc >= 3 && be_isint(vm, 3)) {
|
|
|
|
offset = be_toint(vm, 3);
|
|
|
|
}
|
|
|
|
int32_t hay_len = strlen(hay);
|
|
|
|
if (offset < 0) { offset = 0; }
|
|
|
|
if (offset >= hay_len) { be_return_nil(vm); } // any match of empty string returns nil, this catches implicitly when hay_len == 0
|
|
|
|
hay += offset; // shift to offset
|
2021-11-25 21:57:37 +00:00
|
|
|
be_getmember(vm, 1, "_p");
|
|
|
|
ByteProg * code = (ByteProg*) be_tocomptr(vm, -1);
|
2023-05-07 15:18:34 +01:00
|
|
|
be_re_match_search_run(vm, code, hay, btrue, size_only);
|
2022-09-12 21:02:25 +01:00
|
|
|
be_return(vm);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
2023-05-07 15:18:34 +01:00
|
|
|
// Berry: `re_pattern.match(s:string [, offset:int]) -> list(string)`
|
|
|
|
int re_pattern_match(bvm *vm) {
|
|
|
|
return re_pattern_match_size(vm, bfalse);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Berry: `re_pattern.match(s:string [, offset:int]) -> list(string)`
|
|
|
|
int re_pattern_match2(bvm *vm) {
|
|
|
|
return re_pattern_match_size(vm, btrue);
|
|
|
|
}
|
2021-11-26 18:03:21 +00:00
|
|
|
|
|
|
|
// Splits `hay` around every (non-anchored) match of `code` and returns the
// pieces as a Berry list of strings.
//
// `split_limit` is the maximum number of splits to perform: 0 returns the
// whole string as the only element, and a negative value effectively means
// no limit. The text remaining after the last split is always appended as
// the final element.
//
// Raises `internal_error` ("can't match") when a match is empty or its
// bounds are unset, since such a separator cannot advance the scan.
int re_pattern_split_run(bvm *vm, ByteProg *code, const char *hay, int split_limit) {
  Subject subj = {hay, hay + strlen(hay)};

  const int n_slots = (code->sub + 1) * 2;
  const char *sub[n_slots];

  be_newobject(vm, "list");
  while (split_limit != 0 && re1_5_recursiveloopprog(code, &subj, sub, n_slots, bfalse)) {
    const char *sep_start = sub[0];
    const char *sep_end = sub[1];
    if (sep_start == NULL || sep_end == NULL || sep_start == sep_end) {
      be_raise(vm, "internal_error", "can't match");
    }

    // piece preceding the separator
    be_pushnstring(vm, subj.begin, sep_start - subj.begin);
    be_data_push(vm, -2);
    be_pop(vm, 1);

    subj.begin = sep_end;   // continue right after the separator
    split_limit--;
  }

  // whatever is left (possibly the whole string) becomes the last element
  be_pushnstring(vm, subj.begin, subj.end - subj.begin);
  be_data_push(vm, -2);
  be_pop(vm, 1);

  be_pop(vm, 1);      // remove list
  be_return(vm);      // return list object
}
|
|
|
|
|
|
|
|
// Berry: `re_pattern.split(s:string [, split_limit:int]) -> list(string)`
|
2021-11-25 21:57:37 +00:00
|
|
|
int re_pattern_split(bvm *vm) {
|
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 2 && be_isstring(vm, 2)) {
|
2021-11-26 18:03:21 +00:00
|
|
|
int split_limit = -1;
|
|
|
|
if (argc >= 3) {
|
|
|
|
split_limit = be_toint(vm, 3);
|
|
|
|
}
|
2021-11-25 21:57:37 +00:00
|
|
|
const char * hay = be_tostring(vm, 2);
|
|
|
|
be_getmember(vm, 1, "_p");
|
|
|
|
ByteProg * code = (ByteProg*) be_tocomptr(vm, -1);
|
2021-11-26 18:03:21 +00:00
|
|
|
|
|
|
|
return re_pattern_split_run(vm, code, hay, split_limit);
|
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Berry: `re.split(pattern:string, s:string [, split_limit:int]) -> list(string)`
|
|
|
|
int be_re_split(bvm *vm) {
|
|
|
|
int32_t argc = be_top(vm); // Get the number of arguments
|
|
|
|
if (argc >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
|
|
|
|
const char * regex_str = be_tostring(vm, 1);
|
|
|
|
const char * hay = be_tostring(vm, 2);
|
|
|
|
int split_limit = -1;
|
|
|
|
if (argc >= 3) {
|
|
|
|
split_limit = be_toint(vm, 3);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
2023-05-09 21:10:16 +01:00
|
|
|
int sz = re1_5_sizecode(regex_str);
|
|
|
|
if (sz < 0) {
|
2021-11-26 18:03:21 +00:00
|
|
|
be_raise(vm, "internal_error", "error in regex");
|
2023-05-09 21:10:16 +01:00
|
|
|
}
|
2021-11-25 21:57:37 +00:00
|
|
|
|
2021-11-26 18:03:21 +00:00
|
|
|
ByteProg *code = be_os_malloc(sizeof(ByteProg) + sz);
|
|
|
|
int ret = re1_5_compilecode(code, regex_str);
|
|
|
|
if (ret != 0) {
|
|
|
|
be_raise(vm, "internal_error", "error in regex");
|
|
|
|
}
|
|
|
|
return re_pattern_split_run(vm, code, hay, split_limit);
|
2021-11-25 21:57:37 +00:00
|
|
|
}
|
|
|
|
be_raise(vm, "type_error", NULL);
|
|
|
|
}
|
|
|
|
|
2022-09-12 21:02:25 +01:00
|
|
|
#include "../generate/be_fixed_re.h"
|
|
|
|
#include "../generate/be_fixed_be_class_re_pattern.h"
|
|
|
|
/*
|
|
|
|
@const_object_info_begin
|
|
|
|
module re (scope: global) {
|
|
|
|
compile, func(be_re_compile)
|
|
|
|
search, func(be_re_search)
|
|
|
|
searchall, func(be_re_search_all)
|
|
|
|
match, func(be_re_match)
|
2023-05-07 15:18:34 +01:00
|
|
|
match2, func(be_re_match2)
|
2022-09-12 21:02:25 +01:00
|
|
|
matchall, func(be_re_match_all)
|
|
|
|
split, func(be_re_split)
|
|
|
|
}
|
|
|
|
@const_object_info_end
|
|
|
|
|
|
|
|
@const_object_info_begin
|
|
|
|
class be_class_re_pattern (scope: global, name: re_pattern) {
|
|
|
|
_p, var
|
|
|
|
search, func(re_pattern_search)
|
2023-04-16 16:43:49 +01:00
|
|
|
searchall, func(re_pattern_search_all)
|
2022-09-12 21:02:25 +01:00
|
|
|
match, func(re_pattern_match)
|
2023-05-07 15:18:34 +01:00
|
|
|
match2, func(re_pattern_match2)
|
2023-04-16 16:43:49 +01:00
|
|
|
matchall, func(re_pattern_match_all)
|
2022-09-12 21:02:25 +01:00
|
|
|
split, func(re_pattern_split)
|
|
|
|
}
|
|
|
|
@const_object_info_end */
|