Tasmota/lib/libesp32/Berry/src/be_strlib.c

784 lines
22 KiB
C

/********************************************************************
** Copyright (c) 2018-2020 Guan Wenliang
** This file is part of the Berry default interpreter.
** skiars@qq.com, https://github.com/Skiars/berry
** See Copyright Notice in the LICENSE file or at
** https://github.com/Skiars/berry/blob/master/LICENSE
********************************************************************/
#include "be_strlib.h"
#include "be_string.h"
#include "be_vm.h"
#include "be_class.h"
#include "be_module.h"
#include "be_exec.h"
#include "be_mem.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == '\n')
#define is_digit(c) ((c) >= '0' && (c) <= '9')
#define skip_space(s) while (is_space(*(s))) { ++(s); }
typedef bint (*str_opfunc)(const char*, const char*, bint, bint);
bstring* be_strcat(bvm *vm, bstring *s1, bstring *s2)
{
size_t len = (size_t)str_len(s1) + str_len(s2);
if (len <= SHORT_STR_MAX_LEN) {
char buf[SHORT_STR_MAX_LEN + 1];
strcpy(buf, str(s1));
strncat(buf, str(s2), len);
return be_newstrn(vm, buf, len);
} else { /* long string */
bstring *s = be_newstrn(vm, NULL, len);
char *sbuf = (char*)str(s);
strcpy(sbuf, str(s1));
strcpy(sbuf + str_len(s1), str(s2));
return s;
}
}
int be_strcmp(bstring *s1, bstring *s2)
{
if (be_eqstr(s1, s2)) {
return 0;
}
return strcmp(str(s1), str(s2));
}
bstring* be_num2str(bvm *vm, bvalue *v)
{
char buf[25];
if (var_isint(v)) {
sprintf(buf, BE_INT_FORMAT, var_toint(v));
} else if (var_isreal(v)) {
sprintf(buf, "%g", var_toreal(v));
} else {
sprintf(buf, "(nan)");
}
return be_newstr(vm, buf);
}
static void module2str(char *buf, bvalue *v)
{
const char *name = be_module_name(cast(bmodule*, var_toobj(v)));
if (name) {
sprintf(buf, "<module: %s>", name);
} else {
sprintf(buf, "<module: %p>", var_toobj(v));
}
}
static bstring* sim2str(bvm *vm, bvalue *v)
{
char sbuf[64]; /* BUG: memory overflow */
switch (var_type(v)) {
case BE_NIL:
strcpy(sbuf, "nil");
break;
case BE_BOOL:
strcpy(sbuf, var_tobool(v) ? "true" : "false");
break;
case BE_INT:
sprintf(sbuf, BE_INT_FORMAT, var_toint(v));
break;
case BE_REAL:
sprintf(sbuf, "%g", var_toreal(v));
break;
case BE_CLOSURE: case BE_NTVCLOS: case BE_NTVFUNC:
sprintf(sbuf, "<function: %p>", var_toobj(v));
break;
case BE_CLASS:
sprintf(sbuf, "<class: %s>",
str(be_class_name(cast(bclass*, var_toobj(v)))));
break;
case BE_MODULE:
module2str(sbuf, v);
break;
default:
strcpy(sbuf, "(unknow value)");
break;
}
return be_newstr(vm, sbuf);
}
static bstring* ins2str(bvm *vm, int idx)
{
bstring *s = str_literal(vm, "tostring");
binstance *obj = var_toobj(vm->reg + idx);
/* get method 'tostring' */
int type = be_instance_member(vm, obj, s, vm->top);
be_incrtop(vm); /* push the obj::tostring to stack */
if (basetype(type) != BE_FUNCTION) {
bstring *name = be_class_name(be_instance_class(obj));
char *sbuf = be_malloc(vm, (size_t)str_len(name) + 16);
sprintf(sbuf, "<instance: %s()>", str(name));
be_stackpop(vm, 1); /* pop the obj::tostring */
s = be_newstr(vm, sbuf);
be_free(vm, sbuf, (size_t)str_len(name) + 16);
} else {
*vm->top = vm->reg[idx];
be_dofunc(vm, vm->top - 1, 1);
be_stackpop(vm, 1); /* pop the obj::tostring */
if (!var_isstr(vm->top)) { /* check the return value */
const char *name = str(be_instance_name(obj));
be_raise(vm, "runtime_error", be_pushfstring(vm,
"the value of `%s::tostring()` is not a 'string'",
strlen(name) ? name : "<anonymous>"));
}
s = var_tostr(vm->top);
}
return s;
}
void be_val2str(bvm *vm, int index)
{
bstring *s;
int idx = be_absindex(vm, index) - 1;
bvalue *v = vm->reg + idx;
if (var_isstr(v)) return; /* do nothing */
s = var_isinstance(v) ? ins2str(vm, idx) : sim2str(vm, v);
v = vm->reg + idx; /* the stack may change */
var_setstr(v, s);
}
static void pushstr(bvm *vm, const char *s, size_t len)
{
/* to create a string and then update the top pointer,
* otherwise the GC may crash due to uninitialized values.
**/
bstring *str = be_newstrn(vm, s, len);
bvalue *reg = be_incrtop(vm);
var_setstr(reg, str);
}
static const char* concat2(bvm *vm)
{
bvalue *dst = vm->top - 2;
bstring *s1 = var_tostr(dst);
bstring *s2 = var_tostr(dst + 1);
bstring *s = be_strcat(vm, s1, s2);
be_assert(var_isstr(vm->top - 2) && var_isstr(vm->top - 1));
dst = vm->top - 2; /* the stack may change */
var_setstr(dst, s);
--vm->top;
return str(s);
}
const char* be_pushvfstr(bvm *vm, const char *format, va_list arg)
{
pushstr(vm, "", 0);
for (;;) {
const char *p = strchr(format, '%');
if (p == NULL) {
break;
}
pushstr(vm, format, p - format);
concat2(vm);
switch (p[1]) {
case 's': {
const char *s = va_arg(arg, char*);
if (s == NULL) {
s = "(null)";
}
pushstr(vm, s, strlen(s));
break;
}
case 'd': {
bstring *s;
bvalue *v = be_incrtop(vm);
var_setint(v, va_arg(arg, int));
s = be_num2str(vm, v);
var_setstr(v, s);
break;
}
case 'f': case 'g': {
bstring *s;
bvalue *v = be_incrtop(vm);
var_setreal(v, cast(breal, va_arg(arg, double)));
s = be_num2str(vm, v);
var_setstr(v, s);
break;
}
case 'c': {
char c = cast(char, va_arg(arg, int));
pushstr(vm, &c, 1);
break;
}
case '%': {
pushstr(vm, "%", 1);
break;
}
case 'p': {
char buf[2 * sizeof(void*) + 4];
sprintf(buf, "%p", va_arg(arg, void*));
pushstr(vm, buf, strlen(buf));
break;
}
default:
pushstr(vm, "(unknow)", 8);
break;
}
concat2(vm);
format = p + 2;
}
pushstr(vm, format, strlen(format));
return concat2(vm);
}
/*******************************************************************
* the function be_str2int():
* >>-+------------+--+-----+----digits----><
* '-whitespace-' +- + -+
* '- - -'
*******************************************************************/
BERRY_API bint be_str2int(const char *str, const char **endstr)
{
int c, sign;
bint sum = 0;
skip_space(str);
sign = c = *str++;
if (c == '+' || c == '-') {
c = *str++;
}
while (is_digit(c)) {
sum = sum * 10 + c - '0';
c = *str++;
}
if (endstr) {
*endstr = str - 1;
}
return sign == '-' ? -sum : sum;
}
/*******************************************************************
* the function be_str2real():
* >>-+------------+--+-----+--+-digits--+---+--+--------+-+------->
* '-whitespace-' +- + -+ | '-.-' '-digits-' |
* '- - -' '-.--digits-----------------'
*
* >--+------------------------+----------------------------------><
* '-+-e-+--+-----+--digits-'
* '-E-' +- + -+
* '- - -'
*******************************************************************/
BERRY_API breal be_str2real(const char *str, const char **endstr)
{
int c, sign;
breal sum = 0, deci = 0, point = (breal)0.1;
skip_space(str);
sign = c = *str++;
if (c == '+' || c == '-') {
c = *str++;
}
while (is_digit(c)) {
sum = sum * 10 + c - '0';
c = *str++;
}
if (c == '.') {
c = *str++;
while (is_digit(c)) {
deci = deci + ((breal)c - '0') * point;
point *= (breal)0.1;
c = *str++;
}
}
sum = sum + deci;
if (c == 'e' || c == 'E') {
int e = 0;
breal ratio = (c = *str++) == '-' ? (breal)0.1 : 10;
if (c == '+' || c == '-') {
c = *str++;
}
while (is_digit(c)) {
e = e * 10 + c - '0';
c = *str++;
}
while (e--) {
sum *= ratio;
}
}
if (endstr) {
*endstr = str - 1;
}
return sign == '-' ? -sum : sum;
}
/* convert a string to a number (integer or real).
* 1. skip \s*[\+\-]?\d*
* 2. matched [.eE]? yes: real, no: integer.
**/
BERRY_API const char *be_str2num(bvm *vm, const char *str)
{
const char *sout;
bint c, vint = be_str2int(str, &sout);
c = *sout;
if (c == '.' || c == 'e' || c == 'E') {
be_pushreal(vm, be_str2real(str, &sout));
} else {
be_pushint(vm, vint);
}
return sout;
}
/* string subscript operation */
bstring* be_strindex(bvm *vm, bstring *str, bvalue *idx)
{
if (var_isint(idx)) {
int pos = var_toidx(idx);
if (pos < str_len(str)) {
return be_newstrn(vm, str(str) + pos, 1);
}
be_raise(vm, "index_error", "string index out of range");
}
be_raise(vm, "index_error", "string indices must be integers");
return NULL;
}
size_t be_strlcpy(char *dst, const char *src, size_t maxlen)
{
const size_t srclen = strlen(src);
if (srclen + 1 < maxlen) {
memcpy(dst, src, srclen + 1);
} else if (maxlen != 0) {
memcpy(dst, src, maxlen - 1);
dst[maxlen-1] = '\0';
}
return srclen;
}
const char* be_splitpath(const char *path)
{
const char *p;
for (p = path - 1; *path != '\0'; ++path) {
if (*path == '/') {
p = path;
}
}
return p + 1; /* return the file name pointer */
}
const char* be_splitname(const char *path)
{
const char *p, *q, *end = path + strlen(path);
for (p = end; *p != '.' && p > path; --p); /* skip [^\.] */
for (q = p; *q == '.' && q > path; --q); /* skip \. */
if ((q == path && *q == '.') || *q == '/') {
return end;
}
return p;
}
static unsigned escape_length(const char *s, int quote)
{
unsigned c, len = 0, step = quote == '"' ? 5 : 3;
for (; (c = *s) != '\0'; ++s) {
switch (c) {
case '\\': case '\n': case '\r': case '\t':
len += 1;
break;
default:
if (c < 0x20)
len += step;
else if (c == (unsigned)quote)
len += 1;
break;
}
}
return len;
}
static unsigned eschex(unsigned num)
{
return num <= 9 ? '0' + num : 'a' + num - 10;
}
/* escape as Berry or JSON */
static char* escape(char *q, unsigned c, int quote)
{
int json = quote == '"';
switch (c) {
case '\\': *q++ = '\\'; *q = '\\'; break;
case '\n': *q++ = '\\'; *q = 'n'; break;
case '\r': *q++ = '\\'; *q = 'r'; break;
case '\t': *q++ = '\\'; *q = 't'; break;
default:
if (c < 0x20) { /* other characters are escaped using '\uxxxx' */
*q++ = '\\';
if (json) {
*q++ = 'u'; *q++ = '0'; *q++ = '0';
*q++ = (char)eschex(c >> 4);
*q = (char)eschex(c & 0x0f);
} else {
*q++ = 'x';
*q++ = (char)eschex(c >> 4);
*q = (char)eschex(c & 0x0f);
}
} else { /* quotes and unescaped characters */
if (c == (unsigned)quote)
*q++ = '\\';
*q = (char)c;
}
break;
}
return q;
}
static void toescape(bvm *vm, int index, int quote)
{
char *buf, *q;
const char *p, *s = be_tostring(vm, index);
size_t len = (size_t)be_strlen(vm, index);
len += escape_length(s, quote) + 2; /* escape length + quote mark */
buf = q = be_pushbuffer(vm, len);
*q++ = (char)quote; /* add first quote */
/* generate escape string */
for (p = s; *p != '\0'; ++p, ++q) {
q = escape(q, *p, quote);
}
*q = (char)quote; /* add last quote */
be_pushnstring(vm, buf, len); /* make escape string from buffer */
be_moveto(vm, -1, index);
be_pop(vm, 2); /* remove buffer & top string */
}
BERRY_API const char* be_toescape(bvm *vm, int index, int mode)
{
if (be_isstring(vm, index)) {
index = be_absindex(vm, index);
toescape(vm, index, mode == 'u' ? '"' : '\'');
}
return be_tostring(vm, index);
}
#if BE_USE_STRING_MODULE
#define MAX_FORMAT_MODE 32
#define FLAGES "+- #0"
static const char* skip2dig(const char *s)
{
if (is_digit(*s)) {
++s;
}
if (is_digit(*s)) {
++s;
}
return s;
}
static const char* get_mode(const char *str, char *buf)
{
const char *p = str;
while (*p && strchr(FLAGES, *p)) { /* skip flags */
++p;
}
p = skip2dig(p); /* skip width (2 digits at most) */
if (*p == '.') {
p = skip2dig(++p); /* skip width (2 digits at most) */
}
*(buf++) = '%';
strncpy(buf, str, p - str + 1);
buf[p - str + 1] = '\0';
return p;
}
static void mode_fixlen(char *mode, const char *lenmode)
{
size_t l = strlen(mode), lm = strlen(lenmode);
char spec = mode[l - 1];
strcpy(mode + l - 1, lenmode);
mode[l + lm - 1] = spec;
mode[l + lm] = '\0';
}
static int str_format(bvm *vm)
{
int top = be_top(vm);
if (top > 0 && be_isstring(vm, 1)) {
int index = 2;
const char *format = be_tostring(vm, 1);
pushstr(vm, "", 0);
for (;;) {
char mode[MAX_FORMAT_MODE];
char buf[128];
const char *p = strchr(format, '%');
if (p == NULL) {
break;
}
pushstr(vm, format, p - format);
concat2(vm);
p = get_mode(p + 1, mode);
buf[0] = '\0';
if (index > top) {
be_raise(vm, "runtime_error", be_pushfstring(vm,
"bad argument #%d to 'format': no value", index));
}
switch (*p) {
case 'd': case 'i': case 'o':
case 'u': case 'x': case 'X':
if (be_isint(vm, index)) {
mode_fixlen(mode, BE_INT_FMTLEN);
sprintf(buf, mode, be_toint(vm, index));
}
be_pushstring(vm, buf);
break;
case 'e': case 'E':
case 'f': case 'g': case 'G':
if (be_isnumber(vm, index)) {
sprintf(buf, mode, be_toreal(vm, index));
}
be_pushstring(vm, buf);
break;
case 'c':
if (be_isint(vm, index)) {
sprintf(buf, "%c", (int)be_toint(vm, index));
}
be_pushstring(vm, buf);
break;
case 's': {
const char *s = be_tostring(vm, index);
int len = be_strlen(vm, 2);
if (len > 100 && strlen(mode) == 2) {
be_pushvalue(vm, index);
} else {
sprintf(buf, mode, s);
be_pushstring(vm, buf);
}
break;
}
default: /* error */
be_raise(vm, "runtime_error", be_pushfstring(vm,
"invalid option '%%%c' to 'format'", *p));
break;
}
concat2(vm);
format = p + 1;
++index;
}
pushstr(vm, format, strlen(format));
concat2(vm);
be_return(vm);
}
be_return_nil(vm);
}
/* string.op(s1, s2, begin=0, end=length(s2)) */
static bint str_operation(bvm *vm, str_opfunc func, bint error)
{
int top = be_top(vm);
/* check the number and type of arguments */
if (top >= 2 && be_isstring(vm, 1) && be_isstring(vm, 2)) {
/* get the operation string and its length */
int len1 = be_strlen(vm, 1);
int len2 = be_strlen(vm, 2);
const char *s1 = be_tostring(vm, 1);
const char *s2 = be_tostring(vm, 2);
/* get begin and end indexes (may use default values) */
bint begin = top >= 3 && be_isint(vm, 3) ? be_toint(vm, 3) : 0;
bint end = top >= 4 && be_isint(vm, 4) ? be_toint(vm, 4) : len1;
/* basic range check:
* 1. begin position must be greater than 0 and
* less than the length of the source string.
* 2. the length of the pattern string cannot be
* less than the matching range (end - begin).
**/
if (begin >= 0 && begin <= len1 && end - begin >= len2) {
/* call the operation function */
return func(s1, s2, begin, end - len2);
}
}
return error; /* returns the default error value */
}
static bint _sfind(const char *s1, const char *s2, bint begin, bint end)
{
const char *res = strstr(s1 + begin, s2);
if (res) {
bint pos = (bint)(res - s1);
return pos <= end ? pos : -1;
}
return -1;
}
static int str_find(bvm *vm)
{
be_pushint(vm, str_operation(vm, _sfind, -1));
be_return(vm);
}
static bint _scount(const char *s1, const char *s2, bint begin, bint end)
{
bint count = 0;
const char *res = s1 + begin, *send = s1 + end;
while ((res = strstr(res, s2)) != NULL && res <= send) {
count += 1;
res += 1;
}
return count;
}
static int str_count(bvm *vm)
{
be_pushint(vm, str_operation(vm, _scount, 0));
be_return(vm);
}
static bbool _split_string(bvm *vm, int top)
{
if (be_isstring(vm, 2)) {
const char *res;
int len1 = be_strlen(vm, 1);
int len2 = be_strlen(vm, 2);
const char *s1 = be_tostring(vm, 1);
const char *s2 = be_tostring(vm, 2);
bint count = len2 /* match when the pattern string is not empty */
? top >= 3 && be_isint(vm, 3) ? be_toint(vm, 3) : len1
: 0; /* cannot match empty pattern string */
while (count-- && (res = strstr(s1, s2)) != NULL) {
be_pushnstring(vm, s1, res - s1);
be_data_push(vm, -2);
be_pop(vm, 1);
s1 = res + len2;
}
be_pushstring(vm, s1);
be_data_push(vm, -2);
be_pop(vm, 1);
return btrue;
}
return bfalse;
}
static bbool _split_index(bvm *vm)
{
if (be_isint(vm, 2)) {
int len = be_strlen(vm, 1), idx = be_toindex(vm, 2);
const char *s = be_tostring(vm, 1);
idx = idx > len ? len : idx < -len ? -len : idx;
if (idx < 0) {
idx += len;
}
be_pushnstring(vm, s, idx);
be_data_push(vm, -2);
be_pop(vm, 1);
be_pushnstring(vm, s + idx, (size_t)len - idx);
be_data_push(vm, -2);
be_pop(vm, 1);
return btrue;
}
return bfalse;
}
static int str_split(bvm *vm)
{
int top = be_top(vm);
be_newobject(vm, "list");
if (top >= 2 && be_isstring(vm, 1)) {
if (!_split_index(vm))
_split_string(vm, top);
}
be_pop(vm, 1);
be_return(vm);
}
static int str_i2hex(bvm *vm)
{
int top = be_top(vm);
if (top && be_isint(vm, 1)) {
bint value = be_toint(vm, 1);
char fmt[10] = { "%" BE_INT_FMTLEN "X" }, buf[18];
if (top >= 2 && be_isint(vm, 2)) {
bint num = be_toint(vm, 2);
if (num > 0 && num <= 16) {
sprintf(fmt, "%%.%d" BE_INT_FMTLEN "X", (int)num);
}
}
sprintf(buf, fmt, value);
be_pushstring(vm, buf);
be_return(vm);
}
be_return_nil(vm);
}
static int str_byte(bvm *vm)
{
if (be_top(vm) && be_isstring(vm, 1)) {
const bbyte *s = (const bbyte *)be_tostring(vm, 1);
be_pushint(vm, *s);
be_return(vm);
}
be_return_nil(vm);
}
static int str_char(bvm *vm)
{
if (be_top(vm) && be_isint(vm, 1)) {
char c = be_toint(vm, 1) & 0xFF;
be_pushnstring(vm, &c, 1);
be_return(vm);
}
be_return_nil(vm);
}
// boolean to select whether we call toupper() or tolower()
static int str_touplower(bvm *vm, bbool up)
{
if (be_top(vm) && be_isstring(vm, 1)) {
const char *p, *s = be_tostring(vm, 1);
size_t len = (size_t)be_strlen(vm, 1);
char *buf, *q;
buf = q = be_pushbuffer(vm, len);
/* convert to lower case */
for (p = s; *p != '\0'; ++p, ++q) {
*q = up ? toupper(*p) : tolower(*p);
}
be_pushnstring(vm, buf, len); /* make escape string from buffer */
be_remove(vm, 2); /* remove buffer */
be_return(vm);
}
be_return_nil(vm);
}
static int str_tolower(bvm *vm) {
return str_touplower(vm, bfalse);
}
static int str_toupper(bvm *vm) {
return str_touplower(vm, btrue);
}
#if !BE_USE_PRECOMPILED_OBJECT
be_native_module_attr_table(string) {
be_native_module_function("format", str_format),
be_native_module_function("count", str_count),
be_native_module_function("split", str_split),
be_native_module_function("find", str_find),
be_native_module_function("hex", str_i2hex),
be_native_module_function("byte", str_byte),
be_native_module_function("char", str_char),
be_native_module_function("tolower", str_tolower),
be_native_module_function("toupper", str_toupper),
};
be_define_native_module(string, NULL);
#else
/* @const_object_info_begin
module string (scope: global, depend: BE_USE_STRING_MODULE) {
format, func(str_format)
count, func(str_count)
split, func(str_split)
find, func(str_find)
hex, func(str_i2hex)
byte, func(str_byte)
char, func(str_char)
tolower, func(str_tolower)
toupper, func(str_toupper)
}
@const_object_info_end */
#include "../generate/be_fixed_string.h"
#endif
#endif /* BE_USE_STRING_MODULE */