Merge pull request #15325 from s-hadinger/berry_solidify_literal

Berry solidification toggle for string literals
This commit is contained in:
s-hadinger 2022-04-09 09:59:47 +02:00 committed by GitHub
commit a0992389d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 142 additions and 41 deletions

View File

@ -33,6 +33,12 @@ extern "C" {
.next = (uint32_t)(_next) & 0xFFFFFF \
}
/* Key initializer whose key is a constant string built from the C string
 * literal `_str` via be_str_literal (type BE_STRING); `_next` is stored
 * masked to its low 24 bits. */
#define be_const_key_literal(_str, _next) { \
.v.c = be_str_literal(_str), \
.type = BE_STRING, \
.next = (uint32_t)(_next) & 0xFFFFFF \
}
#define be_const_key_int(_i, _next) { \
.v.i = _i, \
.type = BE_INT, \
@ -230,6 +236,13 @@ const bntvmodule be_native_module(_module) = { \
BE_STRING \
}
/* Constant BE_STRING value built directly from the C string literal
 * `_name_`. The length is taken with sizeof, so `_name_` must be a string
 * literal (or char array), not a pointer.
 * The hash argument is a literal 0, exactly as be_str_literal passes it;
 * the previous `_hash` token was not a parameter of this macro. */
#define be_nested_str_literal(_name_) \
{ \
{ .s=(be_nested_const_str(_name_, 0, sizeof(_name_)-1 ))\
}, \
BE_STRING \
}
/* Constant string object for the C string literal `_str`: expands to
 * be_nested_const_str with hash argument 0 and length sizeof(_str)-1,
 * so `_str` must be a literal (or char array), not a pointer. */
#define be_str_literal(_str) \
be_nested_const_str(_str, 0, sizeof(_str)-1 )

View File

@ -94,9 +94,63 @@ static void toidentifier(char *to, const char *p)
*to = 0; // final NULL
}
static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, const char *key);
static void m_solidify_map(bvm *vm, bmap * map, const char *class_name)
/********************************************************************\
 * Encode string as literals with simple rules
 *
 * Newline, backslash and double quote are encoded as two-character
 * escapes. All other characters outside of 0x20-0x7F are encoded as
 * a four-character escape sequence.
\********************************************************************/
/* Number of bytes toliteral() needs to encode `s`, including the
 * terminating NUL. */
static unsigned toliteral_length(const char *s)
{
    unsigned total = 1;     /* room for the terminating NUL */
    for (const char *cur = s; *cur != '\0'; cur++) {
        const char c = *cur;
        if (c == '\n' || c == '\\' || c == '"') {
            total += 2;     /* backslash + one character */
        } else if (c < 0x20 || (c & 0x80) != 0) {
            total += 4;     /* control or high-bit byte: numeric escape */
        } else {
            total += 1;     /* copied verbatim */
        }
    }
    return total;
}
/*
 * Write `p` into `to` as the body of a C string literal (without the
 * surrounding quotes), NUL-terminated. `to` must have room for
 * toliteral_length(p) bytes.
 *
 * Newline, backslash and double quote become two-character escapes;
 * characters in 0x20-0x7F are copied as-is; every other byte becomes a
 * four-character numeric escape.
 *
 * The numeric escape is a three-digit octal one rather than a hex one:
 * a C hex escape has no length limit, so byte 0x01 followed by the
 * character 'a' would be read back by the compiler as the single byte
 * 0x1a. An octal escape always ends after three digits and has the same
 * encoded length.
 */
static void toliteral(char *to, const char *p)
{
    for (; *p != '\0'; p++) {
        const unsigned char c = (unsigned char)*p;
        if (c == '\n') {
            *to++ = '\\';
            *to++ = 'n';
        } else if (c == '\\' || c == '"') {
            *to++ = '\\';
            *to++ = (char)c;
        } else if (c >= 0x20 && c < 0x80) {
            *to++ = (char)c;
        } else {
            *to++ = '\\';
            *to++ = (char)('0' + ((c >> 6) & 0x07));
            *to++ = (char)('0' + ((c >> 3) & 0x07));
            *to++ = (char)('0' + (c & 0x07));
        }
    }
    *to = 0;    /* final NUL */
}
static void m_solidify_bvalue(bvm *vm, bbool str_literal, bvalue * value, const char *classname, const char *key);
static void m_solidify_map(bvm *vm, bbool str_literal, bmap * map, const char *class_name)
{
// compact first
be_map_compact(vm, map);
@ -116,14 +170,21 @@ static void m_solidify_map(bvm *vm, bmap * map, const char *class_name)
if (node->key.type == BE_STRING) {
/* convert the string literal to identifier */
const char * key = str(node->key.v.s);
size_t id_len = toidentifier_length(key);
char id_buf[id_len];
toidentifier(id_buf, key);
logfmt(" { be_const_key(%s, %i), ", id_buf, key_next);
m_solidify_bvalue(vm, &node->value, class_name, str(node->key.v.s));
if (!str_literal) {
size_t id_len = toidentifier_length(key);
char id_buf[id_len];
toidentifier(id_buf, key);
logfmt(" { be_const_key(%s, %i), ", id_buf, key_next);
} else {
size_t id_len = toliteral_length(key);
char id_buf[id_len];
toliteral(id_buf, key);
logfmt(" { be_const_key_literal(\"%s\", %i), ", id_buf, key_next);
}
m_solidify_bvalue(vm, str_literal, &node->value, class_name, str(node->key.v.s));
} else if (node->key.type == BE_INT) {
logfmt(" { be_const_key_int(%i, %i), ", node->key.v.i, key_next);
m_solidify_bvalue(vm, &node->value, class_name, NULL);
m_solidify_bvalue(vm, str_literal, &node->value, class_name, NULL);
} else {
char error[64];
snprintf(error, sizeof(error), "Unsupported type in key: %i", node->key.type);
@ -136,21 +197,21 @@ static void m_solidify_map(bvm *vm, bmap * map, const char *class_name)
}
/* Print `list` as a be_nested_list(...) initializer: first the element
 * count, then a const bvalue array in which each element is emitted on its
 * own line through m_solidify_bvalue with an empty key string. */
static void m_solidify_list(bvm *vm, blist * list, const char *class_name)
static void m_solidify_list(bvm *vm, bbool str_literal, blist * list, const char *class_name)
{
logfmt(" be_nested_list(%i,\n", list->count);
logfmt(" ( (struct bvalue*) &(const bvalue[]) {\n");
for (int i = 0; i < list->count; i++) {
logfmt(" ");
m_solidify_bvalue(vm, &list->data[i], class_name, "");
m_solidify_bvalue(vm, str_literal, &list->data[i], class_name, "");
logfmt(",\n");
}
logfmt(" }))"); // TODO need terminal comma?
}
// pass key name in case of class, or NULL if none
static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, const char *key)
static void m_solidify_bvalue(bvm *vm, bbool str_literal, bvalue * value, const char *classname, const char *key)
{
int type = var_primetype(value);
switch (type) {
@ -188,10 +249,17 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
if (len >= 255) {
be_raise(vm, "internal_error", "Strings greater than 255 chars not supported yet");
}
size_t id_len = toidentifier_length(str);
char id_buf[id_len];
toidentifier(id_buf, str);
logfmt("be_nested_str(%s)", id_buf);
if (!str_literal) {
size_t id_len = toidentifier_length(str);
char id_buf[id_len];
toidentifier(id_buf, str);
logfmt("be_nested_str(%s)", id_buf);
} else {
size_t id_len = toliteral_length(str);
char id_buf[id_len];
toliteral(id_buf, str);
logfmt("be_nested_str_literal(\"%s\")", id_buf);
}
}
break;
case BE_CLOSURE:
@ -235,16 +303,16 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
} else {
logfmt(" be_const_list( * ");
}
m_solidify_bvalue(vm, &ins->members[0], classname, key);
m_solidify_bvalue(vm, str_literal, &ins->members[0], classname, key);
logfmt(" ) } ))");
}
}
break;
case BE_MAP:
m_solidify_map(vm, (bmap *) var_toobj(value), classname);
m_solidify_map(vm, str_literal, (bmap *) var_toobj(value), classname);
break;
case BE_LIST:
m_solidify_list(vm, (blist *) var_toobj(value), classname);
m_solidify_list(vm, str_literal, (blist *) var_toobj(value), classname);
break;
default:
{
@ -255,24 +323,24 @@ static void m_solidify_bvalue(bvm *vm, bvalue * value, const char *classname, co
}
}
static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins);
static void m_solidify_subclass(bvm *vm, bbool str_literal, bclass *cl, int builtins);
/* Solidify any inner class: walk the constant table (ktab) of `pr` and
 * pass every constant whose type is BE_CLASS to m_solidify_subclass, so
 * that those classes are output first. Does nothing when `pr` has no
 * constants. */
static void m_solidify_proto_inner_class(bvm *vm, bproto *pr, int builtins)
static void m_solidify_proto_inner_class(bvm *vm, bbool str_literal, bproto *pr, int builtins)
{
// parse any class in constants to output it first
if (pr->nconst > 0) {
for (int k = 0; k < pr->nconst; k++) {
if (var_type(&pr->ktab[k]) == BE_CLASS) {
// output the class
m_solidify_subclass(vm, (bclass*) var_toobj(&pr->ktab[k]), builtins);
m_solidify_subclass(vm, str_literal, (bclass*) var_toobj(&pr->ktab[k]), builtins);
}
}
}
}
static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int builtins, int indent)
static void m_solidify_proto(bvm *vm, bbool str_literal, bproto *pr, const char * func_name, int builtins, int indent)
{
// const char * func_name = str(pr->name);
// const char * func_source = str(pr->source);
@ -302,7 +370,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
size_t sub_len = strlen(func_name) + 10;
char sub_name[sub_len];
snprintf(sub_name, sizeof(sub_name), "%s_%d", func_name, i);
m_solidify_proto(vm, pr->ptab[i], sub_name, builtins, indent+2);
m_solidify_proto(vm, str_literal, pr->ptab[i], sub_name, builtins, indent+2);
logfmt(",\n");
}
logfmt("%*s}),\n", indent, "");
@ -315,7 +383,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
logfmt("%*s( &(const bvalue[%2d]) { /* constants */\n", indent, "", pr->nconst);
for (int k = 0; k < pr->nconst; k++) {
logfmt("%*s/* K%-3d */ ", indent, "", k);
m_solidify_bvalue(vm, &pr->ktab[k], NULL, NULL);
m_solidify_bvalue(vm, str_literal, &pr->ktab[k], NULL, NULL);
logfmt(",\n");
}
logfmt("%*s}),\n", indent, "");
@ -324,12 +392,18 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
}
/* convert the string literal to identifier */
{
if (!str_literal) {
const char * key = str(pr->name);
size_t id_len = toidentifier_length(key);
char id_buf[id_len];
toidentifier(id_buf, key);
logfmt("%*s&be_const_str_%s,\n", indent, "", id_buf);
} else {
const char * key = str(pr->name);
size_t id_len = toliteral_length(key);
char id_buf[id_len];
toliteral(id_buf, key);
logfmt("%*sbe_str_literal(\"%s\"),\n", indent, "", id_buf);
}
// hard-code source as "solidified" for solidified
logfmt("%*s&be_const_str_solidified,\n", indent, "");
@ -356,7 +430,7 @@ static void m_solidify_proto(bvm *vm, bproto *pr, const char * func_name, int bu
}
static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, int builtins)
static void m_solidify_closure(bvm *vm, bbool str_literal, bclosure *cl, const char * classname, int builtins)
{
bproto *pr = cl->proto;
const char * func_name = str(pr->name);
@ -368,7 +442,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
int indent = 2;
m_solidify_proto_inner_class(vm, pr, builtins);
m_solidify_proto_inner_class(vm, str_literal, pr, builtins);
logfmt("\n");
logfmt("/********************************************************************\n");
@ -384,7 +458,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
func_name_id);
}
m_solidify_proto(vm, pr, func_name, builtins, indent);
m_solidify_proto(vm, str_literal, pr, func_name, builtins, indent);
logfmt("\n");
// closure
@ -392,7 +466,7 @@ static void m_solidify_closure(bvm *vm, bclosure *cl, const char * classname, in
logfmt("/*******************************************************************/\n\n");
}
static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins)
static void m_solidify_subclass(bvm *vm, bbool str_literal, bclass *cl, int builtins)
{
const char * class_name = str(cl->name);
@ -403,7 +477,7 @@ static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins)
while ((node = be_map_next(cl->members, &iter)) != NULL) {
if (var_isstr(&node->key) && var_isclosure(&node->value)) {
bclosure *f = var_toobj(&node->value);
m_solidify_closure(vm, f, class_name, builtins);
m_solidify_closure(vm, str_literal, f, class_name, builtins);
}
}
}
@ -427,22 +501,32 @@ static void m_solidify_subclass(bvm *vm, bclass *cl, int builtins)
}
if (cl->members) {
m_solidify_map(vm, cl->members, class_name);
m_solidify_map(vm, str_literal, cl->members, class_name);
logfmt(",\n");
} else {
logfmt(" NULL,\n");
}
logfmt(" be_str_literal(\"%s\")\n", class_name);
if (!str_literal) {
size_t id_len = toidentifier_length(class_name);
char id_buf[id_len];
toidentifier(id_buf, class_name);
logfmt(" &be_const_str_%s,\n", id_buf);
} else {
size_t id_len = toliteral_length(class_name);
char id_buf[id_len];
toliteral(id_buf, class_name);
logfmt(" be_str_literal(\"%s\")\n", id_buf);
}
logfmt(");\n");
}
static void m_solidify_class(bvm *vm, bclass *cl, int builtins)
static void m_solidify_class(bvm *vm, bbool str_literal, bclass *cl, int builtins)
{
const char * class_name = str(cl->name);
m_solidify_subclass(vm, cl, builtins);
m_solidify_subclass(vm, str_literal, cl, builtins);
logfmt("/*******************************************************************/\n\n");
logfmt("void be_load_%s_class(bvm *vm) {\n", class_name);
@ -452,7 +536,7 @@ static void m_solidify_class(bvm *vm, bclass *cl, int builtins)
logfmt("}\n");
}
static void m_solidify_module(bvm *vm, bmodule *ml, int builtins)
static void m_solidify_module(bvm *vm, bbool str_literal, bmodule *ml, int builtins)
{
const char * module_name = be_module_name(ml);
if (!module_name) { module_name = ""; }
@ -464,11 +548,11 @@ static void m_solidify_module(bvm *vm, bmodule *ml, int builtins)
while ((node = be_map_next(ml->table, &iter)) != NULL) {
if (var_isstr(&node->key) && var_isclosure(&node->value)) {
bclosure *f = var_toobj(&node->value);
m_solidify_closure(vm, f, module_name, builtins);
m_solidify_closure(vm, str_literal, f, module_name, builtins);
}
if (var_isstr(&node->key) && var_isclass(&node->value)) {
bclass *cl = var_toobj(&node->value);
m_solidify_subclass(vm, cl, builtins);
m_solidify_subclass(vm, str_literal, cl, builtins);
}
}
}
@ -483,7 +567,7 @@ static void m_solidify_module(bvm *vm, bmodule *ml, int builtins)
logfmt(" \"%s\",\n", module_name);
if (ml->table) {
m_solidify_map(vm, ml->table, module_name);
m_solidify_map(vm, str_literal, ml->table, module_name);
logfmt("\n");
} else {
logfmt(" NULL,\n");
@ -501,12 +585,16 @@ static int m_dump(bvm *vm)
{
if (be_top(vm) >= 1) {
bvalue *v = be_indexof(vm, 1);
bbool str_literal = bfalse;
if (be_top(vm) >= 2) {
str_literal = be_tobool(vm, 2);
}
if (var_isclosure(v)) {
m_solidify_closure(vm, var_toobj(v), NULL, be_builtin_count(vm));
m_solidify_closure(vm, str_literal, var_toobj(v), NULL, be_builtin_count(vm));
} else if (var_isclass(v)) {
m_solidify_class(vm, var_toobj(v), be_builtin_count(vm));
m_solidify_class(vm, str_literal, var_toobj(v), be_builtin_count(vm));
} else if (var_ismodule(v)) {
m_solidify_module(vm, var_toobj(v), be_builtin_count(vm));
m_solidify_module(vm, str_literal, var_toobj(v), be_builtin_count(vm));
} else {
be_raise(vm, "value_error", "unsupported type");
}