From 5255255fb9ea003db65935fe6cf2ac9d17410faa Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 24 Feb 2017 13:43:43 +1100 Subject: [PATCH] py: Create str/bytes objects in the parser, not the compiler. Previous to this patch any non-interned str/bytes objects would create a special parse node that held a copy of the str/bytes data. Then in the compiler this data would be turned into a str/bytes object. This actually led to 2 copies of the data, one in the parse node and one in the object. The parse node's copy of the data would be freed at the end of the compile stage but nevertheless it meant that the peak memory usage of the parse/compile stage was higher than it needed to be (by an amount equal to the number of bytes in all the non-interned str/bytes objects). This patch changes the behaviour so that str/bytes objects are created directly in the parser and the object stored in a const-object parse node (which already exists for bignum, float and complex const objects). This reduces peak RAM usage of the parse/compile stage, simplifies the parser and compiler, and reduces code size by about 170 bytes on Thumb2 archs, and by about 300 bytes on Xtensa archs. --- py/compile.c | 39 +++++++------------------ py/emitinlinethumb.c | 2 -- py/parse.c | 47 +++++++++--------------------- tests/cmdline/cmd_parsetree.py.exp | 4 +-- 4 files changed, 25 insertions(+), 67 deletions(-) diff --git a/py/compile.c b/py/compile.c index 15e757d464..ca21d8294c 100644 --- a/py/compile.c +++ b/py/compile.c @@ -47,8 +47,6 @@ typedef enum { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - PN_string, // special node for non-interned string - PN_bytes, // special node for non-interned bytes PN_const_object, // special node for a constant, generic Python object // define rules without a compile function #define DEF_RULE(rule, comp, kind, ...) 
@@ -1880,8 +1878,6 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) { } else { // for non-REPL, evaluate then discard the expression if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0])) - || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string) - || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes) || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) { // do nothing with a lonely constant } else { @@ -2600,31 +2596,17 @@ STATIC void compile_atom_expr_await(compiler_t *comp, mp_parse_node_struct_t *pn } #endif -STATIC void compile_string(compiler_t *comp, mp_parse_node_struct_t *pns) { - // only create and load the actual str object on the last pass - if (comp->pass != MP_PASS_EMIT) { - EMIT_ARG(load_const_obj, mp_const_none); - } else { - EMIT_ARG(load_const_obj, mp_obj_new_str((const char*)pns->nodes[0], pns->nodes[1], false)); - } -} - -STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) { - // only create and load the actual bytes object on the last pass - if (comp->pass != MP_PASS_EMIT) { - EMIT_ARG(load_const_obj, mp_const_none); - } else { - EMIT_ARG(load_const_obj, mp_obj_new_bytes((const byte*)pns->nodes[0], pns->nodes[1])); - } +STATIC mp_obj_t get_const_object(mp_parse_node_struct_t *pns) { + #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D + // nodes are 32-bit pointers, but need to extract 64-bit object + return (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32); + #else + return (mp_obj_t)pns->nodes[0]; + #endif } STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) { - #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D - // nodes are 32-bit pointers, but need to extract 64-bit object - EMIT_ARG(load_const_obj, (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32)); - #else - EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]); - #endif + EMIT_ARG(load_const_obj, get_const_object(pns)); } typedef void 
(*compile_function_t)(compiler_t*, mp_parse_node_struct_t*); @@ -2637,8 +2619,6 @@ STATIC const compile_function_t compile_function[] = { #undef c #undef DEF_RULE #undef DEF_RULE_NC - compile_string, - compile_bytes, compile_const_object, }; @@ -2891,7 +2871,8 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING) - || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) { + || (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object) + && MP_OBJ_IS_STR(get_const_object((mp_parse_node_struct_t*)pns->nodes[0])))) { // compile the doc string compile_node(comp, pns->nodes[0]); // store the doc string diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index 89bcebfead..c1a4eac5d0 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -42,8 +42,6 @@ typedef enum { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - PN_string, // special node for non-interned string - PN_bytes, // special node for non-interned bytes PN_const_object, // special node for a constant, generic Python object // define rules without a compile function #define DEF_RULE(rule, comp, kind, ...) 
diff --git a/py/parse.c b/py/parse.c index 7280f74870..5a5adc6093 100644 --- a/py/parse.c +++ b/py/parse.c @@ -38,6 +38,7 @@ #include "py/runtime0.h" #include "py/runtime.h" #include "py/objint.h" +#include "py/objstr.h" #include "py/builtin.h" #if MICROPY_ENABLE_COMPILER @@ -75,8 +76,6 @@ enum { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - RULE_string, // special node for non-interned string - RULE_bytes, // special node for non-interned bytes RULE_const_object, // special node for a constant, generic Python object // define rules without a compile function @@ -123,8 +122,6 @@ STATIC const rule_t *const rules[] = { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - NULL, // RULE_string - NULL, // RULE_bytes NULL, // RULE_const_object // define rules without a compile function @@ -326,11 +323,7 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) { } else { // node must be a mp_parse_node_struct_t mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; - if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { - printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); - } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) { - printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); - } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) { + if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) { #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32)); #else @@ -392,21 +385,6 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { parser->result_stack[parser->result_stack_top++] = pn; } -STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) { - mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2); - if (pn == NULL) { - parser->parse_error = 
PARSE_ERROR_MEMORY; - return MP_PARSE_NODE_NULL; - } - pn->source_line = src_line; - pn->kind_num_nodes = rule_kind | (2 << 8); - char *p = m_new(char, len); - memcpy(p, str, len); - pn->nodes[0] = (uintptr_t)p; - pn->nodes[1] = len; - return (mp_parse_node_t)pn; -} - STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) { mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t)); if (pn == NULL) { @@ -473,8 +451,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) { // qstr exists, make a leaf node pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst); } else { - // not interned, make a node holding a pointer to the string/bytes data - pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len); + // not interned, make a node holding a pointer to the string/bytes object + mp_obj_t o = mp_obj_new_str_of_type( + lex->tok_kind == MP_TOKEN_STRING ? 
&mp_type_str : &mp_type_bytes, + (const byte*)lex->vstr.buf, lex->vstr.len); + pn = make_node_const_object(parser, lex->tok_line, o); } } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind); @@ -934,15 +915,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { // this code discards lonely statements, such as doc strings if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) { mp_parse_node_t p = peek_result(&parser, 1); - if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) { + if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) + || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) { pop_result(&parser); // MP_PARSE_NODE_NULL - mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string - if (MP_PARSE_NODE_IS_STRUCT(pn)) { - mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; - if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { - m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]); - } - } + pop_result(&parser); // const expression (leaf or RULE_const_object) + // Pushing the "pass" rule here will overwrite any RULE_const_object + // entry that was on the result stack, allowing the GC to reclaim + // the memory from the const object when needed. push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0); break; } diff --git a/tests/cmdline/cmd_parsetree.py.exp b/tests/cmdline/cmd_parsetree.py.exp index 36f1f1b271..17fecaf960 100644 --- a/tests/cmdline/cmd_parsetree.py.exp +++ b/tests/cmdline/cmd_parsetree.py.exp @@ -15,13 +15,13 @@ str(str) [ 8] rule(5) (n=2) id(c) -[ 8] literal str(a very long str that will not be interned) +[ 8] literal \.\+ [ 9] rule(5) (n=2) id(d) bytes(bytes) [ 10] rule(5) (n=2) id(e) -[ 10] literal bytes(a very long bytes that will not be interned) +[ 10] literal \.\+ [ 11] rule(5) (n=2) id(f) [ 11] literal \.\+