From 5fa93b67557f21c22a41449c3266571c427f6798 Mon Sep 17 00:00:00 2001 From: Damien George Date: Wed, 22 Jan 2014 14:35:10 +0000 Subject: [PATCH] Second stage of qstr revamp: uPy str object can be qstr or not. --- py/builtin.c | 16 ++- py/builtineval.c | 2 +- py/builtinimport.c | 7 +- py/obj.c | 52 ++------ py/obj.h | 18 ++- py/objarray.c | 6 +- py/objfun.c | 8 +- py/objint.c | 35 +++-- py/objint_longlong.c | 8 +- py/objmodule.c | 2 +- py/objstr.c | 298 ++++++++++++++++++++++++++++--------------- py/objtype.c | 6 +- py/qstr.c | 10 +- py/qstr.h | 3 + py/runtime.c | 87 ++++++------- py/stream.c | 17 +-- stm/lcd.c | 4 +- stm/main.c | 39 +++--- stm/string0.c | 2 +- stm/usart.c | 11 +- unix-cpy/main.c | 1 + unix/file.c | 4 +- unix/main.c | 2 +- unix/socket.c | 14 +- 24 files changed, 377 insertions(+), 275 deletions(-) diff --git a/py/builtin.c b/py/builtin.c index 9cbc037677..56cb49de72 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -139,8 +139,8 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_callable_obj, mp_builtin_callable); static mp_obj_t mp_builtin_chr(mp_obj_t o_in) { int ord = mp_obj_get_int(o_in); if (0 <= ord && ord <= 0x10ffff) { - char str[1] = {ord}; - return mp_obj_new_str(qstr_from_strn(str, 1)); + byte str[1] = {ord}; + return mp_obj_new_str(str, 1, true); } else { nlr_jump(mp_obj_new_exception_msg(MP_QSTR_ValueError, "chr() arg not in range(0x110000)")); } @@ -258,7 +258,7 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_next_obj, mp_builtin_next); static mp_obj_t mp_builtin_ord(mp_obj_t o_in) { uint len; - const byte *str = qstr_data(mp_obj_get_qstr(o_in), &len); + const byte *str = mp_obj_str_get_data(o_in, &len); if (len == 1) { return mp_obj_new_int(str[0]); } else { @@ -305,8 +305,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_range_obj, 1, 3, mp_builtin_range static mp_obj_t mp_builtin_repr(mp_obj_t o_in) { vstr_t *vstr = vstr_new(); mp_obj_print_helper((void (*)(void *env, const char *fmt, ...))vstr_printf, vstr, o_in, PRINT_REPR); - // TODO don't intern this string - return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len)); + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; } MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_repr_obj, mp_builtin_repr); @@ -345,8 +346,9 @@ MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); static mp_obj_t mp_builtin_str(mp_obj_t o_in) { vstr_t *vstr = vstr_new(); mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR); - // TODO don't intern this string - return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len)); + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; } MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str); diff --git a/py/builtineval.c b/py/builtineval.c index 67072a0fa7..0e8f9e31d2 100644 --- a/py/builtineval.c +++ b/py/builtineval.c @@ -21,7 +21,7 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) { uint str_len; - const byte *str = qstr_data(mp_obj_get_qstr(o_in), &str_len); + const byte *str = mp_obj_str_get_data(o_in, &str_len); // create the lexer mp_lexer_t *lex = mp_lexer_new_from_str_len("", (const char*)str, str_len, 0); diff --git a/py/builtinimport.c b/py/builtinimport.c index 4cdad4e249..3cfd64e887 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -29,7 +29,10 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { } */ - qstr mod_name = mp_obj_get_qstr(args[0]); + uint mod_name_l; + const byte *mod_name_s = mp_obj_str_get_data(args[0], &mod_name_l); + qstr mod_name = qstr_from_strn((const char*)mod_name_s, mod_name_l); + mp_obj_t loaded = mp_obj_module_get(mod_name); if (loaded != MP_OBJ_NULL) { return loaded; @@ -43,7 +46,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { } // create a new module object - mp_obj_t module_obj = mp_obj_new_module(mp_obj_get_qstr(args[0])); + mp_obj_t module_obj = mp_obj_new_module(mod_name); // save the old context mp_map_t *old_locals = rt_locals_get(); diff --git a/py/obj.c b/py/obj.c index a78af595a6..c2f726bb99 100644 --- a/py/obj.c +++ b/py/obj.c @@ -14,7 +14,7 @@ #include "runtime.h" #include "map.h" -mp_obj_t mp_obj_get_type(mp_obj_t o_in) { +mp_obj_type_t *mp_obj_get_type(mp_obj_t o_in) { if (MP_OBJ_IS_SMALL_INT(o_in)) { return (mp_obj_t)&int_type; } else if (MP_OBJ_IS_QSTR(o_in)) { @@ -26,14 +26,7 @@ mp_obj_t mp_obj_get_type(mp_obj_t o_in) { } const char *mp_obj_get_type_str(mp_obj_t o_in) { - if (MP_OBJ_IS_SMALL_INT(o_in)) { - return "int"; - } else if (MP_OBJ_IS_QSTR(o_in)) { - return "str"; - } else { - mp_obj_base_t *o = o_in; - return o->type->name; - } + return mp_obj_get_type(o_in)->name; } void printf_wrapper(void *env, const char *fmt, ...) { @@ -44,17 +37,11 @@ void printf_wrapper(void *env, const char *fmt, ...) { } void mp_obj_print_helper(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in, mp_print_kind_t kind) { - if (MP_OBJ_IS_SMALL_INT(o_in)) { - print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(o_in)); - } else if (MP_OBJ_IS_QSTR(o_in)) { - mp_obj_str_print_qstr(print, env, MP_OBJ_QSTR_VALUE(o_in), kind); + mp_obj_type_t *type = mp_obj_get_type(o_in); + if (type->print != NULL) { + type->print(print, env, o_in, kind); } else { - mp_obj_base_t *o = o_in; - if (o->type->print != NULL) { - o->type->print(print, env, o_in, kind); - } else { - print(env, "<%s>", o->type->name); - } + print(env, "<%s>", type->name); } } @@ -94,12 +81,10 @@ machine_int_t mp_obj_hash(mp_obj_t o_in) { return 1; // needs to hash to same as the integer 1, since True==1 } else if (MP_OBJ_IS_SMALL_INT(o_in)) { return MP_OBJ_SMALL_INT_VALUE(o_in); - } else if (MP_OBJ_IS_QSTR(o_in)) { - return MP_OBJ_QSTR_VALUE(o_in); + } else if (MP_OBJ_IS_STR(o_in)) { + return mp_obj_str_get_hash(o_in); } else if (MP_OBJ_IS_TYPE(o_in, &none_type)) { return (machine_int_t)o_in; - } else if (MP_OBJ_IS_TYPE(o_in, &str_type)) { - return mp_obj_str_get(o_in); } else { assert(0); return 0; @@ -138,10 +123,8 @@ bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2) { } return false; } - } else if (MP_OBJ_IS_QSTR(o1) || MP_OBJ_IS_QSTR(o2)) { - return false; - } else if (MP_OBJ_IS_TYPE(o1, &str_type) && MP_OBJ_IS_TYPE(o2, &str_type)) { - return mp_obj_str_get(o1) == mp_obj_str_get(o2); + } else if (MP_OBJ_IS_STR(o1) && MP_OBJ_IS_STR(o2)) { + return mp_obj_str_equal(o1, o2); } else { mp_obj_base_t *o = o1; if (o->type->binary_op != NULL) { @@ -218,17 +201,6 @@ void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) { } #endif -qstr mp_obj_get_qstr(mp_obj_t arg) { - if (MP_OBJ_IS_QSTR(arg)) { - return MP_OBJ_QSTR_VALUE(arg); - } else if (MP_OBJ_IS_TYPE(arg, &str_type)) { - return mp_obj_str_get(arg); - } else { - assert(0); - return 0; - } -} - mp_obj_t *mp_obj_get_array_fixed_n(mp_obj_t o_in, machine_int_t n) { if (MP_OBJ_IS_TYPE(o_in, &tuple_type) || MP_OBJ_IS_TYPE(o_in, &list_type)) { uint seq_len; @@ -266,8 +238,8 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index) // may return MP_OBJ_NULL mp_obj_t mp_obj_len_maybe(mp_obj_t o_in) { mp_small_int_t len = 0; - if (MP_OBJ_IS_TYPE(o_in, &str_type)) { - len = qstr_len(mp_obj_str_get(o_in)); + if (MP_OBJ_IS_STR(o_in)) { + len = mp_obj_str_get_len(o_in); } else if (MP_OBJ_IS_TYPE(o_in, &tuple_type)) { uint seq_len; mp_obj_t *seq_items; diff --git a/py/obj.h b/py/obj.h index e98cc552ec..05ccb27574 100644 --- a/py/obj.h +++ b/py/obj.h @@ -40,6 +40,7 @@ typedef struct _mp_obj_base_t mp_obj_base_t; #define MP_OBJ_IS_QSTR(o) ((((mp_small_int_t)(o)) & 3) == 2) #define MP_OBJ_IS_OBJ(o) ((((mp_small_int_t)(o)) & 3) == 0) #define MP_OBJ_IS_TYPE(o, t) (MP_OBJ_IS_OBJ(o) && (((mp_obj_base_t*)(o))->type == (t))) +#define MP_OBJ_IS_STR(o) (MP_OBJ_IS_QSTR(o) || MP_OBJ_IS_TYPE(o, &str_type)) #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_small_int_t)(o)) >> 1) #define MP_OBJ_NEW_SMALL_INT(small_int) ((mp_obj_t)(((small_int) << 1) | 1)) @@ -199,14 +200,14 @@ extern const mp_obj_t mp_const_stop_iteration; // special object indicating end // General API for objects -mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict); +mp_obj_t mp_obj_new_type(const char *name, mp_obj_t bases_tuple, mp_obj_t locals_dict); mp_obj_t mp_obj_new_none(void); mp_obj_t mp_obj_new_bool(bool value); mp_obj_t mp_obj_new_cell(mp_obj_t obj); mp_obj_t mp_obj_new_int(machine_int_t value); mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value); mp_obj_t mp_obj_new_int_from_long_str(const char *s); -mp_obj_t mp_obj_new_str(qstr qstr); +mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already); #if MICROPY_ENABLE_FLOAT mp_obj_t mp_obj_new_float(mp_float_t val); mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag); @@ -231,7 +232,7 @@ mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step); mp_obj_t mp_obj_new_bound_meth(mp_obj_t meth, mp_obj_t self); mp_obj_t mp_obj_new_module(qstr module_name); -mp_obj_t mp_obj_get_type(mp_obj_t o_in); +mp_obj_type_t *mp_obj_get_type(mp_obj_t o_in); const char *mp_obj_get_type_str(mp_obj_t o_in); void mp_obj_print_helper(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in, mp_print_kind_t kind); @@ -248,7 +249,7 @@ machine_int_t mp_obj_get_int(mp_obj_t arg); mp_float_t mp_obj_get_float(mp_obj_t self_in); void mp_obj_get_complex(mp_obj_t self_in, mp_float_t *real, mp_float_t *imag); #endif -qstr mp_obj_get_qstr(mp_obj_t arg); +//qstr mp_obj_get_qstr(mp_obj_t arg); mp_obj_t *mp_obj_get_array_fixed_n(mp_obj_t o, machine_int_t n); uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index); mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); /* may return NULL */ @@ -279,8 +280,13 @@ void mp_obj_exception_get_traceback(mp_obj_t self_in, machine_uint_t *n, machine // str extern const mp_obj_type_t str_type; -qstr mp_obj_str_get(mp_obj_t self_in); -void mp_obj_str_print_qstr(void (*print)(void *env, const char *fmt, ...), void *env, qstr q, mp_print_kind_t kind); +mp_obj_t mp_obj_str_builder_start(uint len, byte **data); +mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in); +bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2); +uint mp_obj_str_get_hash(mp_obj_t self_in); +uint mp_obj_str_get_len(mp_obj_t self_in); +const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated +const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len); #if MICROPY_ENABLE_FLOAT // float diff --git a/py/objarray.c b/py/objarray.c index 343a3f6e93..4f36561153 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -167,9 +167,11 @@ static mp_obj_t array_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m switch (n_args) { case 2: { - const char *code = qstr_str(mp_obj_str_get(args[0])); + // TODO check args + uint l; + const byte *s = mp_obj_str_get_data(args[0], &l); mp_obj_t initializer = args[1]; - return array_construct(*code, initializer); + return array_construct(*s, initializer); } default: diff --git a/py/objfun.c b/py/objfun.c index b749860c25..1f6ad68ea4 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -56,8 +56,7 @@ mp_obj_t fun_native_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_ // TODO if n_kw==0 then don't allocate any memory for map (either pass NULL or allocate it on the heap) mp_map_t *kw_args = mp_map_new(n_kw); for (int i = 0; i < 2 * n_kw; i += 2) { - qstr name = mp_obj_str_get(args[n_args + i]); - mp_map_lookup(kw_args, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[n_args + i + 1]; + mp_map_lookup(kw_args, args[n_args + i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[n_args + i + 1]; } mp_obj_t res = ((mp_fun_kw_t)self->fun)(n_args, args, kw_args); // TODO clean up kw_args @@ -214,9 +213,10 @@ machine_uint_t convert_obj_for_inline_asm(mp_obj_t obj) { return 0; } else if (obj == mp_const_true) { return 1; - } else if (MP_OBJ_IS_TYPE(obj, &str_type)) { + } else if (MP_OBJ_IS_STR(obj)) { // pointer to the string (it's probably constant though!) - return (machine_uint_t)qstr_str(mp_obj_str_get(obj)); + uint l; + return (machine_uint_t)mp_obj_str_get_data(obj, &l); #if MICROPY_ENABLE_FLOAT } else if (MP_OBJ_IS_TYPE(obj, &float_type)) { // convert float to int (could also pass in float registers) diff --git a/py/objint.c b/py/objint.c index 02628b7ef9..1305f1900e 100644 --- a/py/objint.c +++ b/py/objint.c @@ -20,34 +20,35 @@ static mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ return MP_OBJ_NEW_SMALL_INT(0); case 1: - if (MP_OBJ_IS_TYPE(args[0], &str_type)) { + if (MP_OBJ_IS_STR(args[0])) { // a string, parse it - return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[0])), 0)); + uint l; + const byte *s = mp_obj_str_get_data(args[0], &l); + return MP_OBJ_NEW_SMALL_INT(strtonum((const char*)s, 0)); } else { return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0])); } case 2: + { // should be a string, parse it // TODO proper error checking of argument types - return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[0])), mp_obj_get_int(args[1]))); + uint l; + const byte *s = mp_obj_str_get_data(args[0], &l); + return MP_OBJ_NEW_SMALL_INT(strtonum((const char*)s, mp_obj_get_int(args[1]))); + } default: nlr_jump(mp_obj_new_exception_msg_1_arg(MP_QSTR_TypeError, "int takes at most 2 arguments, %d given", (void*)(machine_int_t)n_args)); } } -const mp_obj_type_t int_type = { - { &mp_const_type }, - "int", - .print = int_print, - .make_new = int_make_new, - .binary_op = int_binary_op, -}; - #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE -// This is called only for non-SMALL_INT + void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { + if (MP_OBJ_IS_SMALL_INT(self_in)) { + print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(self_in)); + } } // This is called only for non-SMALL_INT @@ -88,4 +89,12 @@ machine_int_t mp_obj_int_get_checked(mp_obj_t self_in) { return MP_OBJ_SMALL_INT_VALUE(self_in); } -#endif +#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE + +const mp_obj_type_t int_type = { + { &mp_const_type }, + "int", + .print = int_print, + .make_new = int_make_new, + .binary_op = int_binary_op, +}; diff --git a/py/objint_longlong.c b/py/objint_longlong.c index fd13a038b6..7eaee3bc9d 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -24,8 +24,12 @@ static mp_obj_t mp_obj_new_int_from_ll(long long val); #endif void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { - mp_obj_int_t *self = self_in; - print(env, "%lld" SUFFIX, self->val); + if (MP_OBJ_IS_SMALL_INT(self_in)) { + print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(self_in)); + } else { + mp_obj_int_t *self = self_in; + print(env, "%lld" SUFFIX, self->val); + } } mp_obj_t int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { diff --git a/py/objmodule.c b/py/objmodule.c index 73f146131e..749d345bcf 100644 --- a/py/objmodule.c +++ b/py/objmodule.c @@ -64,7 +64,7 @@ mp_obj_t mp_obj_new_module(qstr module_name) { o->name = module_name; o->globals = mp_map_new(1); el->value = o; - mp_map_lookup(o->globals, MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = mp_obj_new_str(module_name); + mp_map_lookup(o->globals, MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = MP_OBJ_NEW_QSTR(module_name); return o; } diff --git a/py/objstr.c b/py/objstr.c index f4dc857398..09d4958fbb 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -14,28 +14,35 @@ typedef struct _mp_obj_str_t { mp_obj_base_t base; - qstr qstr; + machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c) + machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte + byte data[]; } mp_obj_str_t; -static mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur); +// use this macro to extract the string hash +#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } + +// use this macro to extract the string length +#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; } + +// use this macro to extract the string data and length +#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } + +static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur); /******************************************************************************/ /* str */ -void mp_obj_str_print_qstr(void (*print)(void *env, const char *fmt, ...), void *env, qstr q, mp_print_kind_t kind) { +void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { + GET_STR_DATA_LEN(self_in, str_data, str_len); if (kind == PRINT_STR) { - print(env, "%s", qstr_str(q)); + print(env, "%.*s", str_len, str_data); } else { // TODO need to escape chars etc - print(env, "'%s'", qstr_str(q)); + print(env, "'%.*s'", str_len, str_data); } } -void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { - mp_obj_str_t *self = self_in; - mp_obj_str_print_qstr(print, env, self->qstr, kind); -} - // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -57,16 +64,14 @@ static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *ne } mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { - mp_obj_str_t *lhs = lhs_in; - uint lhs_len; - const byte *lhs_data = qstr_data(lhs->qstr, &lhs_len); + GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len); switch (op) { case RT_BINARY_OP_SUBSCR: // TODO: need predicate to check for int-like type (bools are such for example) // ["no", "yes"][1 == 2] is common idiom if (MP_OBJ_IS_SMALL_INT(rhs_in)) { - uint index = mp_get_index(lhs->base.type, lhs_len, rhs_in); - return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + index, 1)); + uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in); + return mp_obj_new_str(lhs_data + index, 1, true); #if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { machine_int_t start, stop, step; @@ -89,7 +94,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { } else if (stop > lhs_len) { stop = lhs_len; } - return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + start, stop - start)); + return mp_obj_new_str(lhs_data + start, stop - start, false); #endif } else { // Message doesn't match CPython, but we don't have so much bytes as they @@ -99,37 +104,48 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: - if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) { + if (MP_OBJ_IS_STR(rhs_in)) { // add 2 strings - uint rhs_len; - const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len); + + GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len); int alloc_len = lhs_len + rhs_len; + + /* code for making qstr byte *q_ptr; byte *val = qstr_build_start(alloc_len, &q_ptr); memcpy(val, lhs_data, lhs_len); memcpy(val + lhs_len, rhs_data, rhs_len); - return mp_obj_new_str(qstr_build_end(q_ptr)); + return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr)); + */ + + // code for non-qstr + byte *data; + mp_obj_t s = mp_obj_str_builder_start(alloc_len, &data); + memcpy(data, lhs_data, lhs_len); + memcpy(data + lhs_len, rhs_data, rhs_len); + return mp_obj_str_builder_end(s); } break; + case RT_COMPARE_OP_IN: case RT_COMPARE_OP_NOT_IN: /* NOTE `a in b` is `b.__contains__(a)` */ - if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) { - uint rhs_len; - const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len); + if (MP_OBJ_IS_STR(rhs_in)) { + GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len); return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) == NULL)); - return mp_const_false; } break; + case RT_BINARY_OP_MULTIPLY: { if (!MP_OBJ_IS_SMALL_INT(rhs_in)) { return NULL; } int n = MP_OBJ_SMALL_INT_VALUE(rhs_in); - char *s = m_new(char, lhs_len * n); - mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, s); - return MP_OBJ_NEW_QSTR(qstr_from_strn_take(s, lhs_len * n, lhs_len * n)); + byte *data; + mp_obj_t s = mp_obj_str_builder_start(lhs_len * n, &data); + mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data); + return mp_obj_str_builder_end(s); } } @@ -141,12 +157,10 @@ static mp_obj_t str_getiter(mp_obj_t o_in) { } mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { - assert(MP_OBJ_IS_TYPE(self_in, &str_type)); - mp_obj_str_t *self = self_in; + assert(MP_OBJ_IS_STR(self_in)); // get separation string - const char *sep_str = qstr_str(self->qstr); - size_t sep_len = strlen(sep_str); + GET_STR_DATA_LEN(self_in, sep_str, sep_len); // process args uint seq_len; @@ -162,32 +176,31 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { // count required length int required_len = 0; for (int i = 0; i < seq_len; i++) { - if (!MP_OBJ_IS_TYPE(seq_items[i], &str_type)) { + if (!MP_OBJ_IS_STR(seq_items[i])) { goto bad_arg; } if (i > 0) { required_len += sep_len; } - required_len += strlen(qstr_str(mp_obj_str_get(seq_items[i]))); + GET_STR_LEN(seq_items[i], l); + required_len += l; } // make joined string - byte *q_ptr; - byte *s_dest = qstr_build_start(required_len, &q_ptr); + byte *data; + mp_obj_t joined_str = mp_obj_str_builder_start(required_len, &data); for (int i = 0; i < seq_len; i++) { if (i > 0) { - memcpy(s_dest, sep_str, sep_len); - s_dest += sep_len; + memcpy(data, sep_str, sep_len); + data += sep_len; } - uint s2_len; - const byte *s2 = qstr_data(mp_obj_str_get(seq_items[i]), &s2_len); - memcpy(s_dest, s2, s2_len); - s_dest += s2_len; + GET_STR_DATA_LEN(seq_items[i], s, l); + memcpy(data, s, l); + data += l; } - qstr q = qstr_build_end(q_ptr); // return joined string - return mp_obj_new_str(q); + return mp_obj_str_builder_end(joined_str); bad_arg: nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's")); @@ -207,47 +220,39 @@ static mp_obj_t str_split(uint n_args, const mp_obj_t *args) { assert(sep == mp_const_none); (void)sep; // unused; to hush compiler warning mp_obj_t res = mp_obj_new_list(0, NULL); - const char *s = qstr_str(mp_obj_str_get(args[0])); - const char *start; + GET_STR_DATA_LEN(args[0], s, len); + const byte *top = s + len; + const byte *start; // Initial whitespace is not counted as split, so we pre-do it - while (is_ws(*s)) s++; - while (*s && splits != 0) { + while (s < top && is_ws(*s)) s++; + while (s < top && splits != 0) { start = s; - while (*s != 0 && !is_ws(*s)) s++; - rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn(start, s - start))); - if (*s == 0) { + while (s < top && !is_ws(*s)) s++; + rt_list_append(res, mp_obj_new_str(start, s - start, false)); + if (s >= top) { break; } - while (is_ws(*s)) s++; + while (s < top && is_ws(*s)) s++; if (splits > 0) { splits--; } } - if (*s != 0) { - rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_str(s))); + if (s < top) { + rt_list_append(res, mp_obj_new_str(s, top - s, false)); } return res; } -static bool chr_in_str(const char* const str, const size_t str_len, const char c) { - for (size_t i = 0; i < str_len; i++) { - if (str[i] == c) { - return true; - } - } - return false; -} - static mp_obj_t str_find(uint n_args, const mp_obj_t *args) { assert(2 <= n_args && n_args <= 4); - const char* haystack = qstr_str(mp_obj_str_get(args[0])); - const char* needle = qstr_str(mp_obj_str_get(args[1])); + assert(MP_OBJ_IS_STR(args[0])); + assert(MP_OBJ_IS_STR(args[1])); - size_t haystack_len = strlen(haystack); - size_t needle_len = strlen(needle); + GET_STR_DATA_LEN(args[0], haystack, haystack_len); + GET_STR_DATA_LEN(args[1], needle, needle_len); size_t start = 0; size_t end = haystack_len; @@ -259,7 +264,7 @@ static mp_obj_t str_find(uint n_args, const mp_obj_t *args) { end = mp_get_index(&str_type, haystack_len, args[3]); } - char *p = strstr(haystack + start, needle); + const byte *p = find_subbytes(haystack + start, haystack_len - start, needle, needle_len); if (p == NULL) { // not found return MP_OBJ_NEW_SMALL_INT(-1); @@ -273,21 +278,34 @@ static mp_obj_t str_find(uint n_args, const mp_obj_t *args) { } } +static bool chr_in_str(const byte* const str, const size_t str_len, int c) { + for (size_t i = 0; i < str_len; i++) { + if (str[i] == c) { + return true; + } + } + return false; +} + mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { assert(1 <= n_args && n_args <= 2); - assert(MP_OBJ_IS_TYPE(args[0], &str_type)); - const char *chars_to_del; - static const char whitespace[] = " \t\n\r\v\f"; + assert(MP_OBJ_IS_STR(args[0])); + + const byte *chars_to_del; + uint chars_to_del_len; + static const byte whitespace[] = " \t\n\r\v\f"; if (n_args == 1) { chars_to_del = whitespace; + chars_to_del_len = sizeof(whitespace); } else { - chars_to_del = qstr_str(mp_obj_str_get(args[1])); + assert(MP_OBJ_IS_STR(args[1])); + GET_STR_DATA_LEN(args[1], s, l); + chars_to_del = s; + chars_to_del_len = l; } - const size_t chars_to_del_len = strlen(chars_to_del); - const char *orig_str = qstr_str(mp_obj_str_get(args[0])); - const size_t orig_str_len = strlen(orig_str); + GET_STR_DATA_LEN(args[0], orig_str, orig_str_len); size_t first_good_char_pos = 0; bool first_good_char_pos_set = false; @@ -303,30 +321,29 @@ mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { } if (first_good_char_pos == 0 && last_good_char_pos == 0) { - //string is all whitespace, return '' - return mp_obj_new_str(MP_QSTR_); + // string is all whitespace, return '' + return MP_OBJ_NEW_QSTR(MP_QSTR_); } assert(last_good_char_pos >= first_good_char_pos); //+1 to accomodate the last character size_t stripped_len = last_good_char_pos - first_good_char_pos + 1; - return mp_obj_new_str(qstr_from_strn(orig_str + first_good_char_pos, stripped_len)); + return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false); } mp_obj_t str_format(uint n_args, const mp_obj_t *args) { - assert(MP_OBJ_IS_TYPE(args[0], &str_type)); - mp_obj_str_t *self = args[0]; + assert(MP_OBJ_IS_STR(args[0])); - const char *str = qstr_str(self->qstr); + GET_STR_DATA_LEN(args[0], str, len); int arg_i = 1; vstr_t *vstr = vstr_new(); - for (; *str; str++) { + for (const byte *top = str + len; str < top; str++) { if (*str == '{') { str++; - if (*str == '{') { + if (str < top && *str == '{') { vstr_add_char(vstr, '{'); } else { - while (*str != '}') str++; + while (str < top && *str != '}') str++; if (arg_i >= n_args) { nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range")); } @@ -339,7 +356,9 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) { } } - return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len)); + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; } static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find); @@ -366,23 +385,102 @@ const mp_obj_type_t str_type = { .methods = str_type_methods, }; -mp_obj_t mp_obj_new_str(qstr qstr) { - mp_obj_str_t *o = m_new_obj(mp_obj_str_t); +mp_obj_t mp_obj_str_builder_start(uint len, byte **data) { + mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); o->base.type = &str_type; - o->qstr = qstr; + o->len = len; + *data = o->data; return o; } -qstr mp_obj_str_get(mp_obj_t self_in) { - if (MP_OBJ_IS_QSTR(self_in)) { - return MP_OBJ_QSTR_VALUE(self_in); +mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { + assert(MP_OBJ_IS_STR(o_in)); + mp_obj_str_t *o = o_in; + o->hash = qstr_compute_hash(o->data, o->len); + o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + return o; +} + +mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) { + qstr q = qstr_find_strn(data, len); + if (q != MP_QSTR_NULL) { + // qstr with this data already exists + return MP_OBJ_NEW_QSTR(q); + } else if (make_qstr_if_not_already) { + // no existing qstr, make a new one + return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len)); + } else { + // no existing qstr, don't make one + mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + o->base.type = &str_type; + o->hash = qstr_compute_hash(data, len); + o->len = len; + memcpy(o->data, data, len * sizeof(byte)); + o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + return o; } - if (MP_OBJ_IS_TYPE(self_in, &str_type)) { - mp_obj_str_t *self = self_in; - return self->qstr; +} + +bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { + if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) { + return s1 == s2; + } else { + GET_STR_HASH(s1, h1); + GET_STR_HASH(s2, h2); + if (h1 != h2) { + return false; + } + GET_STR_DATA_LEN(s1, d1, l1); + GET_STR_DATA_LEN(s2, d2, l2); + if (l1 != l2) { + return false; + } + return strncmp((const char*)d1, (const char*)d2, l1) == 0; + } +} + +uint mp_obj_str_get_hash(mp_obj_t self_in) { + if (MP_OBJ_IS_STR(self_in)) { + GET_STR_HASH(self_in, h); + return h; + } else { + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", + mp_obj_get_type_str(self_in))); + } +} + +uint mp_obj_str_get_len(mp_obj_t self_in) { + if (MP_OBJ_IS_STR(self_in)) { + GET_STR_LEN(self_in, l); + return l; + } else { + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", + mp_obj_get_type_str(self_in))); + } +} + +// only use this function if you need the str data to be zero terminated +// at the moment all strings are zero terminated to help with C ASCIIZ compatibility +const char *mp_obj_str_get_str(mp_obj_t self_in) { + if (MP_OBJ_IS_STR(self_in)) { + GET_STR_DATA_LEN(self_in, s, l); + (void)l; // len unused + return (const char*)s; + } else { + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", + mp_obj_get_type_str(self_in))); + } +} + +const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len) { + if (MP_OBJ_IS_STR(self_in)) { + GET_STR_DATA_LEN(self_in, s, l); + *len = l; + return s; + } else { + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", + mp_obj_get_type_str(self_in))); } - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); } /******************************************************************************/ @@ -390,15 +488,15 @@ qstr mp_obj_str_get(mp_obj_t self_in) { typedef struct _mp_obj_str_it_t { mp_obj_base_t base; - mp_obj_str_t *str; + mp_obj_t str; machine_uint_t cur; } mp_obj_str_it_t; mp_obj_t str_it_iternext(mp_obj_t self_in) { mp_obj_str_it_t *self = self_in; - const char *str = qstr_str(self->str->qstr); - if (self->cur < strlen(str)) { - mp_obj_t o_out = mp_obj_new_str(qstr_from_strn(str + self->cur, 1)); + GET_STR_DATA_LEN(self->str, str, len); + if (self->cur < len) { + mp_obj_t o_out = mp_obj_new_str(str + self->cur, 1, true); self->cur += 1; return o_out; } else { @@ -412,7 +510,7 @@ static const mp_obj_type_t str_it_type = { .iternext = str_it_iternext, }; -mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur) { +mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur) { mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); o->base.type = &str_it_type; o->str = str; diff --git a/py/objtype.c b/py/objtype.c index 5dea6938d7..75755f4fb9 100644 --- a/py/objtype.c +++ b/py/objtype.c @@ -251,7 +251,7 @@ static mp_obj_t type_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp // args[0] = name // args[1] = bases tuple // args[2] = locals dict - return mp_obj_new_type(mp_obj_get_qstr(args[0]), args[1], args[2]); + return mp_obj_new_type(mp_obj_str_get_str(args[0]), args[1], args[2]); default: nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "type takes at 1 or 3 arguments")); @@ -323,12 +323,12 @@ const mp_obj_type_t mp_const_type = { .store_attr = type_store_attr, }; -mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict) { +mp_obj_t mp_obj_new_type(const char *name, mp_obj_t bases_tuple, mp_obj_t locals_dict) { assert(MP_OBJ_IS_TYPE(bases_tuple, &tuple_type)); // Micro Python restriction, for now assert(MP_OBJ_IS_TYPE(locals_dict, &dict_type)); // Micro Python restriction, for now mp_obj_type_t *o = m_new0(mp_obj_type_t, 1); o->base.type = &mp_const_type; - o->name = qstr_str(name); + o->name = name; o->print = class_print; o->make_new = class_make_new; o->binary_op = class_binary_op; diff --git a/py/qstr.c b/py/qstr.c index 2830341a2d..6ce9e8be5a 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -29,7 +29,7 @@ #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) #define Q_GET_DATA(q) ((q) + 4) -static machine_uint_t compute_hash(const byte *data, uint len) { +machine_uint_t qstr_compute_hash(const byte *data, uint len) { machine_uint_t hash = 0; for (const byte *top = data + len; data < top; data++) { hash += *data; @@ -99,9 +99,9 @@ static qstr qstr_add(const byte *q_ptr) { return last_pool->total_prev_len + last_pool->len - 1; } -static qstr qstr_find_strn(const byte *str, uint str_len) { +qstr qstr_find_strn(const byte *str, uint str_len) { // work out hash of str - machine_uint_t str_hash = compute_hash((const byte*)str, str_len); + machine_uint_t str_hash = qstr_compute_hash((const byte*)str, str_len); // search pools for the data for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) { @@ -123,7 +123,7 @@ qstr qstr_from_str(const char *str) { qstr qstr_from_strn(const char *str, uint len) { qstr q = qstr_find_strn((const byte*)str, len); if (q == 0) { - machine_uint_t hash = compute_hash((const byte*)str, len); + machine_uint_t hash = qstr_compute_hash((const byte*)str, len); byte *q_ptr = m_new(byte, 4 + len + 1); q_ptr[0] = hash; q_ptr[1] = hash >> 8; @@ -154,7 +154,7 @@ qstr qstr_build_end(byte *q_ptr) { qstr q = qstr_find_strn(Q_GET_DATA(q_ptr), Q_GET_LENGTH(q_ptr)); if (q == 0) { machine_uint_t len = Q_GET_LENGTH(q_ptr); - machine_uint_t hash = compute_hash(Q_GET_DATA(q_ptr), len); + machine_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len); q_ptr[0] = hash; q_ptr[1] = hash >> 8; q_ptr[4 + len] = '\0'; diff --git a/py/qstr.h b/py/qstr.h index 5c331c34af..271e2117c9 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -20,6 +20,9 @@ typedef machine_uint_t qstr; void qstr_init(void); +machine_uint_t qstr_compute_hash(const byte *data, uint len); +qstr qstr_find_strn(const byte *str, uint str_len); // returns MP_QSTR_NULL if not found + qstr qstr_from_str(const char *str); qstr qstr_from_strn(const char *str, uint len); //qstr qstr_from_str_static(const char *str); diff --git a/py/runtime.c b/py/runtime.c index ccd3d7d0af..7937a65b83 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -18,6 +18,7 @@ #include "map.h" #include "builtin.h" #include "objarray.h" +#include "bc.h" #if 0 // print debugging info #define DEBUG_PRINT (1) @@ -77,7 +78,7 @@ static void mp_map_add_qstr(mp_map_t *map, qstr qstr, mp_obj_t value) { void rt_init(void) { // locals = globals for outer module (see Objects/frameobject.c/PyFrame_New()) map_locals = map_globals = mp_map_new(1); - mp_map_add_qstr(map_globals, MP_QSTR___name__, mp_obj_new_str(MP_QSTR___main__)); + mp_map_add_qstr(map_globals, MP_QSTR___name__, MP_OBJ_NEW_QSTR(MP_QSTR___main__)); // init built-in hash table mp_map_init(&map_builtins, 3); @@ -306,12 +307,8 @@ int rt_is_true(mp_obj_t arg) { return 0; } else if (arg == mp_const_true) { return 1; - } else if (MP_OBJ_IS_QSTR(arg)) { - // TODO: \0 - return *qstr_str(MP_OBJ_QSTR_VALUE(arg)) != 0; - } else if (MP_OBJ_IS_TYPE(arg, &str_type)) { - // TODO: \0 - return *qstr_str(mp_obj_str_get(arg)) != 0; + } else if (MP_OBJ_IS_STR(arg)) { + return mp_obj_str_get_len(arg) != 0; } else if (MP_OBJ_IS_TYPE(arg, &list_type)) { uint len; mp_obj_t *dummy; @@ -404,7 +401,7 @@ mp_obj_t rt_load_const_dec(qstr qstr) { mp_obj_t rt_load_const_str(qstr qstr) { DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); - return mp_obj_new_str(qstr); + return MP_OBJ_NEW_QSTR(qstr); } mp_obj_t rt_load_name(qstr qstr) { @@ -616,25 +613,23 @@ mp_obj_t rt_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { * needs to go below */ if (op == RT_COMPARE_OP_IN || op == RT_COMPARE_OP_NOT_IN) { - if (!MP_OBJ_IS_SMALL_INT(rhs)) { - mp_obj_base_t *o = rhs; - if (o->type->binary_op != NULL) { - mp_obj_t res = o->type->binary_op(op, rhs, lhs); - if (res != NULL) { - return res; + mp_obj_type_t *type = mp_obj_get_type(rhs); + if (type->binary_op != NULL) { + mp_obj_t res = type->binary_op(op, rhs, lhs); + if (res != NULL) { + return res; + } + } + if (type->getiter != NULL) { + /* second attempt, walk the iterator */ + mp_obj_t next = NULL; + mp_obj_t iter = rt_getiter(rhs); + while ((next = rt_iternext(iter)) != mp_const_stop_iteration) { + if (mp_obj_equal(next, lhs)) { + return MP_BOOL(op == RT_COMPARE_OP_IN); } } - if (o->type->getiter != NULL) { - /* second attempt, walk the iterator */ - mp_obj_t next = NULL; - mp_obj_t iter = rt_getiter(rhs); - while ((next = rt_iternext(iter)) != mp_const_stop_iteration) { - if (mp_obj_equal(next, lhs)) { - return MP_BOOL(op == RT_COMPARE_OP_IN); - } - } - return MP_BOOL(op != RT_COMPARE_OP_IN); - } + return MP_BOOL(op != RT_COMPARE_OP_IN); } nlr_jump(mp_obj_new_exception_msg_varg( @@ -643,17 +638,17 @@ mp_obj_t rt_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { return mp_const_none; } - if (MP_OBJ_IS_OBJ(lhs)) { - mp_obj_base_t *o = lhs; - if (o->type->binary_op != NULL) { - mp_obj_t result = o->type->binary_op(op, lhs, rhs); - if (result != NULL) { - return result; - } + // generic binary_op supplied by type + mp_obj_type_t *type = mp_obj_get_type(lhs); + if (type->binary_op != NULL) { + mp_obj_t result = type->binary_op(op, lhs, rhs); + if (result != MP_OBJ_NULL) { + return result; } - // TODO implement dispatch for reverse binary ops } + // TODO implement dispatch for reverse binary ops + // TODO specify in error message what the operator is nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "unsupported operand types for binary operator: '%s', '%s'", @@ -900,35 +895,27 @@ void rt_store_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t value) { } mp_obj_t rt_getiter(mp_obj_t o_in) { - if (MP_OBJ_IS_SMALL_INT(o_in)) { - nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not iterable")); + mp_obj_type_t *type = mp_obj_get_type(o_in); + if (type->getiter != NULL) { + return type->getiter(o_in); } else { - mp_obj_base_t *o = o_in; - if (o->type->getiter != NULL) { - return o->type->getiter(o_in); - } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not iterable", o->type->name)); - } + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not iterable", type->name)); } } mp_obj_t rt_iternext(mp_obj_t o_in) { - if (MP_OBJ_IS_SMALL_INT(o_in)) { - nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not an iterator")); + mp_obj_type_t *type = mp_obj_get_type(o_in); + if (type->iternext != NULL) { + return type->iternext(o_in); } else { - mp_obj_base_t *o = o_in; - if (o->type->iternext != NULL) { - return o->type->iternext(o_in); - } else { - nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not an iterator", o->type->name)); - } + nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not an iterator", type->name)); } } mp_obj_t rt_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) { // build args array mp_obj_t args[5]; - args[0] = mp_obj_new_str(name); + args[0] = MP_OBJ_NEW_QSTR(name); args[1] = mp_const_none; // TODO should be globals args[2] = mp_const_none; // TODO should be locals args[3] = fromlist; diff --git a/py/stream.c b/py/stream.c index be560d3c22..d47d7e4196 100644 --- a/py/stream.c +++ b/py/stream.c @@ -23,15 +23,15 @@ static mp_obj_t stream_read(uint n_args, const mp_obj_t *args) { if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) { return stream_readall(args[0]); } - char *buf = m_new(char, sz); + byte *buf = m_new(byte, sz); int error; machine_int_t out_sz = o->type->stream_p.read(o, buf, sz, &error); if (out_sz == -1) { nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_OSError, "[Errno %d]", error)); } else { - // TODO don't intern this string - buf = m_realloc(buf, sz, out_sz); - return mp_obj_new_str(qstr_from_strn_take(buf, out_sz, out_sz)); + mp_obj_t s = mp_obj_new_str(buf, out_sz, false); // will reallocate to use exact size + m_free(buf, sz); + return s; } } @@ -43,7 +43,7 @@ static mp_obj_t stream_write(mp_obj_t self_in, mp_obj_t arg) { } uint sz; - const byte *buf = qstr_data(mp_obj_get_qstr(arg), &sz); + const byte *buf = mp_obj_str_get_data(arg, &sz); int error; machine_int_t out_sz = o->type->stream_p.write(self_in, buf, sz, &error); if (out_sz == -1) { @@ -92,9 +92,10 @@ static mp_obj_t stream_readall(mp_obj_t self_in) { } } } - // TODO don't intern this string - vstr_set_size(vstr, total_size); - return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, total_size)); + + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, total_size, false); + vstr_free(vstr); + return s; } // Unbuffered, inefficient implementation of readline() for raw I/O files. diff --git a/stm/lcd.c b/stm/lcd.c index 56f0ffe64c..0567b43333 100644 --- a/stm/lcd.c +++ b/stm/lcd.c @@ -165,7 +165,9 @@ mp_obj_t lcd_pix_show(void) { } mp_obj_t lcd_print(mp_obj_t text) { - lcd_print_str(qstr_str(mp_obj_get_qstr(text))); + uint len; + const byte *data = mp_obj_str_get_data(text, &len); + lcd_print_strn((const char*)data, len); return mp_const_none; } diff --git a/stm/main.c b/stm/main.c index 990450202b..8d2c2d6b3d 100644 --- a/stm/main.c +++ b/stm/main.c @@ -69,16 +69,20 @@ void __fatal_error(const char *msg) { } } -static qstr pyb_config_source_dir = 0; -static qstr pyb_config_main = 0; +static mp_obj_t pyb_config_source_dir = MP_OBJ_NULL; +static mp_obj_t pyb_config_main = MP_OBJ_NULL; mp_obj_t pyb_source_dir(mp_obj_t source_dir) { - pyb_config_source_dir = mp_obj_get_qstr(source_dir); + if (MP_OBJ_IS_STR(source_dir)) { + pyb_config_source_dir = source_dir; + } return mp_const_none; } mp_obj_t pyb_main(mp_obj_t main) { - pyb_config_main = mp_obj_get_qstr(main); + if (MP_OBJ_IS_STR(main)) { + pyb_config_main = main; + } return mp_const_none; } @@ -482,7 +486,7 @@ mp_obj_t pyb_gc(void) { mp_obj_t pyb_gpio(uint n_args, mp_obj_t *args) { //assert(1 <= n_args && n_args <= 2); - const char *pin_name = qstr_str(mp_obj_get_qstr(args[0])); + const char *pin_name = mp_obj_str_get_str(args[0]); GPIO_TypeDef *port; switch (pin_name[0]) { case 'A': case 'a': port = GPIOA; break; @@ -630,21 +634,22 @@ void file_obj_print(void (*print)(void *env, const char *fmt, ...), void *env, m mp_obj_t file_obj_read(mp_obj_t self_in, mp_obj_t arg) { pyb_file_obj_t *self = self_in; int n = mp_obj_get_int(arg); - char *buf = m_new(char, n); + byte *buf = m_new(byte, n); UINT n_out; f_read(&self->fp, buf, n, &n_out); - return mp_obj_new_str(qstr_from_strn_take(buf, n, n_out)); + return mp_obj_new_str(buf, n_out, false); } mp_obj_t file_obj_write(mp_obj_t self_in, mp_obj_t arg) { pyb_file_obj_t *self = self_in; - const char *s = qstr_str(mp_obj_get_qstr(arg)); + uint l; + const byte *s = mp_obj_str_get_data(arg, &l); UINT n_out; - FRESULT res = f_write(&self->fp, s, strlen(s), &n_out); + FRESULT res = f_write(&self->fp, s, l, &n_out); if (res != FR_OK) { printf("File error: could not write to file; error code %d\n", res); - } else if (n_out != strlen(s)) { - printf("File error: could not write all data to file; wrote %d / %d bytes\n", n_out, strlen(s)); + } else if (n_out != l) { + printf("File error: could not write all data to file; wrote %d / %d bytes\n", n_out, l); } return mp_const_none; } @@ -676,8 +681,8 @@ static const mp_obj_type_t file_obj_type = { }; mp_obj_t pyb_io_open(mp_obj_t o_filename, mp_obj_t o_mode) { - const char *filename = qstr_str(mp_obj_get_qstr(o_filename)); - const char *mode = qstr_str(mp_obj_get_qstr(o_mode)); + const char *filename = mp_obj_str_get_str(o_filename); + const char *mode = mp_obj_str_get_str(o_mode); pyb_file_obj_t *self = m_new_obj(pyb_file_obj_t); self->base.type = &file_obj_type; if (mode[0] == 'r') { @@ -931,16 +936,16 @@ soft_reset: { vstr_t *vstr = vstr_new(); vstr_add_str(vstr, "0:/"); - if (pyb_config_source_dir == 0) { + if (pyb_config_source_dir == MP_OBJ_NULL) { vstr_add_str(vstr, "src"); } else { - vstr_add_str(vstr, qstr_str(pyb_config_source_dir)); + vstr_add_str(vstr, mp_obj_str_get_str(pyb_config_source_dir)); } vstr_add_char(vstr, '/'); - if (pyb_config_main == 0) { + if (pyb_config_main == MP_OBJ_NULL) { vstr_add_str(vstr, "main.py"); } else { - vstr_add_str(vstr, qstr_str(pyb_config_main)); + vstr_add_str(vstr, mp_obj_str_get_str(pyb_config_main)); } if (!do_file(vstr_str(vstr))) { flash_error(3); diff --git a/stm/string0.c b/stm/string0.c index d67c5f2b17..4899e7b0f5 100644 --- a/stm/string0.c +++ b/stm/string0.c @@ -34,7 +34,7 @@ void *memset(void *s, int c, size_t n) { return s; } -int strlen(const char *str) { +size_t strlen(const char *str) { int len = 0; for (const char *s = str; *s; s++) { len += 1; diff --git a/stm/usart.c b/stm/usart.c index c687cff05f..17ff146d5b 100644 --- a/stm/usart.c +++ b/stm/usart.c @@ -151,6 +151,12 @@ void usart_tx_str(pyb_usart_t usart_id, const char *str) { } } +void usart_tx_bytes(pyb_usart_t usart_id, const byte *data, uint len) { + for (; len > 0; data++, len--) { + usart_tx_char(usart_id, *data); + } +} + void usart_tx_strn_cooked(pyb_usart_t usart_id, const char *str, int len) { for (const char *top = str + len; str < top; str++) { if (*str == '\n') { @@ -201,8 +207,9 @@ static mp_obj_t usart_obj_tx_str(mp_obj_t self_in, mp_obj_t s) { pyb_usart_obj_t *self = self_in; if (self->is_enabled) { if (MP_OBJ_IS_TYPE(s, &str_type)) { - const char *str = qstr_str(mp_obj_get_qstr(s)); - usart_tx_str(self->usart_id, str); + uint len; + const byte *data = mp_obj_str_get_data(s, &len); + usart_tx_bytes(self->usart_id, data, len); } } return mp_const_none; diff --git a/unix-cpy/main.c b/unix-cpy/main.c index 0da1fca291..4bd924954f 100644 --- a/unix-cpy/main.c +++ b/unix-cpy/main.c @@ -5,6 +5,7 @@ #include "nlr.h" #include "misc.h" #include "mpconfig.h" +#include "qstr.h" #include "lexer.h" #include "lexerunix.h" #include "parse.h" diff --git a/unix/file.c b/unix/file.c index 3181c08b69..5249b5bba3 100644 --- a/unix/file.c +++ b/unix/file.c @@ -64,10 +64,10 @@ static mp_obj_t fdfile_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const return o; } - const char *fname = qstr_str(mp_obj_get_qstr(args[0])); + const char *fname = mp_obj_str_get_str(args[0]); const char *mode_s; if (n_args > 1) { - mode_s = qstr_str(mp_obj_get_qstr(args[1])); + mode_s = mp_obj_str_get_str(args[1]); } else { mode_s = "r"; } diff --git a/unix/main.c b/unix/main.c index facc250a78..c936106706 100644 --- a/unix/main.c +++ b/unix/main.c @@ -235,7 +235,7 @@ int main(int argc, char **argv) { // test_obj = TestClass() // test_obj.attr = 42 mp_obj_t test_class_type, test_class_instance; - test_class_type = mp_obj_new_type(QSTR_FROM_STR_STATIC("TestClass"), mp_const_empty_tuple, mp_obj_new_dict(0)); + test_class_type = mp_obj_new_type("TestClass", mp_const_empty_tuple, mp_obj_new_dict(0)); rt_store_name(QSTR_FROM_STR_STATIC("test_obj"), test_class_instance = rt_call_function_0(test_class_type)); rt_store_attr(test_class_instance, QSTR_FROM_STR_STATIC("attr"), mp_obj_new_int(42)); diff --git a/unix/socket.c b/unix/socket.c index ae87ba4656..36b848468c 100644 --- a/unix/socket.c +++ b/unix/socket.c @@ -153,8 +153,8 @@ static mp_obj_t socket_send(uint n_args, const mp_obj_t *args) { flags = MP_OBJ_SMALL_INT_VALUE(args[2]); } - const char *buf = qstr_str(mp_obj_str_get(args[1])); - int sz = strlen(buf); + uint sz; + const byte *buf = mp_obj_str_get_data(args[1], &sz); int out_sz = send(self->fd, buf, sz, flags); RAISE_ERRNO(out_sz, errno); @@ -225,7 +225,7 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_htons_obj, mod_socket_htons); static mp_obj_t mod_socket_inet_aton(mp_obj_t arg) { assert(MP_OBJ_IS_TYPE(arg, &str_type)); - const char *s = qstr_str(mp_obj_str_get(arg)); + const char *s = mp_obj_str_get_str(arg); struct in_addr addr; if (!inet_aton(s, &addr)) { nlr_jump(mp_obj_new_exception_msg(MP_QSTR_OSError, "Invalid IP address")); @@ -238,7 +238,7 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_inet_aton_obj, mod_socket_inet_aton) #if MICROPY_SOCKET_EXTRA static mp_obj_t mod_socket_gethostbyname(mp_obj_t arg) { assert(MP_OBJ_IS_TYPE(arg, &str_type)); - const char *s = qstr_str(mp_obj_str_get(arg)); + const char *s = mp_obj_str_get_str(arg); struct hostent *h = gethostbyname(s); if (h == NULL) { nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_OSError, "[Errno %d]", errno)); @@ -252,9 +252,9 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_gethostbyname_obj, mod_socket_gethos static mp_obj_t mod_socket_getaddrinfo(uint n_args, const mp_obj_t *args) { // TODO: Implement all args assert(n_args == 2); - assert(MP_OBJ_IS_TYPE(args[0], &str_type)); + assert(MP_OBJ_IS_STR(args[0])); - const char *host = qstr_str(mp_obj_str_get(args[0])); + const char *host = mp_obj_str_get_str(args[0]); const char *serv = NULL; // getaddrinfo accepts port in string notation, so however // it may seem stupid, we need to convert int to str @@ -264,7 +264,7 @@ static mp_obj_t mod_socket_getaddrinfo(uint n_args, const mp_obj_t *args) { sprintf(buf, "%d", port); serv = buf; } else { - serv = qstr_str(mp_obj_str_get(args[1])); + serv = mp_obj_str_get_str(args[1]); } struct addrinfo hints;