diff --git a/py/builtin.c b/py/builtin.c index 8621b0b003..8ee9054b69 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -340,14 +340,23 @@ STATIC mp_obj_t mp_builtin_oct(mp_obj_t o_in) { MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct); STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) { - uint len; - const char *str = mp_obj_str_get_data(o_in, &len); - if (len == 1) { - // don't sign extend when converting to ord - // TODO unicode - return mp_obj_new_int(((const byte*)str)[0]); + uint len, charlen; + const char *str = mp_obj_str_get_data_len(o_in, &len, &charlen); + if (charlen == 1) { + if (MP_OBJ_IS_STR(o_in) && (*str & 0x80)) { + machine_int_t ord = *str++ & 0x7F; + for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) { + ord &= ~mask; + } + while ((*str & 0xC0) == 0x80) { + ord = (ord << 6) | (*str++ & 0x3F); + } + return mp_obj_new_int(ord); + } else { + return mp_obj_new_int(((const byte*)str)[0]); + } } else { - nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", len)); + nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", charlen)); } } diff --git a/py/obj.h b/py/obj.h index aa78b2a22d..06efad0888 100644 --- a/py/obj.h +++ b/py/obj.h @@ -468,6 +468,7 @@ uint mp_obj_str_get_len(mp_obj_t self_in); qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len); +const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen); void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len); #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/objstr.c b/py/objstr.c index 857028c593..e40ba0cc5b 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -53,7 +53,6 @@ const mp_obj_t mp_const_empty_bytes; #define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; } // use this macro to extract the string data, lengths, and flags -// NOTE: Currently buggy as regards qstr, which doesn't record a charlen #define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen, str_flags) const byte *str_data; uint str_len, str_charlen; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; } // don't use this macro, it's only for conversions @@ -1864,6 +1863,16 @@ const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len) { } } +const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen) { + if (is_str_or_bytes(self_in)) { + GET_STR_INFO(self_in, s, l, cl, f); + *len = l; *charlen = cl; + return (const char*)s; + } else { + bad_implicit_conversion(self_in); + } +} + /******************************************************************************/ /* str iterator */