Make ord() Unicode-aware
This commit is contained in:
parent
03f0cbe905
commit
bb13212071
23
py/builtin.c
23
py/builtin.c
|
@ -340,14 +340,23 @@ STATIC mp_obj_t mp_builtin_oct(mp_obj_t o_in) {
|
||||||
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct);
|
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct);
|
||||||
|
|
||||||
STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
|
STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
|
||||||
uint len;
|
uint len, charlen;
|
||||||
const char *str = mp_obj_str_get_data(o_in, &len);
|
const char *str = mp_obj_str_get_data_len(o_in, &len, &charlen);
|
||||||
if (len == 1) {
|
if (charlen == 1) {
|
||||||
// don't sign extend when converting to ord
|
if (MP_OBJ_IS_STR(o_in) && (*str & 0x80)) {
|
||||||
// TODO unicode
|
machine_int_t ord = *str++ & 0x7F;
|
||||||
return mp_obj_new_int(((const byte*)str)[0]);
|
for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
|
||||||
|
ord &= ~mask;
|
||||||
|
}
|
||||||
|
while ((*str & 0xC0) == 0x80) {
|
||||||
|
ord = (ord << 6) | (*str++ & 0x3F);
|
||||||
|
}
|
||||||
|
return mp_obj_new_int(ord);
|
||||||
|
} else {
|
||||||
|
return mp_obj_new_int(((const byte*)str)[0]);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", len));
|
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "ord() expected a character, but string of length %d found", charlen));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
1
py/obj.h
1
py/obj.h
|
@ -468,6 +468,7 @@ uint mp_obj_str_get_len(mp_obj_t self_in);
|
||||||
qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr
|
qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr
|
||||||
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
|
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
|
||||||
const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
|
const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
|
||||||
|
const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen);
|
||||||
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len);
|
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len);
|
||||||
|
|
||||||
#if MICROPY_PY_BUILTINS_FLOAT
|
#if MICROPY_PY_BUILTINS_FLOAT
|
||||||
|
|
11
py/objstr.c
11
py/objstr.c
|
@ -53,7 +53,6 @@ const mp_obj_t mp_const_empty_bytes;
|
||||||
#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
|
#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
|
||||||
|
|
||||||
// use this macro to extract the string data, lengths, and flags
|
// use this macro to extract the string data, lengths, and flags
|
||||||
// NOTE: Currently buggy as regards qstr, which doesn't record a charlen
|
|
||||||
#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen, str_flags) const byte *str_data; uint str_len, str_charlen; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
|
#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen, str_flags) const byte *str_data; uint str_len, str_charlen; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
|
||||||
|
|
||||||
// don't use this macro, it's only for conversions
|
// don't use this macro, it's only for conversions
|
||||||
|
@ -1864,6 +1863,16 @@ const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen) {
|
||||||
|
if (is_str_or_bytes(self_in)) {
|
||||||
|
GET_STR_INFO(self_in, s, l, cl, f);
|
||||||
|
*len = l; *charlen = cl;
|
||||||
|
return (const char*)s;
|
||||||
|
} else {
|
||||||
|
bad_implicit_conversion(self_in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/* str iterator */
|
/* str iterator */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue