diff --git a/py/obj.h b/py/obj.h index d62bc7b341..1f5a83f919 100644 --- a/py/obj.h +++ b/py/obj.h @@ -469,7 +469,7 @@ qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway conve const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len); mp_obj_t mp_obj_str_intern(mp_obj_t str); -void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len); +void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len, bool is_bytes); #if MICROPY_PY_BUILTINS_FLOAT // float diff --git a/py/objarray.c b/py/objarray.c index edf3ee8121..05821e8de4 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -58,7 +58,7 @@ STATIC void array_print(void (*print)(void *env, const char *fmt, ...), void *en mp_obj_array_t *o = o_in; if (o->typecode == BYTEARRAY_TYPECODE) { print(env, "bytearray(b", o->typecode); - mp_str_print_quoted(print, env, o->items, o->len); + mp_str_print_quoted(print, env, o->items, o->len, true); } else { print(env, "array('%c'", o->typecode); if (o->len > 0) { diff --git a/py/objstr.c b/py/objstr.c index 6656090c84..f9cc273447 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -64,7 +64,8 @@ STATIC bool is_str_or_bytes(mp_obj_t o) { /******************************************************************************/ /* str */ -void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) { +void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, + const byte *str_data, uint str_len, bool is_bytes) { // this escapes characters, but it will be very slow to print (calling print many times) bool has_single_quote = false; bool has_double_quote = false; @@ -85,7 +86,10 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e 
print(env, "\\%c", quote_char); } else if (*s == '\\') { print(env, "\\\\"); - } else if (32 <= *s && *s <= 126) { + } else if (*s >= 0x20 && *s != 0x7f && (!is_bytes || *s < 0x80)) { + // In strings, anything which is not an ASCII control character + // is printed as is; this includes characters in range 0x80-0xff + // (which can be non-Latin letters, etc.) print(env, "%c", *s); } else if (*s == '\n') { print(env, "\\n"); @@ -109,7 +113,7 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, if (is_bytes) { print(env, "b"); } - mp_str_print_quoted(print, env, str_data, str_len); + mp_str_print_quoted(print, env, str_data, str_len, is_bytes); } } diff --git a/tests/basics/string-repr.py b/tests/basics/string-repr.py index 34da483a57..2a3ef2527c 100644 --- a/tests/basics/string-repr.py +++ b/tests/basics/string-repr.py @@ -1,3 +1,4 @@ # anything above 0xa0 is printed as Unicode by CPython -for c in range(0xa1): +# the above is a CPython implementation detail, stick to ASCII +for c in range(0x80): print("0x%02x: %s" % (c, repr(chr(c))))