micropython/py/objstrunicode.c

281 lines
10 KiB
C

/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2013, 2014 Damien P. George
* Copyright (c) 2014-2016 Paul Sokolovsky
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <string.h>
#include <assert.h>
#include "py/objstr.h"
#include "py/objlist.h"
#include "py/runtime.h"
#if MICROPY_PY_BUILTINS_STR_UNICODE
// Forward declaration of mp_obj_new_str_iterator() so that it can be referenced
// before its definition. NOTE(review): the definition is not visible in this
// section of the file -- presumably it appears further down; confirm.
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
/******************************************************************************/
/* str */
// Print the given string data surrounded by quotes, escaping as needed.
//
//   print    - destination printer
//   str_data - start of the string's bytes
//   str_len  - number of bytes at str_data
//
// Quote selection: double quotes are used only when the data contains a single
// quote and no double quote; otherwise single quotes are used.
//
// Escaping: the chosen quote character and backslash are backslash-escaped,
// characters 32..126 are printed as-is, newline/carriage-return/tab use their
// short escapes, and everything else is written as \xNN, \uNNNN or \UNNNNNNNN
// depending on its magnitude.
//
// Note: output is emitted piece by piece through many print calls, so printing
// this way is slow.
STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) {
    const byte *const end = str_data + str_len;

    // Byte-wise scan for quote characters. The scan stops at the first double
    // quote because, once one is seen, single quotes are used regardless.
    bool seen_single = false;
    bool seen_double = false;
    const byte *p = str_data;
    while (p < end && !seen_double) {
        switch (*p) {
            case '\'':
                seen_single = true;
                break;
            case '"':
                seen_double = true;
                break;
            default:
                break;
        }
        p++;
    }

    unichar quote_char = (seen_single && !seen_double) ? '"' : '\'';

    // Opening quote.
    mp_printf(print, "%c", quote_char);

    p = str_data;
    while (p < end) {
        // Fetch the current character with utf8_get_char(), then step to the
        // next one with utf8_next_char().
        unichar ch = utf8_get_char(p);
        p = utf8_next_char(p);

        if (ch == quote_char) {
            // The quote test comes first so it takes precedence over the
            // printable-range test below.
            mp_printf(print, "\\%c", quote_char);
        } else {
            switch (ch) {
                case '\\':
                    mp_print_str(print, "\\\\");
                    break;
                case '\n':
                    mp_print_str(print, "\\n");
                    break;
                case '\r':
                    mp_print_str(print, "\\r");
                    break;
                case '\t':
                    mp_print_str(print, "\\t");
                    break;
                default:
                    if (ch >= 32 && ch <= 126) {
                        mp_printf(print, "%c", ch);
                    } else if (ch < 0x100) {
                        mp_printf(print, "\\x%02x", ch);
                    } else if (ch < 0x10000) {
                        mp_printf(print, "\\u%04x", ch);
                    } else {
                        mp_printf(print, "\\U%08x", ch);
                    }
                    break;
            }
        }
    }

    // Closing quote.
    mp_printf(print, "%c", quote_char);
}
// Print a str object to `print`; the output form is selected by `kind`:
//   PRINT_JSON (only when MICROPY_PY_JSON is enabled) - delegated to mp_str_print_json()
//   PRINT_STR                                         - the raw bytes are written unmodified
//   anything else                                     - quoted/escaped via uni_print_quoted()
STATIC void uni_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
    GET_STR_DATA_LEN(self_in, data, len);

    switch (kind) {
        #if MICROPY_PY_JSON
        case PRINT_JSON:
            mp_str_print_json(print, data, len);
            break;
        #endif

        case PRINT_STR:
            // Plain output: hand the bytes straight to the printer's strn callback.
            print->print_strn(print->data, (const char *)data, len);
            break;

        default:
            uni_print_quoted(print, data, len);
            break;
    }
}
// Unary-operator handler for str objects.
//   MP_UNARY_OP_BOOL - true iff the string's byte length is non-zero
//   MP_UNARY_OP_LEN  - small int holding utf8_charlen() of the data
//                      (presumably the character count rather than the byte count)
//   any other op     - MP_OBJ_NULL, meaning the op is not supported
STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
    GET_STR_DATA_LEN(self_in, data, len);

    if (op == MP_UNARY_OP_BOOL) {
        return mp_obj_new_bool(len != 0);
    }

    if (op == MP_UNARY_OP_LEN) {
        return MP_OBJ_NEW_SMALL_INT(utf8_charlen(data, len));
    }

    return MP_OBJ_NULL; // op not supported
}
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
// be capped to the first/last character of the string, depending on is_slice.
const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len,
mp_obj_t index, bool is_slice) {
// All str functions also handle bytes objects, and they call str_index_to_ptr(),
// so it must handle bytes.
if (type == &mp_type_bytes
#if MICROPY_PY_BUILTINS_BYTEARRAY
|| type == &mp_type_bytearray
#endif
) {