Revert "Add PEP 393-flags to strings and stub usage."

This reverts commit c239f50952.
This commit is contained in:
Chris Angelico 2014-06-08 02:10:59 +10:00
parent f9bebb28ad
commit bc990dad9a
7 changed files with 68 additions and 88 deletions

View File

@ -499,9 +499,8 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING:
case MP_PARSE_NODE_BYTES: {
uint len; char flags;
const byte *str = qstr_data(arg, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
uint len;
const byte *str = qstr_data(arg, &len);
cpython_c_print_quoted_str(vstr, (const char*)str, len, MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_BYTES);
break;
}
@ -1440,9 +1439,8 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q_base) {
if (i > 0) {
*str_dest++ = '.';
}
uint str_src_len; char str_src_flags;
const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len, &str_src_flags);
assert(str_src_flags == 1); //TODO: Support multibyte strings
uint str_src_len;
const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len);
memcpy(str_dest, str_src, str_src_len);
str_dest += str_src_len;
}
@ -1546,9 +1544,8 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
vstr_printf(vstr, ", ");
}
vstr_printf(vstr, "'");
uint len; char flags;
const byte *str = qstr_data(id2, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
uint len;
const byte *str = qstr_data(id2, &len);
vstr_add_strn(vstr, (const char*)str, len);
vstr_printf(vstr, "'");
}
@ -2544,9 +2541,8 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
for (int i = 0; i < n; i++) {
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
uint s_len; char s_flags;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len, &s_flags);
assert(s_flags == 1); //TODO: Support multibyte strings
uint s_len;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
memcpy(s_dest, s, s_len);
s_dest += s_len;
} else {

View File

@ -60,7 +60,7 @@ def do_work(infiles):
qhash = compute_hash(qstr)
qlen = len(qstr)
qchlen = len(qstr.decode("utf-8"))
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\1" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qchlen & 0xff, (qchlen >> 8) & 0xff, qstr))
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qchlen & 0xff, (qchlen >> 8) & 0xff, qstr))
return True

View File

@ -50,13 +50,10 @@ const mp_obj_t mp_const_empty_bytes;
#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
// use this macro to extract the string data and length
#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
// use this macro to extract the string data, lengths, and flags
#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen, str_flags) const byte *str_data; uint str_len, str_charlen; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
// don't use this macro, it's only for conversions
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_data ## _flags); assert(str_data ## _flags == 1);
// use this macro to extract the string data and both lengths
#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen) const byte *str_data; uint str_len, str_charlen; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
@ -101,32 +98,32 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
print(env, "\\t");
} else if (*s == '\x7f') {
print(env, "\\x7f");
} else if (is_bytes) {
} else if (is_bytes) {
print(env, "\\x%02x", *s);
} else {
// Non-ASCII character. Decode UTF-8.
machine_int_t ord = *s++ & 0x7F;
machine_int_t ord = *s++ & 0x7F;
for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
ord &= ~mask;
}
while (UTF8_IS_CONT(*s)) {
ord = (ord << 6) | (*s++ & 0x3F);
}
--s; // s will be incremented by the main loop
if (ord < 0x100) {
ord &= ~mask;
}
while (UTF8_IS_CONT(*s)) {
ord = (ord << 6) | (*s++ & 0x3F);
}
--s; // s will be incremented by the main loop
if (ord < 0x100) {
print(env, "\\x%02x", ord);
} else if (ord < 0x10000) {
} else if (ord < 0x10000) {
print(env, "\\u%04x", ord);
} else {
} else {
print(env, "\\U%08x", ord);
}
}
}
}
print(env, "%c", quote_char);
}
STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
GET_STR_DATA_LEN_FLAGS(self_in, str_data, str_len, str_flags);
GET_STR_DATA_LEN(self_in, str_data, str_len);
bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
if (kind == PRINT_STR && !is_bytes) {
print(env, "%.*s", str_len, str_data);
@ -170,7 +167,6 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
o->data = str_data;
o->hash = str_hash;
o->flags = 1;
return o;
}
@ -199,7 +195,6 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
o->data = str_data;
o->hash = str_hash;
o->flags = 1;
return o;
}
@ -377,7 +372,7 @@ uncomparable:
STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
mp_obj_type_t *type = mp_obj_get_type(self_in);
GET_STR_INFO(self_in, self_data, self_len, self_charlen, self_flags);
GET_STR_INFO(self_in, self_data, self_len, self_charlen);
if (value == MP_OBJ_SENTINEL) {
// load
#if MICROPY_PY_BUILTINS_SLICE
@ -400,20 +395,20 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
const char *s;
for (s=(const char *)self_data; index_val; ++s) {
if (!UTF8_IS_CONT(*s)) {
--index_val;
}
}
// Skip continuation bytes after the last lead byte
--index_val;
}
}
// Skip continuation bytes after the last lead byte
while (UTF8_IS_CONT(*s)) {
++s;
}
++s;
}
int len = 1;
if (UTF8_IS_NONASCII(*s)) {
// Count the number of 1 bits (after the first)
// Count the number of 1 bits (after the first)
for (char mask = 0x40; *s & mask; mask >>= 1) {
++len;
}
}
++len;
}
}
return mp_obj_new_str(s, len, true); // This will create a one-character string
}
} else {
@ -1746,7 +1741,7 @@ const mp_obj_type_t mp_type_bytes = {
};
// the zero-length bytes
STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 0, 1, NULL};
STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 0, NULL};
const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
@ -1765,7 +1760,6 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
o->hash = qstr_compute_hash(o->data, o->len);
byte *p = (byte*)o->data;
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
o->flags = 1;
return o;
}
@ -1773,7 +1767,6 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
o->len = len;
o->flags = 1;
if (data) {
if (MP_OBJ_IS_STR(o)) {
// Count non-continuation bytes so we know how long the string is in characters.
@ -1781,14 +1774,14 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
uint charlen = 0;
for (endptr = data; endptr < top; ++endptr) {
if (!UTF8_IS_CONT(*endptr)) {
++charlen;
}
}
++charlen;
}
}
o->charlen = charlen;
} else {
} else {
// For byte strings, the 'character' length (really the "exposed length" or "Python length") equals the byte length.
o->charlen = len;
}
}
o->hash = qstr_compute_hash(data, len);
byte *p = m_new(byte, len + 1);
o->data = p;
@ -1858,7 +1851,7 @@ uint mp_obj_str_get_hash(mp_obj_t self_in) {
uint mp_obj_str_get_len(mp_obj_t self_in) {
// TODO This has a double check for the type, one in obj.c and one here
if (MP_OBJ_IS_STR(self_in) || MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
GET_STR_INFO(self_in, self_data, self_len, self_charlen, self_flags); (void)self_data;
GET_STR_INFO(self_in, self_data, self_len, self_charlen); (void)self_data;
return self_charlen;
} else {
bad_implicit_conversion(self_in);
@ -1902,7 +1895,7 @@ const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len) {
const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen) {
if (is_str_or_bytes(self_in)) {
GET_STR_INFO(self_in, s, l, cl, f);
GET_STR_INFO(self_in, s, l, cl);
*len = l; *charlen = cl;
return (const char*)s;
} else {

View File

@ -32,12 +32,11 @@ typedef struct _mp_obj_str_t {
machine_uint_t len : 16;
// charlen == number of characters in the string - charlen <= len - 1, and is the value returned by len() in Python
machine_uint_t charlen : 16;
char flags; //Currently unused, always 1. Will later get markers eg ASCII-only.
const void *data; //Character data is encoded UTF-8 and should not be blindly indexed.
} mp_obj_str_t;
// This is valid ONLY for pure-ASCII strings!
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, sizeof(str) - 1, 1, (const byte*)str};
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, sizeof(str) - 1, (const byte*)str};
mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args);
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len);

View File

@ -47,16 +47,14 @@
// - hash is 2 bytes (see function below)
// - length is 2 bytes
// - character length is 2 bytes
// - flags byte
// - data follows
// - \0 terminated (for now, so they can be printed using printf)
#define Q_GET_HASH(q) ((q)[0] | ((q)[1] << 8))
#define Q_GET_ALLOC(q) (7 + Q_GET_LENGTH(q) + 1)
#define Q_GET_ALLOC(q) (6 + Q_GET_LENGTH(q) + 1)
#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
#define Q_GET_CHARLEN(q) ((q)[4] | ((q)[5] << 8))
#define Q_GET_FLAGS(q) ((q)[6])
#define Q_GET_DATA(q) ((q) + 7)
#define Q_GET_DATA(q) ((q) + 6)
// this must match the equivalent function in makeqstrdata.py
// Note that this hashes the UTF-8 encoded data bytes.
@ -88,8 +86,8 @@ const static qstr_pool_t const_pool = {
10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
MP_QSTR_number_of, // corresponds to number of strings in array just below
{
(const byte*) "\0\0\0\0\0\0\0", // invalid/no qstr has empty data
(const byte*) "\0\0\0\0\0\0\1", // empty qstr
(const byte*) "\0\0\0\0\0\0", // invalid/no qstr has empty data
(const byte*) "\0\0\0\0\0\0", // empty qstr
#define Q(id, str) str,
#include "genhdr/qstrdefs.generated.h"
#undef Q
@ -115,7 +113,7 @@ STATIC const byte *find_qstr(qstr q) {
}
STATIC qstr qstr_add(const byte *q_ptr) {
DEBUG_printf("QSTR: add hash=%d len=%d flags=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_FLAGS(q_ptr), Q_GET_DATA(q_ptr));
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
// make sure we have room in the pool for a new qstr
if (last_pool->len >= last_pool->alloc) {
@ -160,22 +158,21 @@ qstr qstr_from_strn(const char *str, uint len) {
qstr q = qstr_find_strn(str, len);
if (q == 0) {
machine_uint_t hash = qstr_compute_hash((const byte*)str, len);
byte *q_ptr = m_new(byte, 7 + len + 1);
byte *q_ptr = m_new(byte, 6 + len + 1);
uint charlen = 0;
for (const char *s = str; s < str + len; ++s) {
if (!UTF8_IS_CONT(*s)) {
++charlen;
}
}
++charlen;
}
}
q_ptr[0] = hash;
q_ptr[1] = hash >> 8;
q_ptr[2] = len;
q_ptr[3] = len >> 8;
q_ptr[4] = charlen;
q_ptr[5] = charlen >> 8;
q_ptr[6] = 1;
memcpy(q_ptr + 7, str, len);
q_ptr[7 + len] = '\0';
memcpy(q_ptr + 6, str, len);
q_ptr[6 + len] = '\0';
q = qstr_add(q_ptr);
}
return q;
@ -200,13 +197,12 @@ qstr qstr_build_end(byte *q_ptr) {
uint charlen = 0;
for (const byte *s = str; s < str + len; ++s) {
if (!UTF8_IS_CONT(*s)) {
++charlen;
}
}
++charlen;
}
}
q_ptr[4] = charlen;
q_ptr[5] = charlen >> 8;
q_ptr[6] = 1;
q_ptr[7 + len] = '\0';
q_ptr[6 + len] = '\0';
q = qstr_add(q_ptr);
} else {
m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
@ -234,10 +230,9 @@ const char *qstr_str(qstr q) {
return (const char*)Q_GET_DATA(qd);
}
const byte *qstr_data(qstr q, uint *len, char *flags) {
const byte *qstr_data(qstr q, uint *len) {
const byte *qd = find_qstr(q);
*len = Q_GET_LENGTH(qd);
*flags = Q_GET_FLAGS(qd);
return Q_GET_DATA(qd);
}

View File

@ -60,6 +60,6 @@ machine_uint_t qstr_hash(qstr q);
const char* qstr_str(qstr q);
uint qstr_len(qstr q);
uint qstr_charlen(qstr q);
const byte* qstr_data(qstr q, uint *len, char *flags);
const byte* qstr_data(qstr q, uint *len);
void qstr_pool_info(uint *n_pool, uint *n_qstr, uint *n_str_data_bytes, uint *n_total_bytes);

View File

@ -101,17 +101,15 @@ void mp_deinit(void) {
mp_obj_t mp_load_const_int(qstr qstr) {
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
uint len;
const byte* data = qstr_data(qstr, &len);
return mp_parse_num_integer((const char*)data, len, 0);
}
mp_obj_t mp_load_const_dec(qstr qstr) {
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
uint len;
const byte* data = qstr_data(qstr, &len);
return mp_parse_num_decimal((const char*)data, len, true, false);
}
@ -122,9 +120,8 @@ mp_obj_t mp_load_const_str(qstr qstr) {
mp_obj_t mp_load_const_bytes(qstr qstr) {
DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr));
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
uint len;
const byte *data = qstr_data(qstr, &len);
return mp_obj_new_bytes(data, len);
}