From baf6f14deb567ab626c1b05213af346108f41700 Mon Sep 17 00:00:00 2001 From: Dave Hylands Date: Sun, 30 Mar 2014 21:06:50 -0700 Subject: [PATCH] Enhance str.format support This adds support for almost everything (the comma isn't currently supported). The "unspecified" type with floats also doesn't behave exactly like python. Tested under unix with float and double Spot tested on stmhal --- py/formatfloat.c | 1 + py/obj.c | 4 + py/objstr.c | 384 +++++++++++++++++++++++++++++++++- py/pfenv.c | 208 ++++++++++++++++++ py/pfenv.h | 23 ++ py/py.mk | 1 + stmhal/printf.c | 150 +++---------- tests/basics/string-format.py | 146 ++++++++++++- 8 files changed, 776 insertions(+), 141 deletions(-) create mode 100644 py/pfenv.c create mode 100644 py/pfenv.h diff --git a/py/formatfloat.c b/py/formatfloat.c index 83aee1ef74..b1c62fa6d5 100644 --- a/py/formatfloat.c +++ b/py/formatfloat.c @@ -14,6 +14,7 @@ ***********************************************************************/ #include +#include #include "mpconfig.h" diff --git a/py/obj.c b/py/obj.c index 95052d16d2..43863667ca 100644 --- a/py/obj.c +++ b/py/obj.c @@ -198,6 +198,10 @@ machine_int_t mp_obj_get_int(mp_obj_t arg) { return MP_OBJ_SMALL_INT_VALUE(arg); } else if (MP_OBJ_IS_TYPE(arg, &mp_type_int)) { return mp_obj_int_get_checked(arg); +#if MICROPY_ENABLE_FLOAT + } else if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) { + return mp_obj_float_get(arg); +#endif } else { nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "can't convert %s to int", mp_obj_get_type_str(arg))); } diff --git a/py/objstr.c b/py/objstr.c index 936542b0e9..8389bb0bdf 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -9,6 +9,7 @@ #include "obj.h" #include "runtime0.h" #include "runtime.h" +#include "pfenv.h" typedef struct _mp_obj_str_t { mp_obj_base_t base; @@ -492,28 +493,389 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false); } +// Takes an int arg, but only 
parses unsigned numbers, and only changes +// *num if at least one digit was parsed. +static int str_to_int(const char *str, int *num) { + const char *s = str; + if (unichar_isdigit(*s)) { + *num = 0; + do { + *num = *num * 10 + (*s - '0'); + s++; + } + while (unichar_isdigit(*s)); + } + return s - str; +} + +static bool isalignment(char ch) { + return ch && strchr("<>=^", ch) != NULL; +} + +static bool istype(char ch) { + return ch && strchr("bcdeEfFgGnosxX%", ch) != NULL; +} + +static bool arg_looks_integer(mp_obj_t arg) { + return MP_OBJ_IS_TYPE(arg, &mp_type_bool) || MP_OBJ_IS_INT(arg); +} + +static bool arg_looks_numeric(mp_obj_t arg) { + return arg_looks_integer(arg) +#if MICROPY_ENABLE_FLOAT + || MP_OBJ_IS_TYPE(arg, &mp_type_float) +#endif + ; +} + mp_obj_t str_format(uint n_args, const mp_obj_t *args) { assert(MP_OBJ_IS_STR(args[0])); GET_STR_DATA_LEN(args[0], str, len); - int arg_i = 1; + int arg_i = 0; vstr_t *vstr = vstr_new(); + pfenv_t pfenv_vstr; + pfenv_vstr.data = vstr; + pfenv_vstr.print_strn = pfenv_vstr_add_strn; + for (const byte *top = str + len; str < top; str++) { - if (*str == '{') { + if (*str == '}') { str++; - if (str < top && *str == '{') { - vstr_add_char(vstr, '{'); + if (str < top && *str == '}') { + vstr_add_char(vstr, '}'); + continue; + } + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Single '}' encountered in format string")); + } + if (*str != '{') { + vstr_add_char(vstr, *str); + continue; + } + + str++; + if (str < top && *str == '{') { + vstr_add_char(vstr, '{'); + continue; + } + + // replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}" + + vstr_t *field_name = NULL; + char conversion = '\0'; + vstr_t *format_spec = NULL; + + if (str < top && *str != '}' && *str != '!' && *str != ':') { + field_name = vstr_new(); + while (str < top && *str != '}' && *str != '!' 
&& *str != ':') { + vstr_add_char(field_name, *str++); + } + vstr_add_char(field_name, '\0'); + } + + // conversion ::= "r" | "s" + + if (str < top && *str == '!') { + str++; + if (str < top && (*str == 'r' || *str == 's')) { + conversion = *str++; } else { - while (str < top && *str != '}') str++; - if (arg_i >= n_args) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range")); + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier")); + } + } + + if (str < top && *str == ':') { + str++; + // {:} is the same as {}, which is the same as {!s} + // This makes a difference when passing in a True or False + // '{}'.format(True) returns 'True' + // '{:d}'.format(True) returns '1' + // So we treat {:} as {} and this later gets treated to be {!s} + if (*str != '}') { + format_spec = vstr_new(); + while (str < top && *str != '}') { + vstr_add_char(format_spec, *str++); } - // TODO: may be PRINT_REPR depending on formatting code - mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[arg_i], PRINT_STR); - arg_i++; + vstr_add_char(format_spec, '\0'); + } + } + if (str >= top) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format")); + } + if (*str != '}') { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier")); + } + + mp_obj_t arg = mp_const_none; + + if (field_name) { + if (arg_i > 0) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from automatic field numbering to manual field specification")); + } + int index; + if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet")); + } + if (index >= n_args - 1) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range")); + } + arg = args[index + 1]; + arg_i = -1; + vstr_free(field_name); + 
field_name = NULL; + } else { + if (arg_i < 0) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from manual field specification to automatic field numbering")); + } + if (arg_i >= n_args - 1) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range")); + } + arg = args[arg_i + 1]; + arg_i++; + } + if (!format_spec && !conversion) { + conversion = 's'; + } + if (conversion) { + mp_print_kind_t print_kind; + if (conversion == 's') { + print_kind = PRINT_STR; + } else if (conversion == 'r') { + print_kind = PRINT_REPR; + } else { + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unknown conversion specifier %c", conversion)); + } + vstr_t *arg_vstr = vstr_new(); + mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, arg_vstr, arg, print_kind); + arg = mp_obj_new_str((const byte *)vstr_str(arg_vstr), vstr_len(arg_vstr), false); + vstr_free(arg_vstr); + } + + char sign = '\0'; + char fill = '\0'; + char align = '\0'; + int width = -1; + int precision = -1; + char type = '\0'; + int flags = 0; + + if (format_spec) { + // The format specifier (from http://docs.python.org/2/library/string.html#formatspec) + // + // [[fill]align][sign][#][0][width][,][.precision][type] + // fill ::= + // align ::= "<" | ">" | "=" | "^" + // sign ::= "+" | "-" | " " + // width ::= integer + // precision ::= integer + // type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" + + const char *s = vstr_str(format_spec); + if (isalignment(*s)) { + align = *s++; + } else if (*s && isalignment(s[1])) { + fill = *s++; + align = *s++; + } + if (*s == '+' || *s == '-' || *s == ' ') { + if (*s == '+') { + flags |= PF_FLAG_SHOW_SIGN; + } else if (*s == ' ') { + flags |= PF_FLAG_SPACE_SIGN; + } + sign = *s++; + } + if (*s == '#') { + flags |= PF_FLAG_SHOW_PREFIX; + s++; + } + if (*s == '0') { + if (!align) { + align = '='; + } + if (!fill) { + fill = '0'; + } + } + s += str_to_int(s, 
&width); + if (*s == ',') { + flags |= PF_FLAG_SHOW_COMMA; + s++; + } + if (*s == '.') { + s++; + s += str_to_int(s, &precision); + } + if (istype(*s)) { + type = *s++; + } + if (*s) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification")); + } + vstr_free(format_spec); + format_spec = NULL; + } + if (!align) { + if (arg_looks_numeric(arg)) { + align = '>'; + } else { + align = '<'; + } + } + if (!fill) { + fill = ' '; + } + + if (sign) { + if (type == 's') { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier")); + } + if (type == 'c') { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'")); } } else { - vstr_add_char(vstr, *str); + sign = '-'; + } + + switch (align) { + case '<': flags |= PF_FLAG_LEFT_ADJUST; break; + case '=': flags |= PF_FLAG_PAD_AFTER_SIGN; break; + case '^': flags |= PF_FLAG_CENTER_ADJUST; break; + } + + if (arg_looks_integer(arg)) { + switch (type) { + case 'b': + pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 2, 'a', flags, fill, width); + continue; + + case 'c': + { + char ch = mp_obj_get_int(arg); + pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width); + continue; + } + + case '\0': // No explicit format type implies 'd' + case 'n': // I don't think we support locales in uPy so use 'd' + case 'd': + pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 10, 'a', flags, fill, width); + continue; + + case 'o': + pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 8, 'a', flags, fill, width); + continue; + + case 'x': + pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'a', flags, fill, width); + continue; + + case 'X': + pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'A', flags, fill, width); + continue; + + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case '%': + // The floating point formatters all work with anything that + // looks like an 
integer + break; + + default: + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, + "Unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg))); + } + } +#if MICROPY_ENABLE_FLOAT + if (arg_looks_numeric(arg)) { + if (!type) { + + // Even though the docs say that an unspecified type is the same + // as 'g', there is one subtle difference, when the exponent + // is one less than the precision. + // + // '{:10.1}'.format(0.0) ==> '0e+00' + // '{:10.1g}'.format(0.0) ==> '0' + // + // TODO: Figure out how to deal with this. + // + // A proper solution would involve adding a special flag + // or something to format_float, and create a format_double + // to deal with doubles. In order to fix this when using + // sprintf, we'd need to use the e format and tweak the + // returned result to strip trailing zeros like the g format + // does. + // + // {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02 + // but with 1.e2 you get 1e+02 and 1.00e+02 + // + // Stripping the trailing 0's (like g does) would make the + // e format give us the right format. + // + // CPython sources say: + // Omitted type specifier. Behaves in the same way as repr(x) + // and str(x) if no precision is given, else like 'g', but with + // at least one digit after the decimal point. 
*/ + + type = 'g'; + } + if (type == 'n') { + type = 'g'; + } + + flags |= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf' + switch (type) { + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision); + break; + + case '%': + flags |= PF_FLAG_ADD_PERCENT; + pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision); + break; + + default: + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, + "Unknown format code '%c' for object of type 'float'", + type, mp_obj_get_type_str(arg))); + } +#endif + } else { + if (align == '=') { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier")); + } + switch (type) { + case '\0': + mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, arg, PRINT_STR); + break; + + case 's': + { + uint len; + const char *s = mp_obj_str_get_data(arg, &len); + if (precision < 0) { + precision = len; + } + if (len > precision) { + len = precision; + } + pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width); + break; + } + + default: + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, + "Unknown format code '%c' for object of type 'str'", + type, mp_obj_get_type_str(arg))); + } } } diff --git a/py/pfenv.c b/py/pfenv.c new file mode 100644 index 0000000000..07f35c335c --- /dev/null +++ b/py/pfenv.c @@ -0,0 +1,208 @@ +#include +#include + +///#include "std.h" +#include "misc.h" +#include "mpconfig.h" +#include "qstr.h" +#include "obj.h" +#include "pfenv.h" + +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE +#include +#endif + +#if MICROPY_ENABLE_FLOAT +#include "formatfloat.h" +#endif + +#define PF_PAD_SIZE 16 +static const char *pad_spaces = " "; +static const char *pad_zeroes = "0000000000000000"; + +void pfenv_vstr_add_strn(void *data, const char *str, unsigned int len){ + 
vstr_add_strn(data, str, len); +} + +int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, char fill, int width) { + int left_pad = 0; + int right_pad = 0; + int pad = width - len; + char pad_fill[PF_PAD_SIZE]; + const char *pad_chars; + + if (!fill || fill == ' ' ) { + pad_chars = pad_spaces; + } else if (fill == '0') { + pad_chars = pad_zeroes; + } else { + memset(pad_fill, fill, PF_PAD_SIZE); + pad_chars = pad_fill; + } + + if (flags & PF_FLAG_CENTER_ADJUST) { + left_pad = pad / 2; + right_pad = pad - left_pad; + } else if (flags & PF_FLAG_LEFT_ADJUST) { + right_pad = pad; + } else { + left_pad = pad; + } + + if (left_pad) { + while (left_pad > 0) { + int p = left_pad; + if (p > PF_PAD_SIZE) + p = PF_PAD_SIZE; + pfenv->print_strn(pfenv->data, pad_chars, p); + left_pad -= p; + } + } + pfenv->print_strn(pfenv->data, str, len); + if (right_pad) { + while (right_pad > 0) { + int p = right_pad; + if (p > PF_PAD_SIZE) + p = PF_PAD_SIZE; + pfenv->print_strn(pfenv->data, pad_chars, p); + right_pad -= p; + } + } + return len; +} + +// enough room for 32 signed number +#define INT_BUF_SIZE (16) + +int pfenv_print_int(const pfenv_t *pfenv, unsigned int x, int sgn, int base, int base_char, int flags, char fill, int width) { + char sign = 0; + if (sgn) { + if ((int)x < 0) { + sign = '-'; + x = -x; + } else if (flags & PF_FLAG_SHOW_SIGN) { + sign = '+'; + } else if (flags & PF_FLAG_SPACE_SIGN) { + sign = ' '; + } + } + + char buf[INT_BUF_SIZE]; + char *b = buf + INT_BUF_SIZE; + + if (x == 0) { + *(--b) = '0'; + } else { + do { + int c = x % base; + x /= base; + if (c >= 10) { + c += base_char - 10; + } else { + c += '0'; + } + *(--b) = c; + } while (b > buf && x != 0); + } + + char prefix_char = '\0'; + + if (flags & PF_FLAG_SHOW_PREFIX) { + if (base == 2) { + prefix_char = base_char + 'b' - 'a'; + } else if (base == 8) { + prefix_char = base_char + 'o' - 'a'; + } else if (base == 16) { + prefix_char = base_char + 'x' - 'a'; + } + } + + int 
len = 0; + if (flags & PF_FLAG_PAD_AFTER_SIGN) { + if (sign) { + len += pfenv_print_strn(pfenv, &sign, 1, flags, fill, 1); + width--; + } + if (prefix_char) { + len += pfenv_print_strn(pfenv, "0", 1, flags, fill, 1); + len += pfenv_print_strn(pfenv, &prefix_char, 1, flags, fill, 1); + width -= 2; + } + } else { + if (prefix_char && b > &buf[1]) { + *(--b) = prefix_char; + *(--b) = '0'; + } + if (sign && b > buf) { + *(--b) = sign; + } + } + + len += pfenv_print_strn(pfenv, b, buf + INT_BUF_SIZE - b, flags, fill, width); + return len; +} + +#if MICROPY_ENABLE_FLOAT +int pfenv_print_float(const pfenv_t *pfenv, mp_float_t f, char fmt, int flags, char fill, int width, int prec) { + char buf[32]; + char sign = '\0'; + int chrs = 0; + + if (flags & PF_FLAG_SHOW_SIGN) { + sign = '+'; + } + else + if (flags & PF_FLAG_SPACE_SIGN) { + sign = ' '; + } + int len; +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT + len = format_float(f, buf, sizeof(buf), fmt, prec, sign); +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE + char fmt_buf[6]; + char *fmt_s = fmt_buf; + + *fmt_s++ = '%'; + if (sign) { + *fmt_s++ = sign; + } + *fmt_s++ = '.'; + *fmt_s++ = '*'; + *fmt_s++ = fmt; + *fmt_s = '\0'; + + len = snprintf(buf, sizeof(buf), fmt_buf, prec, f); +#else +#error Unknown MICROPY FLOAT IMPL +#endif + char *s = buf; + + if ((flags & PF_FLAG_ADD_PERCENT) && (len + 1) < sizeof(buf)) { + buf[len++] = '%'; + buf[len] = '\0'; + } + + // buf[0] < '0' returns true if the first character is space, + or - + if ((flags & PF_FLAG_PAD_AFTER_SIGN) && buf[0] < '0') { + // We have a sign character + s++; + if (*s <= '9' || (flags & PF_FLAG_PAD_NAN_INF)) { + // We have a number, or we have a inf/nan and PAD_NAN_INF is set + // With '{:06e}'.format(float('-inf')) you get '-00inf' + chrs += pfenv_print_strn(pfenv, &buf[0], 1, 0, 0, 1); + width--; + len--; + } + } + + if (*s > 'A' && (flags & PF_FLAG_PAD_NAN_INF) == 0) { + // We have one of the inf or nan variants, suppress zero fill. 
+ // With printf, if you use: printf("%06e", -inf) then you get " -inf" + // so suppress the zero fill. + fill = ' '; + } + chrs += pfenv_print_strn(pfenv, s, len, flags, fill, width); + + return chrs; +} +#endif diff --git a/py/pfenv.h b/py/pfenv.h new file mode 100644 index 0000000000..edceaf3e4e --- /dev/null +++ b/py/pfenv.h @@ -0,0 +1,23 @@ +#define PF_FLAG_LEFT_ADJUST (0x001) +#define PF_FLAG_SHOW_SIGN (0x002) +#define PF_FLAG_SPACE_SIGN (0x004) +#define PF_FLAG_NO_TRAILZ (0x008) +#define PF_FLAG_SHOW_PREFIX (0x010) +#define PF_FLAG_SHOW_COMMA (0x020) +#define PF_FLAG_PAD_AFTER_SIGN (0x040) +#define PF_FLAG_CENTER_ADJUST (0x080) +#define PF_FLAG_ADD_PERCENT (0x100) +#define PF_FLAG_PAD_NAN_INF (0x200) + +typedef struct _pfenv_t { + void *data; + void (*print_strn)(void *, const char *str, unsigned int len); +} pfenv_t; + +void pfenv_vstr_add_strn(void *data, const char *str, unsigned int len); + +int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, char fill, int width); +int pfenv_print_int(const pfenv_t *pfenv, unsigned int x, int sgn, int base, int base_char, int flags, char fill, int width); +#if MICROPY_ENABLE_FLOAT +int pfenv_print_float(const pfenv_t *pfenv, mp_float_t f, char fmt, int flags, char fill, int width, int prec); +#endif diff --git a/py/py.mk b/py/py.mk index 3905aa77df..e2e83eb36a 100644 --- a/py/py.mk +++ b/py/py.mk @@ -84,6 +84,7 @@ PY_O_BASENAME = \ showbc.o \ repl.o \ intdivmod.o \ + pfenv.o \ # prepend the build destination prefix to the py object files PY_O = $(addprefix $(PY_BUILD)/, $(PY_O_BASENAME)) diff --git a/stmhal/printf.c b/stmhal/printf.c index 6fd06508ea..b1eace28de 100644 --- a/stmhal/printf.c +++ b/stmhal/printf.c @@ -8,6 +8,7 @@ #include "mpconfig.h" #include "qstr.h" #include "obj.h" +#include "pfenv.h" #if 0 #include "lcd.h" #endif @@ -18,89 +19,6 @@ #include "formatfloat.h" #endif -#define PF_FLAG_LEFT_ADJUST (0x01) -#define PF_FLAG_SHOW_SIGN (0x02) -#define PF_FLAG_SPACE_SIGN 
(0x04) -#define PF_FLAG_NO_TRAILZ (0x08) -#define PF_FLAG_ZERO_PAD (0x10) - -// tricky; we compute pad string by: pad_chars + (flags & PF_FLAG_ZERO_PAD) -#define PF_PAD_SIZE PF_FLAG_ZERO_PAD -static const char *pad_chars = " 0000000000000000"; - -typedef struct _pfenv_t { - void *data; - void (*print_strn)(void *, const char *str, unsigned int len); -} pfenv_t; - -static void print_str_dummy(void *data, const char *str, unsigned int len) { -} - -const pfenv_t pfenv_dummy = {0, print_str_dummy}; - -static int pfenv_print_strn(const pfenv_t *pfenv, const char *str, unsigned int len, int flags, int width) { - int pad = width - len; - if (pad > 0 && (flags & PF_FLAG_LEFT_ADJUST) == 0) { - while (pad > 0) { - int p = pad; - if (p > PF_PAD_SIZE) - p = PF_PAD_SIZE; - pfenv->print_strn(pfenv->data, pad_chars + (flags & PF_FLAG_ZERO_PAD), p); - pad -= p; - } - } - pfenv->print_strn(pfenv->data, str, len); - while (pad > 0) { - int p = pad; - if (p > PF_PAD_SIZE) - p = PF_PAD_SIZE; - pfenv->print_strn(pfenv->data, pad_chars, p); - pad -= p; - } - return len; -} - -// enough room for 32 signed number -#define INT_BUF_SIZE (12) - -static int pfenv_print_int(const pfenv_t *pfenv, unsigned int x, int sgn, int base, int base_char, int flags, int width) { - char sign = 0; - if (sgn) { - if ((int)x < 0) { - sign = '-'; - x = -x; - } else if (flags & PF_FLAG_SHOW_SIGN) { - sign = '+'; - } else if (flags & PF_FLAG_SPACE_SIGN) { - sign = ' '; - } - } - - char buf[INT_BUF_SIZE]; - char *b = buf + INT_BUF_SIZE; - - if (x == 0) { - *(--b) = '0'; - } else { - do { - int c = x % base; - x /= base; - if (c >= 10) { - c += base_char - 10; - } else { - c += '0'; - } - *(--b) = c; - } while (b > buf && x != 0); - } - - if (b > buf && sign != 0) { - *(--b) = sign; - } - - return pfenv_print_strn(pfenv, b, buf + INT_BUF_SIZE - b, flags, width); -} - void pfenv_prints(const pfenv_t *pfenv, const char *str) { pfenv->print_strn(pfenv->data, str, strlen(str)); } @@ -129,13 +47,16 @@ int 
pfenv_printf(const pfenv_t *pfenv, const char *fmt, va_list args) { // parse flags, if they exist int flags = 0; + char fill = ' '; while (*fmt != '\0') { if (*fmt == '-') flags |= PF_FLAG_LEFT_ADJUST; else if (*fmt == '+') flags |= PF_FLAG_SHOW_SIGN; else if (*fmt == ' ') flags |= PF_FLAG_SPACE_SIGN; else if (*fmt == '!') flags |= PF_FLAG_NO_TRAILZ; - else if (*fmt == '0') flags |= PF_FLAG_ZERO_PAD; - else break; + else if (*fmt == '0') { + flags |= PF_FLAG_PAD_AFTER_SIGN; + fill = '0'; + } else break; ++fmt; } @@ -177,15 +98,15 @@ int pfenv_printf(const pfenv_t *pfenv, const char *fmt, va_list args) { switch (*fmt) { case 'b': if (va_arg(args, int)) { - chrs += pfenv_print_strn(pfenv, "true", 4, flags, width); + chrs += pfenv_print_strn(pfenv, "true", 4, flags, fill, width); } else { - chrs += pfenv_print_strn(pfenv, "false", 5, flags, width); + chrs += pfenv_print_strn(pfenv, "false", 5, flags, fill, width); } break; case 'c': { char str = va_arg(args, int); - chrs += pfenv_print_strn(pfenv, &str, 1, flags, width); + chrs += pfenv_print_strn(pfenv, &str, 1, flags, fill, width); break; } case 's': @@ -195,25 +116,25 @@ int pfenv_printf(const pfenv_t *pfenv, const char *fmt, va_list args) { if (prec < 0) { prec = strlen(str); } - chrs += pfenv_print_strn(pfenv, str, prec, flags, width); + chrs += pfenv_print_strn(pfenv, str, prec, flags, fill, width); } else { - chrs += pfenv_print_strn(pfenv, "(null)", 6, flags, width); + chrs += pfenv_print_strn(pfenv, "(null)", 6, flags, fill, width); } break; } case 'u': - chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 10, 'a', flags, width); + chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 10, 'a', flags, fill, width); break; case 'd': - chrs += pfenv_print_int(pfenv, va_arg(args, int), 1, 10, 'a', flags, width); + chrs += pfenv_print_int(pfenv, va_arg(args, int), 1, 10, 'a', flags, fill, width); break; case 'x': case 'p': // ? 
- chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 16, 'a', flags, width); + chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 16, 'a', flags, fill, width); break; case 'X': case 'P': // ? - chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 16, 'A', flags, width); + chrs += pfenv_print_int(pfenv, va_arg(args, int), 0, 16, 'A', flags, fill, width); break; #if MICROPY_ENABLE_FLOAT case 'e': @@ -223,33 +144,18 @@ int pfenv_printf(const pfenv_t *pfenv, const char *fmt, va_list args) { case 'g': case 'G': { - char buf[32]; - char sign = '\0'; - - if (flags & PF_FLAG_SHOW_SIGN) { - sign = '+'; - } - else - if (flags & PF_FLAG_SPACE_SIGN) { - sign = ' '; - } - float f = va_arg(args, double); - int len = format_float(f, buf, sizeof(buf), *fmt, prec, sign); - char *s = buf; - - // buf[0] < '0' returns true if the first character is space, + or - - // buf[1] < '9' matches a digit, and doesn't match when we get back +nan or +inf - if (buf[0] < '0' && buf[1] <= '9' && (flags & PF_FLAG_ZERO_PAD)) { - chrs += pfenv_print_strn(pfenv, &buf[0], 1, 0, 1); - s++; - width--; - len--; - } - if (*s < '0' || *s >= '9') { - // For inf or nan, we don't want to zero pad. - flags &= ~PF_FLAG_ZERO_PAD; - } - chrs += pfenv_print_strn(pfenv, s, len, flags, width); +#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT + mp_float_t f = va_arg(args, double); + chrs += pfenv_print_float(pfenv, f, *fmt, flags, fill, width, prec); +#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE + // Currently pfenv_print_float uses snprintf, so if you want + // to use pfenv_print_float with doubles then you'll need to + // fix it to not use snprintf first. Otherwise you'll have + // infinite recursion. 
+#error Calling pfenv_print_float with double not supported from within printf +#else +#error Unknown MICROPY FLOAT IMPL +#endif break; } #endif @@ -338,7 +244,7 @@ void strn_print_strn(void *data, const char *str, unsigned int len) { strn_pfenv->cur += len; strn_pfenv->remain -= len; } - + int vsnprintf(char *str, size_t size, const char *fmt, va_list ap) { strn_pfenv_t strn_pfenv; strn_pfenv.cur = str; diff --git a/tests/basics/string-format.py b/tests/basics/string-format.py index ba51e0890b..0a9764bf25 100644 --- a/tests/basics/string-format.py +++ b/tests/basics/string-format.py @@ -1,8 +1,138 @@ -print("{}-{}".format(1, [4, 5])) -print("{0}-{1}".format(1, [4, 5])) -print("{:x}".format(1)) -print("{!r}".format(2)) -# TODO -#print("{1}-{0}".format(1, [4, 5])) -#print("{:x}".format(0x10)) -#print("{!r}".format("foo")) +def test(fmt, *args): + print('{:8s}'.format(fmt) + '>' + fmt.format(*args) + '<') + +test("{}-{}", 1, [4, 5]) +test("{0}-{1}", 1, [4, 5]) +test("{1}-{0}", 1, [4, 5]) +test("{:x}", 1) +test("{!r}", 2) +test("{1}-{0}", 1, [4, 5]) +test("{:x}", 0x10) +test("{!r}", "foo") +test("{!s}", "foo") + +def test_fmt(conv, fill, alignment, sign, prefix, width, precision, type, arg): + fmt = '{' + if conv: + fmt += '!' + fmt += conv + fmt += ':' + if alignment: + fmt += fill + fmt += alignment + fmt += sign + fmt += prefix + fmt += width + if precision: + fmt += '.' + fmt += precision + fmt += type + fmt += '}' + test(fmt, arg) + if fill == '0' and alignment == '=': + fmt = '{:' + fmt += sign + fmt += prefix + fmt += width + if precision: + fmt += '.' 
+ fmt += precision + fmt += type + fmt += '}' + test(fmt, arg) + +int_nums = (-1234, -123, -12, -1, 0, 1, 12, 123, 1234, True, False) +int_nums2 = (-12, -1, 0, 1, 12, True, False) + +if True: + for type in ('', 'b', 'd', 'o', 'x', 'X'): + for width in ('', '1', '3', '5', '7'): + for alignment in ('', '<', '>', '=', '^'): + for fill in ('', ' ', '0', '@'): + for sign in ('', '+', '-', ' '): + for prefix in ('', '#'): + for num in int_nums: + test_fmt('', fill, alignment, sign, prefix, width, '', type, num) + +if True: + for width in ('', '1', '2'): + for alignment in ('', '<', '>', '^'): + for fill in ('', ' ', '0', '@'): + test_fmt('', fill, alignment, '', '', width, '', 'c', 48) + +if True: + for conv in ('', 'r', 's'): + for width in ('', '1', '4', '10'): + for alignment in ('', '<', '>', '^'): + for fill in ('', ' ', '0', '@'): + for str in ('', 'a', 'bcd', 'This is a test with a longer string'): + test_fmt(conv, fill, alignment, '', '', width, '', 's', str) + +eg_nums = (0.0, -0.0, 0.1, 1.234, 12.3459, 1.23456789, 123456789.0, -0.0, + -0.1, -1.234, -12.3459, 1e4, 1e-4, 1e5, 1e-5, 1e6, 1e-6, 1e10, + 1e37, -1e37, 1e-37, -1e-37, + 1.23456e8, 1.23456e7, 1.23456e6, 1.23456e5, 1.23456e4, 1.23456e3, 1.23456e2, 1.23456e1, 1.23456e0, + 1.23456e-1, 1.23456e-2, 1.23456e-3, 1.23456e-4, 1.23456e-5, 1.23456e-6, 1.23456e-7, 1.23456e-8, + -1.23456e8, -1.23456e7, -1.23456e6, -1.23456e5, -1.23456e4, -1.23456e3, -1.23456e2, -1.23456e1, -1.23456e0, + -1.23456e-1, -1.23456e-2, -1.23456e-3, -1.23456e-4, -1.23456e-5, -1.23456e-6, -1.23456e-7, -1.23456e-8) + +if True: + for type in ('e', 'E', 'g', 'G', 'n'): + for width in ('', '4', '6', '8', '10'): + for alignment in ('', '<', '>', '=', '^'): + for fill in ('', '@', '0', ' '): + for sign in ('', '+', '-', ' '): + for prec in ('', '1', '3', '6'): + for num in eg_nums: + test_fmt('', fill, alignment, sign, '', width, prec, type, num) + +# Note: We use 1.23459 rather than 1.2345 because '{:3f}'.format(1.2345) +# rounds differently than 
print("%.3f", 1.2345); + +f_nums = (0.0, -0.0, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, + 0.0012, 0.0123, 0.1234, 1.23459, 12.3456, + -0.0001, -0.001, -0.01, -0.1, -1.0, -10.0, + -0.0012, -0.0123, -0.1234, -1.23459, -12.3456) + +if True: + for type in ('f', 'F'): + for width in ('', '4', '6', '8', '10'): + for alignment in ('', '<', '>', '=', '^'): + for fill in ('', ' ', '0', '@'): + for sign in ('', '+', '-', ' '): + # An empty precision defaults to 6, but when uPy is + # configured to use a float, we can only use a + # precision of 6 with numbers less than 10 and still + # get results that compare to CPython (which uses + # long doubles). + for prec in ('1', '2', '3'): + for num in f_nums: + test_fmt('', fill, alignment, sign, '', width, prec, type, num) + for num in int_nums2: + test_fmt('', fill, alignment, sign, '', width, '', type, num) + +pct_nums1 = (0.1, 0.58, 0.99, -0.1, -0.58, -0.99) +pct_nums2 = (True, False, 1, 0, -1) + +if True: + type = '%' + for width in ('', '4', '6', '8', '10'): + for alignment in ('', '<', '>', '=', '^'): + for fill in ('', ' ', '0', '@'): + for sign in ('', '+', '-', ' '): + # An empty precision defaults to 6, but when uPy is + # configured to use a float, we can only use a + # precision of 6 with numbers less than 10 and still + # get results that compare to CPython (which uses + # long doubles). + for prec in ('1', '2', '3'): + for num in pct_nums1: + test_fmt('', fill, alignment, sign, '', width, prec, type, num) + for num in pct_nums2: + test_fmt('', fill, alignment, sign, '', width, '', type, num) + +# We don't currently test a type of '' with floats (see the detailed comment +# in objstr.c) + +# TODO Add tests for erroneous format strings. +