py/parsenum: Improve parsing of floating point numbers.

This patch improves parsing of floating point numbers by accumulating all the digits (integer and fractional) together into a single number of 1 or greater, and then applying the correct power of 10 at the very end. In particular, the repeated "multiply by 0.1" operations that used to build the fraction are now combined and applied together with the exponent in one final step. This helps to retain precision while parsing floats, and the patch also includes a check that the number doesn't overflow during parsing. One benefit is that a float will have the same value no matter where the decimal point is located, e.g. 1.23 == 123e-2.
2017-11-27 12:51:52 +11:00 · 2017-11-27 12:51:52 +11:00 · 84895f1a21
parent f59c6b48ae
commit 84895f1a21
4 changed files with 60 additions and 6 deletions
--- a/py/parsenum.c
+++ b/py/parsenum.c
@ -170,6 +170,14 @@ typedef enum {

 mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) {
 #if MICROPY_PY_BUILTINS_FLOAT
+
+// DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define DEC_VAL_MAX 1e20F
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define DEC_VAL_MAX 1e200
+#endif
+
    const char *top = str + len;
    mp_float_t dec_val = 0;
    bool dec_neg = false;
@ -214,8 +222,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
        // string should be a decimal number
        parse_dec_in_t in = PARSE_DEC_IN_INTG;
        bool exp_neg = false;
-        mp_float_t frac_mult = 0.1;
        mp_int_t exp_val = 0;
+        mp_int_t exp_extra = 0;
        while (str < top) {
            mp_uint_t dig = *str++;
            if ('0' <= dig && dig <= '9') {
@ -223,11 +231,18 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
                if (in == PARSE_DEC_IN_EXP) {
                    exp_val = 10 * exp_val + dig;
                } else {
-                    if (in == PARSE_DEC_IN_FRAC) {
-                        dec_val += dig * frac_mult;
-                        frac_mult *= MICROPY_FLOAT_CONST(0.1);
-                    } else {
+                    if (dec_val < DEC_VAL_MAX) {
+                        // dec_val won't overflow so keep accumulating
                        dec_val = 10 * dec_val + dig;
+                        if (in == PARSE_DEC_IN_FRAC) {
+                            --exp_extra;
+                        }
+                    } else {
+                        // dec_val might overflow, and further digits are beyond the precision
+                        // we can represent anyway, so drop the digit and only adjust the exponent
+                        if (in == PARSE_DEC_IN_INTG) {
+                            ++exp_extra;
+                        }
                    }
                }
            } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@ -261,7 +276,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
        }

        // apply the exponent
-        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
+        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val + exp_extra);
    }

    // negate value if needed
--- a/tests/float/float_parse.py
+++ b/tests/float/float_parse.py
@ -0,0 +1,22 @@
+# test parsing of floats
+
+inf = float('inf')
+
+# it shouldn't matter where the decimal point is if the exponent balances the value
+print(float('1234') - float('0.1234e4'))
+print(float('1.015625') - float('1015625e-6'))
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 60 + 'e-60'))
+print(float('9' * 60 + 'e-40'))
+print(float('9' * 60 + 'e-20') == float('1e40'))
+
+# many fractional digits
+print(float('.' + '9' * 70))
+print(float('.' + '9' * 70 + 'e20'))
+print(float('.' + '9' * 70 + 'e-50') == float('1e-50'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 60 + '1e10') == float('1e-51'))
+print(float('.' + '0' * 60 + '9e25'))
+print(float('.' + '0' * 60 + '9e40'))
--- a/tests/float/float_parse_doubleprec.py
+++ b/tests/float/float_parse_doubleprec.py
@ -0,0 +1,16 @@
+# test parsing of floats, requiring double-precision
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 400 + 'e-100'))
+print(float('9' * 400 + 'e-200'))
+print(float('9' * 400 + 'e-400'))
+
+# many fractional digits
+print(float('.' + '9' * 400))
+print(float('.' + '9' * 400 + 'e100'))
+print(float('.' + '9' * 400 + 'e-100'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 400 + '9e100'))
+print(float('.' + '0' * 400 + '9e200'))
+print(float('.' + '0' * 400 + '9e400'))
--- a/tests/run-tests
+++ b/tests/run-tests
@ -271,6 +271,7 @@ def run_tests(pyb, tests, args, base_path="."):
    if upy_float_precision < 64:
        skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
        skip_tests.add('float/float2int_doubleprec_intbig.py')
+        skip_tests.add('float/float_parse_doubleprec.py')

    if not has_complex:
        skip_tests.add('float/complex1.py')