From a73501b1d63e5240cbced95b884383b79f5d6efd Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Thu, 6 Apr 2017 17:27:33 +1000
Subject: [PATCH] py/objfloat: Add implementation of high-quality float
 hashing.

Disabled by default.
---
 py/mpconfig.h |  6 ++++++
 py/obj.h      |  4 ++++
 py/objfloat.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)

diff --git a/py/mpconfig.h b/py/mpconfig.h
index 06c19f72b1..05cb5daaf1 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -548,6 +548,12 @@ typedef double mp_float_t;
 #define MICROPY_PY_BUILTINS_COMPLEX (MICROPY_PY_BUILTINS_FLOAT)
 #endif
 
+// Whether to provide a high-quality hash for float and complex numbers.
+// If disabled (the default), a very simple but correct hash function is used.
+#ifndef MICROPY_FLOAT_HIGH_QUALITY_HASH
+#define MICROPY_FLOAT_HIGH_QUALITY_HASH (0)
+#endif
+
 // Enable features which improve CPython compatibility
 // but may lead to more code size/memory usage.
 // TODO: Originally intended as generic category to not
diff --git a/py/obj.h b/py/obj.h
index 597c7c8d97..b5a0c41906 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -730,7 +730,11 @@ void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, size_t s
 
 #if MICROPY_PY_BUILTINS_FLOAT
 // float
+#if MICROPY_FLOAT_HIGH_QUALITY_HASH
+mp_int_t mp_float_hash(mp_float_t val); // out-of-line implementation in objfloat.c
+#else
 static inline mp_int_t mp_float_hash(mp_float_t val) { return (mp_int_t)val; }
+#endif
 mp_obj_t mp_obj_float_binary_op(mp_uint_t op, mp_float_t lhs_val, mp_obj_t rhs); // can return MP_OBJ_NULL if op not supported
 
 // complex
diff --git a/py/objfloat.c b/py/objfloat.c
index 2c355d3557..5cf9954175 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -59,6 +59,55 @@ const mp_obj_float_t mp_const_float_pi_obj = {{&mp_type_float}, M_PI};
 
 #endif
 
+#if MICROPY_FLOAT_HIGH_QUALITY_HASH
+// Hash a float from its sgn/exp/frc bits; must return actual integer value if it fits in mp_int_t
+mp_int_t mp_float_hash(mp_float_t src) {
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+typedef uint64_t mp_float_uint_t;
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+typedef uint32_t mp_float_uint_t;
+#endif
+    union {
+        mp_float_t f;
+        #if MP_ENDIANNESS_LITTLE
+        struct { mp_float_uint_t frc:MP_FLOAT_FRAC_BITS, exp:MP_FLOAT_EXP_BITS, sgn:1; } p;
+        #else
+        struct { mp_float_uint_t sgn:1, exp:MP_FLOAT_EXP_BITS, frc:MP_FLOAT_FRAC_BITS; } p;
+        #endif
+        mp_float_uint_t i;
+    } u = {.f = src};
+
+    mp_int_t val;
+    const int adj_exp = (int)u.p.exp - MP_FLOAT_EXP_BIAS;
+    if (adj_exp < 0) {
+        // value < 1; use the raw bits minus the sign bit so 0.0 and -0.0 both return 0 (sign is applied below)
+        val = u.i & (((mp_float_uint_t)1 << (MP_FLOAT_EXP_BITS + MP_FLOAT_FRAC_BITS)) - 1);
+    } else {
+        // if adj_exp is max then: u.p.frc==0 indicates inf, else NaN
+        // else: 1 <= value; frc below is the fraction with the implicit leading 1 bit made explicit
+        mp_float_uint_t frc = u.p.frc | ((mp_float_uint_t)1 << MP_FLOAT_FRAC_BITS);
+
+        if (adj_exp <= MP_FLOAT_FRAC_BITS) {
+            // number may have a fraction; xor the integer part with the fractional part (mask built in mp_float_uint_t as the shift can exceed the width of int)
+            val = (frc >> (MP_FLOAT_FRAC_BITS - adj_exp))
+                ^ (frc & (((mp_float_uint_t)1 << (MP_FLOAT_FRAC_BITS - adj_exp)) - 1));
+        } else if ((unsigned int)adj_exp < BITS_PER_BYTE * sizeof(mp_int_t) - 1) {
+            // the number is a (big) whole integer and will fit in val's signed-width
+            val = (mp_int_t)frc << (adj_exp - MP_FLOAT_FRAC_BITS);
+        } else {
+            // integer part will overflow val's width (or value is inf/NaN) so just use what bits we can
+            val = frc;
+        }
+    }
+
+    if (u.p.sgn) {
+        val = -val;
+    }
+
+    return val;
+}
+#endif
+
 STATIC void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t kind) {
     (void)kind;
     mp_float_t o_val = mp_obj_float_get(o_in);