py: Fix delete operation on map/dict and set objects.

Hash table can now be completely full (i.e. no NULL entry) before a
resize is triggered.  Use a sentinel value to indicate a deleted entry in
the table.
This commit is contained in:
Damien George 2014-04-05 17:17:19 +01:00
parent e20b6b418c
commit 95004e5114
6 changed files with 191 additions and 81 deletions

190
py/map.c
View File

@ -83,7 +83,7 @@ STATIC void mp_map_rehash(mp_map_t *map) {
map->all_keys_are_qstrs = 1; map->all_keys_are_qstrs = 1;
map->table = m_new0(mp_map_elem_t, map->alloc); map->table = m_new0(mp_map_elem_t, map->alloc);
for (int i = 0; i < old_alloc; i++) { for (int i = 0; i < old_alloc; i++) {
if (old_table[i].key != NULL) { if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value; mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
} }
} }
@ -106,8 +106,6 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
// map is a hash table (not a fixed array), so do a hash lookup // map is a hash table (not a fixed array), so do a hash lookup
machine_uint_t hash;
hash = mp_obj_hash(index);
if (map->alloc == 0) { if (map->alloc == 0) {
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
mp_map_rehash(map); mp_map_rehash(map);
@ -116,54 +114,79 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
} }
} }
machine_uint_t hash = mp_obj_hash(index);
uint pos = hash % map->alloc; uint pos = hash % map->alloc;
uint start_pos = pos;
mp_map_elem_t *avail_slot = NULL;
for (;;) { for (;;) {
mp_map_elem_t *elem = &map->table[pos]; mp_map_elem_t *slot = &map->table[pos];
if (elem->key == NULL) { if (slot->key == MP_OBJ_NULL) {
// not in table // found NULL slot, so index is not in table
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
if (map->used + 1 >= map->alloc) { map->used += 1;
// not enough room in table, rehash it if (avail_slot == NULL) {
mp_map_rehash(map); avail_slot = slot;
// restart the search for the new element
pos = hash % map->alloc;
continue;
} else {
map->used += 1;
elem->key = index;
elem->value = NULL;
if (!MP_OBJ_IS_QSTR(index)) {
map->all_keys_are_qstrs = 0;
}
return elem;
} }
} else if (elem->value == NULL) { slot->key = index;
return NULL; slot->value = MP_OBJ_NULL;
if (!MP_OBJ_IS_QSTR(index)) {
map->all_keys_are_qstrs = 0;
}
return slot;
} else {
return MP_OBJ_NULL;
} }
// Otherwise it's just entry marked as deleted, so continue with next one } else if (slot->key == MP_OBJ_SENTINEL) {
} else if (elem->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(elem->key, index))) { // found deleted slot, remember for later
// found it if (avail_slot == NULL) {
/* it seems CPython does not replace the index; try x={True:'true'};x[1]='one';x avail_slot = slot;
if (add_if_not_found) {
elem->key = index;
} }
*/ } else if (slot->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(slot->key, index))) {
// found index
// Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
map->used--;
// this leaks this memory (but see dict_get_helper) // this leaks this memory (but see dict_get_helper)
mp_map_elem_t *retval = m_new(mp_map_elem_t, 1); mp_map_elem_t *retval = m_new(mp_map_elem_t, 1);
retval->key = elem->key; retval->key = slot->key;
retval->value = elem->value; retval->value = slot->value;
elem->key = NULL; // delete element in this slot
// elem->key = NULL && elem->value != NULL means "marked deleted" map->used--;
// assume value indeed never NULL if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) {
// optimisation if next slot is empty
slot->key = MP_OBJ_NULL;
} else {
slot->key = MP_OBJ_SENTINEL;
}
return retval; return retval;
} }
return elem; return slot;
} }
// not yet found, keep searching in this table // not yet found, keep searching in this table
pos = (pos + 1) % map->alloc; pos = (pos + 1) % map->alloc;
if (pos == start_pos) {
// search got back to starting position, so index is not in table
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
if (avail_slot != NULL) {
// there was an available slot, so use that
map->used++;
avail_slot->key = index;
avail_slot->value = MP_OBJ_NULL;
if (!MP_OBJ_IS_QSTR(index)) {
map->all_keys_are_qstrs = 0;
}
return avail_slot;
} else {
// not enough room in table, rehash it
mp_map_rehash(map);
// restart the search for the new element
start_pos = pos = hash % map->alloc;
}
} else {
return MP_OBJ_NULL;
}
}
} }
} }
@ -183,16 +206,14 @@ STATIC void mp_set_rehash(mp_set_t *set) {
set->used = 0; set->used = 0;
set->table = m_new0(mp_obj_t, set->alloc); set->table = m_new0(mp_obj_t, set->alloc);
for (int i = 0; i < old_alloc; i++) { for (int i = 0; i < old_alloc; i++) {
if (old_table[i] != NULL) { if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
mp_set_lookup(set, old_table[i], true); mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
} }
} }
m_del(mp_obj_t, old_table, old_alloc); m_del(mp_obj_t, old_table, old_alloc);
} }
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) { mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
int hash;
int pos;
if (set->alloc == 0) { if (set->alloc == 0) {
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
mp_set_rehash(set); mp_set_rehash(set);
@ -200,47 +221,86 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
return NULL; return NULL;
} }
} }
if (lookup_kind & MP_MAP_LOOKUP_FIRST) { machine_uint_t hash = mp_obj_hash(index);
hash = 0; uint pos = hash % set->alloc;
pos = 0; uint start_pos = pos;
} else { mp_obj_t *avail_slot = NULL;
hash = mp_obj_hash(index);;
pos = hash % set->alloc;
}
for (;;) { for (;;) {
mp_obj_t elem = set->table[pos]; mp_obj_t elem = set->table[pos];
if (elem == MP_OBJ_NULL) { if (elem == MP_OBJ_NULL) {
// not in table // found NULL slot, so index is not in table
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
if (set->used + 1 >= set->alloc) { if (avail_slot == NULL) {
// not enough room in table, rehash it avail_slot = &set->table[pos];
mp_set_rehash(set);
// restart the search for the new element
pos = hash % set->alloc;
} else {
set->used += 1;
set->table[pos] = index;
return index;
} }
} else if (lookup_kind & MP_MAP_LOOKUP_FIRST) { set->used++;
pos++; *avail_slot = index;
return index;
} else { } else {
return MP_OBJ_NULL; return MP_OBJ_NULL;
} }
} else if ((lookup_kind & MP_MAP_LOOKUP_FIRST) || mp_obj_equal(elem, index)) { } else if (elem == MP_OBJ_SENTINEL) {
// found it // found deleted slot, remember for later
if (avail_slot == NULL) {
avail_slot = &set->table[pos];
}
} else if (mp_obj_equal(elem, index)) {
// found index
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
// delete element
set->used--; set->used--;
set->table[pos] = NULL; if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
// optimisation if next slot is empty
set->table[pos] = MP_OBJ_NULL;
} else {
set->table[pos] = MP_OBJ_SENTINEL;
}
} }
return elem; return elem;
} else { }
// not yet found, keep searching in this table
pos = (pos + 1) % set->alloc; // not yet found, keep searching in this table
pos = (pos + 1) % set->alloc;
if (pos == start_pos) {
// search got back to starting position, so index is not in table
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
if (avail_slot != NULL) {
// there was an available slot, so use that
set->used++;
*avail_slot = index;
return index;
} else {
// not enough room in table, rehash it
mp_set_rehash(set);
// restart the search for the new element
start_pos = pos = hash % set->alloc;
}
} else {
return MP_OBJ_NULL;
}
} }
} }
} }
mp_obj_t mp_set_remove_first(mp_set_t *set) {
for (uint pos = 0; pos < set->alloc; pos++) {
if (set->table[pos] != MP_OBJ_NULL && set->table[pos] != MP_OBJ_SENTINEL) {
mp_obj_t elem = set->table[pos];
// delete element
set->used--;
if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
// optimisation if next slot is empty
set->table[pos] = MP_OBJ_NULL;
} else {
set->table[pos] = MP_OBJ_SENTINEL;
}
return elem;
}
}
return MP_OBJ_NULL;
}
void mp_set_clear(mp_set_t *set) { void mp_set_clear(mp_set_t *set) {
m_del(mp_obj_t, set->table, set->alloc); m_del(mp_obj_t, set->table, set->alloc);
set->alloc = 0; set->alloc = 0;

View File

@ -23,6 +23,11 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
#define MP_OBJ_NULL ((mp_obj_t)NULL) #define MP_OBJ_NULL ((mp_obj_t)NULL)
// The SENTINEL object is used for various internal purposes where one needs
// an object which is unique from all other objects, including MP_OBJ_NULL.
#define MP_OBJ_SENTINEL ((mp_obj_t)8)
// These macros check for small int, qstr or object, and access small int and qstr values // These macros check for small int, qstr or object, and access small int and qstr values
// - xxxx...xxx1: a small int, bits 1 and above are the value // - xxxx...xxx1: a small int, bits 1 and above are the value
// - xxxx...xx10: a qstr, bits 2 and above are the value // - xxxx...xx10: a qstr, bits 2 and above are the value
@ -103,11 +108,11 @@ typedef struct _mp_map_t {
mp_map_elem_t *table; mp_map_elem_t *table;
} mp_map_t; } mp_map_t;
// These can be or'd together
typedef enum _mp_map_lookup_kind_t { typedef enum _mp_map_lookup_kind_t {
MP_MAP_LOOKUP, // 0 MP_MAP_LOOKUP, // 0
MP_MAP_LOOKUP_ADD_IF_NOT_FOUND, // 1 MP_MAP_LOOKUP_ADD_IF_NOT_FOUND, // 1
MP_MAP_LOOKUP_REMOVE_IF_FOUND, // 2 MP_MAP_LOOKUP_REMOVE_IF_FOUND, // 2
MP_MAP_LOOKUP_FIRST = 4,
} mp_map_lookup_kind_t; } mp_map_lookup_kind_t;
void mp_map_init(mp_map_t *map, int n); void mp_map_init(mp_map_t *map, int n);
@ -129,6 +134,7 @@ typedef struct _mp_set_t {
void mp_set_init(mp_set_t *set, int n); void mp_set_init(mp_set_t *set, int n);
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind); mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind);
mp_obj_t mp_set_remove_first(mp_set_t *set);
void mp_set_clear(mp_set_t *set); void mp_set_clear(mp_set_t *set);
// Type definitions for methods // Type definitions for methods

View File

@ -103,7 +103,7 @@ STATIC mp_map_elem_t *dict_it_iternext_elem(mp_obj_t self_in) {
mp_map_elem_t *table = self->dict->map.table; mp_map_elem_t *table = self->dict->map.table;
for (int i = self->cur; i < max; i++) { for (int i = self->cur; i < max; i++) {
if (table[i].key != NULL) { if (table[i].key != MP_OBJ_NULL && table[i].key != MP_OBJ_SENTINEL) {
self->cur = i + 1; self->cur = i + 1;
return &(table[i]); return &(table[i]);
} }

View File

@ -32,7 +32,7 @@ STATIC void set_print(void (*print)(void *env, const char *fmt, ...), void *env,
bool first = true; bool first = true;
print(env, "{"); print(env, "{");
for (int i = 0; i < self->set.alloc; i++) { for (int i = 0; i < self->set.alloc; i++) {
if (self->set.table[i] != MP_OBJ_NULL) { if (self->set.table[i] != MP_OBJ_NULL && self->set.table[i] != MP_OBJ_SENTINEL) {
if (!first) { if (!first) {
print(env, ", "); print(env, ", ");
} }
@ -83,7 +83,7 @@ STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) {
mp_obj_t *table = self->set->set.table; mp_obj_t *table = self->set->set.table;
for (machine_uint_t i = self->cur; i < max; i++) { for (machine_uint_t i = self->cur; i < max; i++) {
if (table[i] != NULL) { if (table[i] != MP_OBJ_NULL && table[i] != MP_OBJ_SENTINEL) {
self->cur = i + 1; self->cur = i + 1;
return table[i]; return table[i];
} }
@ -307,12 +307,10 @@ STATIC mp_obj_t set_equal(mp_obj_t self_in, mp_obj_t other_in) {
STATIC mp_obj_t set_pop(mp_obj_t self_in) { STATIC mp_obj_t set_pop(mp_obj_t self_in) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_set)); assert(MP_OBJ_IS_TYPE(self_in, &mp_type_set));
mp_obj_set_t *self = self_in; mp_obj_set_t *self = self_in;
mp_obj_t obj = mp_set_remove_first(&self->set);
if (self->set.used == 0) { if (obj == MP_OBJ_NULL) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "pop from an empty set")); nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "pop from an empty set"));
} }
mp_obj_t obj = mp_set_lookup(&self->set, NULL,
MP_MAP_LOOKUP_REMOVE_IF_FOUND | MP_MAP_LOOKUP_FIRST);
return obj; return obj;
} }
STATIC MP_DEFINE_CONST_FUN_OBJ_1(set_pop_obj, set_pop); STATIC MP_DEFINE_CONST_FUN_OBJ_1(set_pop_obj, set_pop);
@ -375,6 +373,14 @@ STATIC mp_obj_t set_union(mp_obj_t self_in, mp_obj_t other_in) {
} }
STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_union_obj, set_union); STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_union_obj, set_union);
STATIC mp_obj_t set_unary_op(int op, mp_obj_t self_in) {
mp_obj_set_t *self = self_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(self->set.used != 0);
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT((machine_int_t)self->set.used);
default: return MP_OBJ_NULL; // op not supported for None
}
}
STATIC mp_obj_t set_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { STATIC mp_obj_t set_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
mp_obj_t args[] = {lhs, rhs}; mp_obj_t args[] = {lhs, rhs};
@ -450,6 +456,7 @@ const mp_obj_type_t mp_type_set = {
.name = MP_QSTR_set, .name = MP_QSTR_set,
.print = set_print, .print = set_print,
.make_new = set_make_new, .make_new = set_make_new,
.unary_op = set_unary_op,
.binary_op = set_binary_op, .binary_op = set_binary_op,
.getiter = set_getiter, .getiter = set_getiter,
.locals_dict = (mp_obj_t)&set_locals_dict, .locals_dict = (mp_obj_t)&set_locals_dict,

View File

@ -1,8 +1,21 @@
for i in range(100): for n in range(20):
d = dict() print('testing dict with {} items'.format(n))
for j in range(100): for i in range(n):
d[j] = j # create dict
del d[i] d = dict()
for j in range(100): for j in range(n):
if j not in d: d[str(j)] = j
print(j, 'not in d') print(len(d))
# delete an item
del d[str(i)]
print(len(d))
# check items
for j in range(n):
if str(j) in d:
if j == i:
print(j, 'in d, but it should not be')
else:
if j != i:
print(j, 'not in d, but it should be')

View File

@ -1,3 +1,4 @@
# basic test
s = {1} s = {1}
print(s.remove(1)) print(s.remove(1))
print(list(s)) print(list(s))
@ -7,3 +8,26 @@ except KeyError:
pass pass
else: else:
print("failed to raise KeyError") print("failed to raise KeyError")
# test sets of varying size
for n in range(20):
print('testing set with {} items'.format(n))
for i in range(n):
# create set
s = set()
for j in range(n):
s.add(str(j))
print(len(s))
# delete an item
s.remove(str(i))
print(len(s))
# check items
for j in range(n):
if str(j) in s:
if j == i:
print(j, 'in s, but it should not be')
else:
if j != i:
print(j, 'not in s, but it should be')