py: Fix delete operation on map/dict and set objects.
Hash table can now be completely full (ie now NULL entry) before a resize is triggered. Use sentinel value to indicate delete entry in the table.
This commit is contained in:
parent
e20b6b418c
commit
95004e5114
180
py/map.c
180
py/map.c
|
@ -83,7 +83,7 @@ STATIC void mp_map_rehash(mp_map_t *map) {
|
|||
map->all_keys_are_qstrs = 1;
|
||||
map->table = m_new0(mp_map_elem_t, map->alloc);
|
||||
for (int i = 0; i < old_alloc; i++) {
|
||||
if (old_table[i].key != NULL) {
|
||||
if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
|
||||
mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
|
||||
}
|
||||
}
|
||||
|
@ -106,8 +106,6 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
|
|||
|
||||
// map is a hash table (not a fixed array), so do a hash lookup
|
||||
|
||||
machine_uint_t hash;
|
||||
hash = mp_obj_hash(index);
|
||||
if (map->alloc == 0) {
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
mp_map_rehash(map);
|
||||
|
@ -116,54 +114,79 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
|
|||
}
|
||||
}
|
||||
|
||||
machine_uint_t hash = mp_obj_hash(index);
|
||||
uint pos = hash % map->alloc;
|
||||
uint start_pos = pos;
|
||||
mp_map_elem_t *avail_slot = NULL;
|
||||
for (;;) {
|
||||
mp_map_elem_t *elem = &map->table[pos];
|
||||
if (elem->key == NULL) {
|
||||
// not in table
|
||||
mp_map_elem_t *slot = &map->table[pos];
|
||||
if (slot->key == MP_OBJ_NULL) {
|
||||
// found NULL slot, so index is not in table
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
if (map->used + 1 >= map->alloc) {
|
||||
// not enough room in table, rehash it
|
||||
mp_map_rehash(map);
|
||||
// restart the search for the new element
|
||||
pos = hash % map->alloc;
|
||||
continue;
|
||||
} else {
|
||||
map->used += 1;
|
||||
elem->key = index;
|
||||
elem->value = NULL;
|
||||
if (avail_slot == NULL) {
|
||||
avail_slot = slot;
|
||||
}
|
||||
slot->key = index;
|
||||
slot->value = MP_OBJ_NULL;
|
||||
if (!MP_OBJ_IS_QSTR(index)) {
|
||||
map->all_keys_are_qstrs = 0;
|
||||
}
|
||||
return elem;
|
||||
return slot;
|
||||
} else {
|
||||
return MP_OBJ_NULL;
|
||||
}
|
||||
} else if (elem->value == NULL) {
|
||||
return NULL;
|
||||
} else if (slot->key == MP_OBJ_SENTINEL) {
|
||||
// found deleted slot, remember for later
|
||||
if (avail_slot == NULL) {
|
||||
avail_slot = slot;
|
||||
}
|
||||
// Otherwise it's just entry marked as deleted, so continue with next one
|
||||
} else if (elem->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(elem->key, index))) {
|
||||
// found it
|
||||
/* it seems CPython does not replace the index; try x={True:'true'};x[1]='one';x
|
||||
if (add_if_not_found) {
|
||||
elem->key = index;
|
||||
}
|
||||
*/
|
||||
} else if (slot->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(slot->key, index))) {
|
||||
// found index
|
||||
// Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x
|
||||
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
|
||||
map->used--;
|
||||
// this leaks this memory (but see dict_get_helper)
|
||||
mp_map_elem_t *retval = m_new(mp_map_elem_t, 1);
|
||||
retval->key = elem->key;
|
||||
retval->value = elem->value;
|
||||
elem->key = NULL;
|
||||
// elem->key = NULL && elem->value != NULL means "marked deleted"
|
||||
// assume value indeed never NULL
|
||||
retval->key = slot->key;
|
||||
retval->value = slot->value;
|
||||
// delete element in this slot
|
||||
map->used--;
|
||||
if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) {
|
||||
// optimisation if next slot is empty
|
||||
slot->key = MP_OBJ_NULL;
|
||||
} else {
|
||||
slot->key = MP_OBJ_SENTINEL;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
return elem;
|
||||
return slot;
|
||||
}
|
||||
|
||||
// not yet found, keep searching in this table
|
||||
pos = (pos + 1) % map->alloc;
|
||||
|
||||
if (pos == start_pos) {
|
||||
// search got back to starting position, so index is not in table
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
if (avail_slot != NULL) {
|
||||
// there was an available slot, so use that
|
||||
map->used++;
|
||||
avail_slot->key = index;
|
||||
avail_slot->value = MP_OBJ_NULL;
|
||||
if (!MP_OBJ_IS_QSTR(index)) {
|
||||
map->all_keys_are_qstrs = 0;
|
||||
}
|
||||
return avail_slot;
|
||||
} else {
|
||||
// not enough room in table, rehash it
|
||||
mp_map_rehash(map);
|
||||
// restart the search for the new element
|
||||
start_pos = pos = hash % map->alloc;
|
||||
}
|
||||
} else {
|
||||
return MP_OBJ_NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -183,16 +206,14 @@ STATIC void mp_set_rehash(mp_set_t *set) {
|
|||
set->used = 0;
|
||||
set->table = m_new0(mp_obj_t, set->alloc);
|
||||
for (int i = 0; i < old_alloc; i++) {
|
||||
if (old_table[i] != NULL) {
|
||||
mp_set_lookup(set, old_table[i], true);
|
||||
if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
|
||||
mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
|
||||
}
|
||||
}
|
||||
m_del(mp_obj_t, old_table, old_alloc);
|
||||
}
|
||||
|
||||
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
|
||||
int hash;
|
||||
int pos;
|
||||
if (set->alloc == 0) {
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
mp_set_rehash(set);
|
||||
|
@ -200,45 +221,84 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
if (lookup_kind & MP_MAP_LOOKUP_FIRST) {
|
||||
hash = 0;
|
||||
pos = 0;
|
||||
} else {
|
||||
hash = mp_obj_hash(index);;
|
||||
pos = hash % set->alloc;
|
||||
}
|
||||
machine_uint_t hash = mp_obj_hash(index);
|
||||
uint pos = hash % set->alloc;
|
||||
uint start_pos = pos;
|
||||
mp_obj_t *avail_slot = NULL;
|
||||
for (;;) {
|
||||
mp_obj_t elem = set->table[pos];
|
||||
if (elem == MP_OBJ_NULL) {
|
||||
// not in table
|
||||
// found NULL slot, so index is not in table
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
if (set->used + 1 >= set->alloc) {
|
||||
// not enough room in table, rehash it
|
||||
mp_set_rehash(set);
|
||||
// restart the search for the new element
|
||||
pos = hash % set->alloc;
|
||||
} else {
|
||||
set->used += 1;
|
||||
set->table[pos] = index;
|
||||
return index;
|
||||
if (avail_slot == NULL) {
|
||||
avail_slot = &set->table[pos];
|
||||
}
|
||||
} else if (lookup_kind & MP_MAP_LOOKUP_FIRST) {
|
||||
pos++;
|
||||
set->used++;
|
||||
*avail_slot = index;
|
||||
return index;
|
||||
} else {
|
||||
return MP_OBJ_NULL;
|
||||
}
|
||||
} else if ((lookup_kind & MP_MAP_LOOKUP_FIRST) || mp_obj_equal(elem, index)) {
|
||||
// found it
|
||||
} else if (elem == MP_OBJ_SENTINEL) {
|
||||
// found deleted slot, remember for later
|
||||
if (avail_slot == NULL) {
|
||||
avail_slot = &set->table[pos];
|
||||
}
|
||||
} else if (mp_obj_equal(elem, index)) {
|
||||
// found index
|
||||
if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
|
||||
// delete element
|
||||
set->used--;
|
||||
set->table[pos] = NULL;
|
||||
if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
|
||||
// optimisation if next slot is empty
|
||||
set->table[pos] = MP_OBJ_NULL;
|
||||
} else {
|
||||
set->table[pos] = MP_OBJ_SENTINEL;
|
||||
}
|
||||
}
|
||||
return elem;
|
||||
} else {
|
||||
}
|
||||
|
||||
// not yet found, keep searching in this table
|
||||
pos = (pos + 1) % set->alloc;
|
||||
|
||||
if (pos == start_pos) {
|
||||
// search got back to starting position, so index is not in table
|
||||
if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
|
||||
if (avail_slot != NULL) {
|
||||
// there was an available slot, so use that
|
||||
set->used++;
|
||||
*avail_slot = index;
|
||||
return index;
|
||||
} else {
|
||||
// not enough room in table, rehash it
|
||||
mp_set_rehash(set);
|
||||
// restart the search for the new element
|
||||
start_pos = pos = hash % set->alloc;
|
||||
}
|
||||
} else {
|
||||
return MP_OBJ_NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mp_obj_t mp_set_remove_first(mp_set_t *set) {
|
||||
for (uint pos = 0; pos < set->alloc; pos++) {
|
||||
if (set->table[pos] != MP_OBJ_NULL && set->table[pos] != MP_OBJ_SENTINEL) {
|
||||
mp_obj_t elem = set->table[pos];
|
||||
// delete element
|
||||
set->used--;
|
||||
if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
|
||||
// optimisation if next slot is empty
|
||||
set->table[pos] = MP_OBJ_NULL;
|
||||
} else {
|
||||
set->table[pos] = MP_OBJ_SENTINEL;
|
||||
}
|
||||
return elem;
|
||||
}
|
||||
}
|
||||
return MP_OBJ_NULL;
|
||||
}
|
||||
|
||||
void mp_set_clear(mp_set_t *set) {
|
||||
|
|
8
py/obj.h
8
py/obj.h
|
@ -23,6 +23,11 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
|
|||
|
||||
#define MP_OBJ_NULL ((mp_obj_t)NULL)
|
||||
|
||||
// The SENTINEL object is used for various internal purposes where one needs
|
||||
// an object which is unique from all other objects, including MP_OBJ_NULL.
|
||||
|
||||
#define MP_OBJ_SENTINEL ((mp_obj_t)8)
|
||||
|
||||
// These macros check for small int, qstr or object, and access small int and qstr values
|
||||
// - xxxx...xxx1: a small int, bits 1 and above are the value
|
||||
// - xxxx...xx10: a qstr, bits 2 and above are the value
|
||||
|
@ -103,11 +108,11 @@ typedef struct _mp_map_t {
|
|||
mp_map_elem_t *table;
|
||||
} mp_map_t;
|
||||
|
||||
// These can be or'd together
|
||||
typedef enum _mp_map_lookup_kind_t {
|
||||
MP_MAP_LOOKUP, // 0
|
||||
MP_MAP_LOOKUP_ADD_IF_NOT_FOUND, // 1
|
||||
MP_MAP_LOOKUP_REMOVE_IF_FOUND, // 2
|
||||
MP_MAP_LOOKUP_FIRST = 4,
|
||||
} mp_map_lookup_kind_t;
|
||||
|
||||
void mp_map_init(mp_map_t *map, int n);
|
||||
|
@ -129,6 +134,7 @@ typedef struct _mp_set_t {
|
|||
|
||||
void mp_set_init(mp_set_t *set, int n);
|
||||
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind);
|
||||
mp_obj_t mp_set_remove_first(mp_set_t *set);
|
||||
void mp_set_clear(mp_set_t *set);
|
||||
|
||||
// Type definitions for methods
|
||||
|
|
|
@ -103,7 +103,7 @@ STATIC mp_map_elem_t *dict_it_iternext_elem(mp_obj_t self_in) {
|
|||
mp_map_elem_t *table = self->dict->map.table;
|
||||
|
||||
for (int i = self->cur; i < max; i++) {
|
||||
if (table[i].key != NULL) {
|
||||
if (table[i].key != MP_OBJ_NULL && table[i].key != MP_OBJ_SENTINEL) {
|
||||
self->cur = i + 1;
|
||||
return &(table[i]);
|
||||
}
|
||||
|
|
19
py/objset.c
19
py/objset.c
|
@ -32,7 +32,7 @@ STATIC void set_print(void (*print)(void *env, const char *fmt, ...), void *env,
|
|||
bool first = true;
|
||||
print(env, "{");
|
||||
for (int i = 0; i < self->set.alloc; i++) {
|
||||
if (self->set.table[i] != MP_OBJ_NULL) {
|
||||
if (self->set.table[i] != MP_OBJ_NULL && self->set.table[i] != MP_OBJ_SENTINEL) {
|
||||
if (!first) {
|
||||
print(env, ", ");
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) {
|
|||
mp_obj_t *table = self->set->set.table;
|
||||
|
||||
for (machine_uint_t i = self->cur; i < max; i++) {
|
||||
if (table[i] != NULL) {
|
||||
if (table[i] != MP_OBJ_NULL && table[i] != MP_OBJ_SENTINEL) {
|
||||
self->cur = i + 1;
|
||||
return table[i];
|
||||
}
|
||||
|
@ -307,12 +307,10 @@ STATIC mp_obj_t set_equal(mp_obj_t self_in, mp_obj_t other_in) {
|
|||
STATIC mp_obj_t set_pop(mp_obj_t self_in) {
|
||||
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_set));
|
||||
mp_obj_set_t *self = self_in;
|
||||
|
||||
if (self->set.used == 0) {
|
||||
mp_obj_t obj = mp_set_remove_first(&self->set);
|
||||
if (obj == MP_OBJ_NULL) {
|
||||
nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "pop from an empty set"));
|
||||
}
|
||||
mp_obj_t obj = mp_set_lookup(&self->set, NULL,
|
||||
MP_MAP_LOOKUP_REMOVE_IF_FOUND | MP_MAP_LOOKUP_FIRST);
|
||||
return obj;
|
||||
}
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_1(set_pop_obj, set_pop);
|
||||
|
@ -375,6 +373,14 @@ STATIC mp_obj_t set_union(mp_obj_t self_in, mp_obj_t other_in) {
|
|||
}
|
||||
STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_union_obj, set_union);
|
||||
|
||||
STATIC mp_obj_t set_unary_op(int op, mp_obj_t self_in) {
|
||||
mp_obj_set_t *self = self_in;
|
||||
switch (op) {
|
||||
case MP_UNARY_OP_BOOL: return MP_BOOL(self->set.used != 0);
|
||||
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT((machine_int_t)self->set.used);
|
||||
default: return MP_OBJ_NULL; // op not supported for None
|
||||
}
|
||||
}
|
||||
|
||||
STATIC mp_obj_t set_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
|
||||
mp_obj_t args[] = {lhs, rhs};
|
||||
|
@ -450,6 +456,7 @@ const mp_obj_type_t mp_type_set = {
|
|||
.name = MP_QSTR_set,
|
||||
.print = set_print,
|
||||
.make_new = set_make_new,
|
||||
.unary_op = set_unary_op,
|
||||
.binary_op = set_binary_op,
|
||||
.getiter = set_getiter,
|
||||
.locals_dict = (mp_obj_t)&set_locals_dict,
|
||||
|
|
|
@ -1,8 +1,21 @@
|
|||
for i in range(100):
|
||||
for n in range(20):
|
||||
print('testing dict with {} items'.format(n))
|
||||
for i in range(n):
|
||||
# create dict
|
||||
d = dict()
|
||||
for j in range(100):
|
||||
d[j] = j
|
||||
del d[i]
|
||||
for j in range(100):
|
||||
if j not in d:
|
||||
print(j, 'not in d')
|
||||
for j in range(n):
|
||||
d[str(j)] = j
|
||||
print(len(d))
|
||||
|
||||
# delete an item
|
||||
del d[str(i)]
|
||||
print(len(d))
|
||||
|
||||
# check items
|
||||
for j in range(n):
|
||||
if str(j) in d:
|
||||
if j == i:
|
||||
print(j, 'in d, but it should not be')
|
||||
else:
|
||||
if j != i:
|
||||
print(j, 'not in d, but it should be')
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# basic test
|
||||
s = {1}
|
||||
print(s.remove(1))
|
||||
print(list(s))
|
||||
|
@ -7,3 +8,26 @@ except KeyError:
|
|||
pass
|
||||
else:
|
||||
print("failed to raise KeyError")
|
||||
|
||||
# test sets of varying size
|
||||
for n in range(20):
|
||||
print('testing set with {} items'.format(n))
|
||||
for i in range(n):
|
||||
# create set
|
||||
s = set()
|
||||
for j in range(n):
|
||||
s.add(str(j))
|
||||
print(len(s))
|
||||
|
||||
# delete an item
|
||||
s.remove(str(i))
|
||||
print(len(s))
|
||||
|
||||
# check items
|
||||
for j in range(n):
|
||||
if str(j) in s:
|
||||
if j == i:
|
||||
print(j, 'in s, but it should not be')
|
||||
else:
|
||||
if j != i:
|
||||
print(j, 'not in s, but it should be')
|
||||
|
|
Loading…
Reference in New Issue