🐐 Hash tables
This commit is contained in:
@@ -1172,44 +1172,123 @@ static int _cent_key_eq(CentValue a, CentValue b) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hash an integer key with the splitmix64 finalizer (xor-shift / multiply
   rounds), which spreads sequential ints well. The 64-bit mix is truncated
   to its low 32 bits for the return value. Note that 0 hashes to 0. */
static uint32_t _cent_hash_int(long v) {
    uint64_t bits = (uint64_t)v;

    bits ^= bits >> 30;
    bits *= 0xbf58476d1ce4e5b9ULL;
    bits ^= bits >> 27;
    bits *= 0x94d049bb133111ebULL;
    bits ^= bits >> 31;

    return (uint32_t)bits;
}
|
||||
|
||||
/* 32-bit FNV-1a over the bytes of a NUL-terminated string: for each byte,
   xor it into the hash and then multiply by the FNV prime. The empty
   string hashes to the offset basis. `s` must not be NULL. */
static uint32_t _cent_hash_str(const char *s) {
    const uint32_t fnv_offset_basis = 2166136261u;
    const uint32_t fnv_prime = 16777619u;

    uint32_t hash = fnv_offset_basis;
    const unsigned char *p = (const unsigned char *)s;
    while (*p != 0) {
        hash = (hash ^ *p) * fnv_prime;
        p++;
    }
    return hash;
}
|
||||
|
||||
/* Hash a dict key by dispatching on its type tag: CENT_INT keys go through
   _cent_hash_int, CENT_STR keys through _cent_hash_str. Any other type is
   reported via cent_type_error; if that call returns, the hash is 0. */
static uint32_t _cent_hash_key(CentValue k) {
    uint32_t hash = 0;

    if (k.type == CENT_INT) {
        hash = _cent_hash_int(k.ival);
    } else if (k.type == CENT_STR) {
        hash = _cent_hash_str(k.sval);
    } else {
        cent_type_error("dict key must be a numeral or string");
    }
    return hash;
}
|
||||
|
||||
/* Return the smallest power of two that is >= n (1 for n <= 1).
 *
 * The result is clamped to the largest power of two representable in an
 * int: doubling past that point would overflow a signed int, which is
 * undefined behaviour and in practice never terminates. For such huge n
 * the returned value is therefore smaller than n. */
static int _next_pow2(int n) {
    /* (unsigned)-1 >> 1 equals INT_MAX; halving and adding one yields the
       top power of two (2^30 for a 32-bit int) without needing <limits.h>. */
    const int max_pow2 = (int)((((unsigned int)-1) >> 1) / 2u + 1u);

    int p = 1;
    while (p < n && p < max_pow2) {
        p <<= 1;
    }
    return p;
}
|
||||
|
||||
/* Probe for `key` in the bucket array. Returns the bucket slot — either
|
||||
one whose stored index points to a matching key (hit), or an empty
|
||||
slot (-1) where the key would be inserted. nbuckets is a power of 2. */
|
||||
static int _cent_dict_probe(const CentDict *d, CentValue key, uint32_t h) {
|
||||
uint32_t mask = (uint32_t)d->nbuckets - 1;
|
||||
uint32_t i = h & mask;
|
||||
while (1) {
|
||||
int idx = d->buckets[i];
|
||||
if (idx < 0) return (int)i;
|
||||
if (_cent_key_eq(d->keys[idx], key)) return (int)i;
|
||||
i = (i + 1) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace the dict's bucket array with a fresh one of `new_nbuckets` slots
   (expected to be a power of 2, since slots are selected by masking) and
   re-insert every entry index 0..len-1 by linear probing. The keys/vals
   arrays are left untouched; only `buckets` and `nbuckets` change. */
static void _cent_dict_rehash(CentDict *d, int new_nbuckets) {
    int *table = cent_arena_alloc(cent_arena, new_nbuckets * sizeof(int));
    for (int b = 0; b < new_nbuckets; b++) {
        table[b] = -1; /* -1 marks an empty bucket */
    }

    const uint32_t mask = (uint32_t)new_nbuckets - 1;
    for (int entry = 0; entry < d->len; entry++) {
        uint32_t slot = _cent_hash_key(d->keys[entry]) & mask;
        /* Stored keys are unique, so only an empty slot needs to be found. */
        while (table[slot] >= 0) {
            slot = (slot + 1) & mask;
        }
        table[slot] = entry;
    }

    d->buckets = table;
    d->nbuckets = new_nbuckets;
}
|
||||
|
||||
CentValue cent_dict_new(int cap) {
|
||||
if (cap < 4) cap = 4;
|
||||
int nbuckets = _next_pow2(cap * 2);
|
||||
CentValue *keys = cent_arena_alloc(cent_arena, cap * sizeof(CentValue));
|
||||
CentValue *vals = cent_arena_alloc(cent_arena, cap * sizeof(CentValue));
|
||||
return cent_dict_val(keys, vals, 0, cap);
|
||||
int *buckets = cent_arena_alloc(cent_arena, nbuckets * sizeof(int));
|
||||
for (int i = 0; i < nbuckets; i++) buckets[i] = -1;
|
||||
return cent_dict_val(keys, vals, buckets, 0, cap, nbuckets);
|
||||
}
|
||||
|
||||
/* Insert or overwrite `key` -> `val` in `dict`.
 *
 * Raises a type error via cent_type_error when `dict` is not a CENT_DICT
 * (and, through _cent_hash_key, when `key` is neither an int nor a string).
 * An existing key keeps its position in the insertion-order arrays and only
 * has its value replaced; a new key is appended at index `len`. */
void cent_dict_set(CentValue *dict, CentValue key, CentValue val) {
    if (dict->type != CENT_DICT)
        cent_type_error("dict-set requires a dict");
    CentDict *d = &dict->dval;

    uint32_t h = _cent_hash_key(key);
    int slot = _cent_dict_probe(d, key, h);
    int idx = d->buckets[slot];
    if (idx >= 0) {
        /* Key already present: overwrite in place. */
        d->vals[idx] = val;
        return;
    }

    /* Grow the keys/vals arrays first so the new entry has a stable index.
       Growing them does not touch the bucket array, so `slot` stays valid. */
    if (d->len >= d->cap) {
        int new_cap = d->cap * 2;
        CentValue *new_keys = cent_arena_alloc(cent_arena, new_cap * sizeof(CentValue));
        CentValue *new_vals = cent_arena_alloc(cent_arena, new_cap * sizeof(CentValue));
        memcpy(new_keys, d->keys, d->len * sizeof(CentValue));
        memcpy(new_vals, d->vals, d->len * sizeof(CentValue));
        d->keys = new_keys;
        d->vals = new_vals;
        d->cap = new_cap;
    }

    int new_idx = d->len;
    d->keys[new_idx] = key;
    d->vals[new_idx] = val;
    d->len++;

    /* If the load factor reaches 0.75, rehash; this re-inserts every entry
       including the one just appended, so nothing more is needed. Otherwise
       the slot picked by the earlier probe is still empty and still valid. */
    if (d->len * 4 >= d->nbuckets * 3) {
        _cent_dict_rehash(d, d->nbuckets * 2);
    } else {
        d->buckets[slot] = new_idx;
    }
}
|
||||
|
||||
/* Look up `key` in `dict` and return the stored value.
 *
 * Raises a type error via cent_type_error when `dict` is not a CENT_DICT
 * (and, through _cent_hash_key, when `key` is neither an int nor a string).
 * A missing key is reported through cent_runtime_error; if that call
 * returns, the result is cent_null(). */
CentValue cent_dict_get(CentValue dict, CentValue key) {
    if (dict.type != CENT_DICT)
        cent_type_error("dict-get requires a dict");

    uint32_t h = _cent_hash_key(key);
    int slot = _cent_dict_probe(&dict.dval, key, h);
    int idx = dict.dval.buckets[slot];
    if (idx >= 0) return dict.dval.vals[idx];

    /* The probe stopped on an empty bucket, so the key is absent. */
    cent_runtime_error("Key not found in dict");
    return cent_null();
}
|
||||
|
||||
@@ -47,10 +47,13 @@ struct CentList {
|
||||
};
|
||||
|
||||
struct CentDict {
|
||||
CentValue *keys;
|
||||
CentValue *vals;
|
||||
int len;
|
||||
int cap;
|
||||
CentValue *keys; /* insertion-order array, len entries */
|
||||
CentValue *vals; /* parallel to keys */
|
||||
int *buckets; /* hash table; values are indices into */
|
||||
/* keys/vals, or -1 for empty */
|
||||
int len; /* number of entries */
|
||||
int cap; /* capacity of keys/vals */
|
||||
int nbuckets; /* size of buckets, power of 2 */
|
||||
};
|
||||
|
||||
struct CentValue {
|
||||
@@ -135,13 +138,17 @@ static inline CentValue cent_func_val(CentFuncPtr fn, const char **param_names,
|
||||
r.fnval.param_count = param_count;
|
||||
return r;
|
||||
}
|
||||
/* Wrap pre-allocated dict storage in a CentValue tagged CENT_DICT.
   The pointers and counts are stored as given; nothing is copied or
   allocated here. */
static inline CentValue cent_dict_val(CentValue *keys, CentValue *vals,
                                      int *buckets, int len, int cap,
                                      int nbuckets) {
    CentValue r;
    r.type = CENT_DICT;
    r.dval.keys = keys;
    r.dval.vals = vals;
    r.dval.buckets = buckets;
    r.dval.len = len;
    r.dval.cap = cap;
    r.dval.nbuckets = nbuckets;
    return r;
}
|
||||
|
||||
|
||||
@@ -189,3 +189,34 @@ class TestDictDisplay(unittest.TestCase):
|
||||
@parameterized.expand(dict_display_tests)
|
||||
def test_dict_display(self, source, nodes, value, output):
|
||||
run_test(self, source, nodes, value, output)
|
||||
|
||||
|
||||
class TestDictGrowth(unittest.TestCase):
    # Exercises dict growth: many inserts into a dict that starts out empty.

    def test_dict_growth_preserves_order_and_lookup(self):
        # Insert XX (20) entries through a PER loop. Starting from the
        # initial cap of 4, this pushes the compiled dict through several
        # rehashes; afterwards lookup, length, and insertion-order iteration
        # must all still hold.
        source = (
            "DESIGNA d VT TABVLA {}\n"
            "PER i IN [I VSQVE XX] FAC {\n"
            "DESIGNA d[i] VT i * II\n"
            "}\n"
            "DIC(d[X])\n"
            "DIC(LONGITVDO(d))\n"
            "DIC(CLAVES(d))"
        )

        # Expected AST, one statement per variable for readability.
        declare_dict = Designa(ID("d"), DataDict([]))
        fill_loop = PerStatement(
            DataRangeArray(Numeral("I"), Numeral("XX")),
            ID("i"),
            [
                DesignaIndex(
                    ID("d"),
                    [ID("i")],
                    BinOp(ID("i"), Numeral("II"), "SYMBOL_TIMES"),
                )
            ],
        )
        print_lookup = ExpressionStatement(
            BuiltIn("DIC", [ArrayIndex(ID("d"), Numeral("X"))])
        )
        print_length = ExpressionStatement(
            BuiltIn("DIC", [BuiltIn("LONGITVDO", [ID("d")])])
        )
        print_keys = ExpressionStatement(
            BuiltIn("DIC", [BuiltIn("CLAVES", [ID("d")])])
        )
        nodes = Program(
            [],
            [declare_dict, fill_loop, print_lookup, print_length, print_keys],
        )

        # Keys must come back in insertion order: I through XX.
        keys_str = "[" + " ".join(int_to_num(i, False) for i in range(1, 21)) + "]"
        output = f"XX\nXX\n{keys_str}\n"

        run_test(self, source, nodes, ValStr(keys_str), output)
|
||||
|
||||
Reference in New Issue
Block a user