diff options
author | Shiqing <shiqing-thu18@yandex.com> | 2019-09-21 14:32:40 +0800 |
---|---|---|
committer | Shiqing <shiqing-thu18@yandex.com> | 2019-09-21 14:32:40 +0800 |
commit | 78bee16e053f9703d4e04910eb7ab99c715d30e9 (patch) | |
tree | 416e9a374b4142424936350a3d7a217d97074c8e /core | |
parent | 2e065d8ad07bb20fede0d0c0b2d33d6628033024 (diff) |
Implement backward shift deletion for OAHashMap
Diffstat (limited to 'core')
-rw-r--r-- | core/oa_hash_map.h | 47 |
1 files changed, 18 insertions, 29 deletions
diff --git a/core/oa_hash_map.h b/core/oa_hash_map.h index 5ea6d8b0d4..1a466e57f4 100644 --- a/core/oa_hash_map.h +++ b/core/oa_hash_map.h @@ -37,10 +37,11 @@ #include "core/os/memory.h" /** - * A HashMap implementation that uses open addressing with robinhood hashing. - * Robinhood hashing swaps out entries that have a smaller probing distance + * A HashMap implementation that uses open addressing with Robin Hood hashing. + * Robin Hood hashing swaps out entries that have a smaller probing distance * than the to-be-inserted entry, that evens out the average probing distance - * and enables faster lookups. + * and enables faster lookups. Backward shift deletion is employed to further + * improve the performance and to avoid infinite loops in rare cases. * * The entries are stored inplace, so huge keys or values might fill cache lines * a lot faster. @@ -60,25 +61,20 @@ private: uint32_t num_elements; static const uint32_t EMPTY_HASH = 0; - static const uint32_t DELETED_HASH_BIT = 1 << 31; _FORCE_INLINE_ uint32_t _hash(const TKey &p_key) const { uint32_t hash = Hasher::hash(p_key); if (hash == EMPTY_HASH) { hash = EMPTY_HASH + 1; - } else if (hash & DELETED_HASH_BIT) { - hash &= ~DELETED_HASH_BIT; } return hash; } _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash) const { - p_hash = p_hash & ~DELETED_HASH_BIT; // we don't care if it was deleted or not - uint32_t original_pos = p_hash % capacity; - return (p_pos - original_pos) % capacity; + return (p_pos - original_pos + capacity) % capacity; } _FORCE_INLINE_ void _construct(uint32_t p_pos, uint32_t p_hash, const TKey &p_key, const TValue &p_value) { @@ -132,14 +128,6 @@ private: // not an empty slot, let's check the probing length of the existing one uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos]); if (existing_probe_len < distance) { - - if (hashes[pos] & DELETED_HASH_BIT) { - // we found a place where we can fit in! - _construct(pos, hash, key, value); - - return; - } - SWAP(hash, hashes[pos]); SWAP(key, keys[pos]); SWAP(value, values[pos]); @@ -173,9 +161,6 @@ private: if (old_hashes[i] == EMPTY_HASH) { continue; } - if (old_hashes[i] & DELETED_HASH_BIT) { - continue; - } _insert_with_hash(old_hashes[i], old_keys[i], old_values[i]); } @@ -205,10 +190,6 @@ public: continue; } - if (hashes[i] & DELETED_HASH_BIT) { - continue; - } - hashes[i] = EMPTY_HASH; values[i].~TValue(); keys[i].~TKey(); @@ -219,7 +200,7 @@ public: void insert(const TKey &p_key, const TValue &p_value) { - if ((float)num_elements / (float)capacity > 0.9) { + if (num_elements + 1 > 0.9 * capacity) { _resize_and_rehash(); } @@ -272,9 +253,20 @@ public: return; } - hashes[pos] |= DELETED_HASH_BIT; + uint32_t next_pos = (pos + 1) % capacity; + while (hashes[next_pos] != EMPTY_HASH && + _get_probe_length(next_pos, hashes[next_pos]) != 0) { + SWAP(hashes[next_pos], hashes[pos]); + SWAP(keys[next_pos], keys[pos]); + SWAP(values[next_pos], values[pos]); + pos = next_pos; + next_pos = (pos + 1) % capacity; + } + + hashes[pos] = EMPTY_HASH; values[pos].~TValue(); keys[pos].~TKey(); + num_elements--; } @@ -326,9 +318,6 @@ public: if (hashes[i] == EMPTY_HASH) { continue; } - if (hashes[i] & DELETED_HASH_BIT) { - continue; - } it.valid = true; it.key = &keys[i]; |