summaryrefslogtreecommitdiff
path: root/core/templates
diff options
context:
space:
mode:
authorHendrik Brucker <hendrik.brucker@mail.de>2022-06-23 18:08:52 +0200
committerHendrik Brucker <hendrik.brucker@mail.de>2022-06-23 18:08:52 +0200
commitfddafed9192359e627efcafa63029bb06f96a469 (patch)
treeb3ace813cc9bea0244d7bfa82025617b257fe493 /core/templates
parentd1dac8427a6a41e8fc0a76807887a57a8fe6fd10 (diff)
Optimize HashMap/HashSet using fastmod
Diffstat (limited to 'core/templates')
-rw-r--r--core/templates/hash_map.h33
-rw-r--r--core/templates/hash_set.h33
-rw-r--r--core/templates/hashfuncs.h62
3 files changed, 98 insertions, 30 deletions
diff --git a/core/templates/hash_map.h b/core/templates/hash_map.h
index e5f73171a2..191f21a3dd 100644
--- a/core/templates/hash_map.h
+++ b/core/templates/hash_map.h
@@ -91,9 +91,9 @@ private:
return hash;
}
- _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
- uint32_t original_pos = p_hash % p_capacity;
- return (p_pos - original_pos + p_capacity) % p_capacity;
+ static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
+ const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
+ return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@@ -101,9 +101,10 @@ private:
return false; // Failed lookups, no elements
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@@ -111,7 +112,7 @@ private:
return false;
}
- if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
+ if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@@ -120,17 +121,18 @@ private:
return true;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
HashMapElement<TKey, TValue> *value = p_value;
uint32_t distance = 0;
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@@ -143,14 +145,14 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
- uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
+ uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
SWAP(hash, hashes[pos]);
SWAP(value, elements[pos]);
distance = existing_probe_len;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
@@ -316,13 +318,14 @@ public:
return false;
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
- uint32_t next_pos = (pos + 1) % capacity;
- while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
+ uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
+ while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
SWAP(hashes[next_pos], hashes[pos]);
SWAP(elements[next_pos], elements[pos]);
pos = next_pos;
- next_pos = (pos + 1) % capacity;
+ next_pos = fastmod((pos + 1), capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;
diff --git a/core/templates/hash_set.h b/core/templates/hash_set.h
index 2318067dcc..7b3a5d46f8 100644
--- a/core/templates/hash_set.h
+++ b/core/templates/hash_set.h
@@ -74,9 +74,9 @@ private:
return hash;
}
- _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
- uint32_t original_pos = p_hash % p_capacity;
- return (p_pos - original_pos + p_capacity) % p_capacity;
+ static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
+ const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
+ return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@@ -84,9 +84,10 @@ private:
return false; // Failed lookups, no elements
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@@ -94,7 +95,7 @@ private:
return false;
}
- if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
+ if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@@ -103,17 +104,18 @@ private:
return true;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
uint32_t index = p_index;
uint32_t distance = 0;
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@@ -124,7 +126,7 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
- uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
+ uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
key_to_hash[index] = pos;
SWAP(hash, hashes[pos]);
@@ -132,7 +134,7 @@ private:
distance = existing_probe_len;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
@@ -265,9 +267,10 @@ public:
uint32_t key_pos = pos;
pos = key_to_hash[pos]; //make hash pos
- uint32_t capacity = hash_table_size_primes[capacity_index];
- uint32_t next_pos = (pos + 1) % capacity;
- while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
+ uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity);
+ while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
uint32_t kpos = hash_to_key[pos];
uint32_t kpos_next = hash_to_key[next_pos];
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
@@ -275,7 +278,7 @@ public:
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
pos = next_pos;
- next_pos = (pos + 1) % capacity;
+ next_pos = fastmod(pos + 1, capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;
diff --git a/core/templates/hashfuncs.h b/core/templates/hashfuncs.h
index b0371f2ab5..547534f26a 100644
--- a/core/templates/hashfuncs.h
+++ b/core/templates/hashfuncs.h
@@ -437,4 +437,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = {
1610612741,
};
+// Computed with elem_i = UINT64_C (0 x FFFFFFFF FFFFFFFF ) / d_i + 1, where d_i is the i-th element of the above array.
+const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = {
+ 3689348814741910324,
+ 1418980313362273202,
+ 802032351030850071,
+ 392483916461905354,
+ 190172619316593316,
+ 95578984837873325,
+ 47420935922132524,
+ 23987963684927896,
+ 11955116055547344,
+ 5991147799191151,
+ 2998982941588287,
+ 1501077717772769,
+ 750081082979285,
+ 375261795343686,
+ 187625172388393,
+ 93822606204624,
+ 46909513691883,
+ 23456218233098,
+ 11728086747027,
+ 5864041509391,
+ 2932024948977,
+ 1466014921160,
+ 733007198436,
+ 366503839517,
+ 183251896093,
+ 91625960335,
+ 45812983922,
+ 22906489714,
+ 11453246088
+};
+
+/**
+ * Fastmod computes ( n mod d ) given the precomputed c much faster than n % d.
+ * The implementation of fastmod is based on the following paper by Daniel Lemire et al.
+ * Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries
+ * https://arxiv.org/abs/1902.01961
+ */
+static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) {
+#if defined(_MSC_VER)
+ // Returns the upper 64 bits of the product of two 64-bit unsigned integers.
+ // This intrinsic function is required since MSVC does not support unsigned 128-bit integers.
+#if defined(_M_X64) || defined(_M_ARM64)
+ return __umulh(c * n, d);
+#else
+ // Fallback to the slower method for 32-bit platforms.
+ return n % d;
+#endif // _M_X64 || _M_ARM64
+#else
+#ifdef __SIZEOF_INT128__
+ // Prevent compiler warning, because we know what we are doing.
+ uint64_t lowbits = c * n;
+ __extension__ typedef unsigned __int128 uint128;
+ return static_cast<uint64_t>(((uint128)lowbits * d) >> 64);
+#else
+ // Fallback to the slower method if no 128-bit unsigned integer type is available.
+ return n % d;
+#endif // __SIZEOF_INT128__
+#endif // _MSC_VER
+}
+
#endif // HASHFUNCS_H