summaryrefslogtreecommitdiff
path: root/core/templates
diff options
context:
space:
mode:
Diffstat (limited to 'core/templates')
-rw-r--r--core/templates/hash_map.h33
-rw-r--r--core/templates/hash_set.h33
-rw-r--r--core/templates/hashfuncs.h224
-rw-r--r--core/templates/local_vector.h9
4 files changed, 236 insertions, 63 deletions
diff --git a/core/templates/hash_map.h b/core/templates/hash_map.h
index e5f73171a2..191f21a3dd 100644
--- a/core/templates/hash_map.h
+++ b/core/templates/hash_map.h
@@ -91,9 +91,9 @@ private:
return hash;
}
- _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
- uint32_t original_pos = p_hash % p_capacity;
- return (p_pos - original_pos + p_capacity) % p_capacity;
+ static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
+ const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
+ return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@@ -101,9 +101,10 @@ private:
return false; // Failed lookups, no elements
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@@ -111,7 +112,7 @@ private:
return false;
}
- if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
+ if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@@ -120,17 +121,18 @@ private:
return true;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
HashMapElement<TKey, TValue> *value = p_value;
uint32_t distance = 0;
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@@ -143,14 +145,14 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
- uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
+ uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
SWAP(hash, hashes[pos]);
SWAP(value, elements[pos]);
distance = existing_probe_len;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
@@ -316,13 +318,14 @@ public:
return false;
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
- uint32_t next_pos = (pos + 1) % capacity;
- while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
+ uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
+ while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
SWAP(hashes[next_pos], hashes[pos]);
SWAP(elements[next_pos], elements[pos]);
pos = next_pos;
- next_pos = (pos + 1) % capacity;
+ next_pos = fastmod((pos + 1), capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;
diff --git a/core/templates/hash_set.h b/core/templates/hash_set.h
index 2318067dcc..7b3a5d46f8 100644
--- a/core/templates/hash_set.h
+++ b/core/templates/hash_set.h
@@ -74,9 +74,9 @@ private:
return hash;
}
- _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
- uint32_t original_pos = p_hash % p_capacity;
- return (p_pos - original_pos + p_capacity) % p_capacity;
+ static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
+ const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
+ return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@@ -84,9 +84,10 @@ private:
return false; // Failed lookups, no elements
}
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@@ -94,7 +95,7 @@ private:
return false;
}
- if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
+ if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@@ -103,17 +104,18 @@ private:
return true;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
- uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
uint32_t index = p_index;
uint32_t distance = 0;
- uint32_t pos = hash % capacity;
+ uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@@ -124,7 +126,7 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
- uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
+ uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
key_to_hash[index] = pos;
SWAP(hash, hashes[pos]);
@@ -132,7 +134,7 @@ private:
distance = existing_probe_len;
}
- pos = (pos + 1) % capacity;
+ pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
@@ -265,9 +267,10 @@ public:
uint32_t key_pos = pos;
pos = key_to_hash[pos]; //make hash pos
- uint32_t capacity = hash_table_size_primes[capacity_index];
- uint32_t next_pos = (pos + 1) % capacity;
- while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
+ const uint32_t capacity = hash_table_size_primes[capacity_index];
+ const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
+ uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity);
+ while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
uint32_t kpos = hash_to_key[pos];
uint32_t kpos_next = hash_to_key[next_pos];
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
@@ -275,7 +278,7 @@ public:
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
pos = next_pos;
- next_pos = (pos + 1) % capacity;
+ next_pos = fastmod(pos + 1, capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;
diff --git a/core/templates/hashfuncs.h b/core/templates/hashfuncs.h
index 98ff7fa4ce..547534f26a 100644
--- a/core/templates/hashfuncs.h
+++ b/core/templates/hashfuncs.h
@@ -62,7 +62,7 @@ static _FORCE_INLINE_ uint32_t hash_djb2(const char *p_cstr) {
uint32_t c;
while ((c = *chr++)) {
- hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+ hash = ((hash << 5) + hash) ^ c; /* hash * 33 ^ c */
}
return hash;
@@ -72,14 +72,14 @@ static _FORCE_INLINE_ uint32_t hash_djb2_buffer(const uint8_t *p_buff, int p_len
uint32_t hash = p_prev;
for (int i = 0; i < p_len; i++) {
- hash = ((hash << 5) + hash) + p_buff[i]; /* hash * 33 + c */
+ hash = ((hash << 5) + hash) ^ p_buff[i]; /* hash * 33 + c */
}
return hash;
}
static _FORCE_INLINE_ uint32_t hash_djb2_one_32(uint32_t p_in, uint32_t p_prev = 5381) {
- return ((p_prev << 5) + p_prev) + p_in;
+ return ((p_prev << 5) + p_prev) ^ p_in;
}
/**
@@ -100,14 +100,76 @@ static _FORCE_INLINE_ uint32_t hash_one_uint64(const uint64_t p_int) {
return uint32_t(v);
}
+#define HASH_MURMUR3_SEED 0x7F07C65
// Murmurhash3 32-bit version.
// All MurmurHash versions are public domain software, and the author disclaims all copyright to their code.
-static _FORCE_INLINE_ uint32_t rotl32(uint32_t x, int8_t r) {
+static _FORCE_INLINE_ uint32_t hash_murmur3_one_32(uint32_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) {
+ p_in *= 0xcc9e2d51;
+ p_in = (p_in << 15) | (p_in >> 17);
+ p_in *= 0x1b873593;
+
+ p_seed ^= p_in;
+ p_seed = (p_seed << 13) | (p_seed >> 19);
+ p_seed = p_seed * 5 + 0xe6546b64;
+
+ return p_seed;
+}
+
+static _FORCE_INLINE_ uint32_t hash_murmur3_one_float(float p_in, uint32_t p_seed = HASH_MURMUR3_SEED) {
+ union {
+ float f;
+ uint32_t i;
+ } u;
+
+ // Normalize +/- 0.0 and NaN values so they hash the same.
+ if (p_in == 0.0f) {
+ u.f = 0.0;
+ } else if (Math::is_nan(p_in)) {
+ u.f = NAN;
+ } else {
+ u.f = p_in;
+ }
+
+ return hash_murmur3_one_32(u.i, p_seed);
+}
+
+static _FORCE_INLINE_ uint32_t hash_murmur3_one_64(uint64_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) {
+ p_seed = hash_murmur3_one_32(p_in & 0xFFFFFFFF, p_seed);
+ return hash_murmur3_one_32(p_in >> 32, p_seed);
+}
+
+static _FORCE_INLINE_ uint32_t hash_murmur3_one_double(double p_in, uint32_t p_seed = HASH_MURMUR3_SEED) {
+ union {
+ double d;
+ uint64_t i;
+ } u;
+
+ // Normalize +/- 0.0 and NaN values so they hash the same.
+ if (p_in == 0.0f) {
+ u.d = 0.0;
+ } else if (Math::is_nan(p_in)) {
+ u.d = NAN;
+ } else {
+ u.d = p_in;
+ }
+
+ return hash_murmur3_one_64(u.i, p_seed);
+}
+
+static _FORCE_INLINE_ uint32_t hash_murmur3_one_real(real_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) {
+#ifdef REAL_T_IS_DOUBLE
+ return hash_murmur3_one_double(p_in, p_seed);
+#else
+ return hash_murmur3_one_float(p_in, p_seed);
+#endif
+}
+
+static _FORCE_INLINE_ uint32_t hash_rotl32(uint32_t x, int8_t r) {
return (x << r) | (x >> (32 - r));
}
-static _FORCE_INLINE_ uint32_t fmix32(uint32_t h) {
+static _FORCE_INLINE_ uint32_t hash_fmix32(uint32_t h) {
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
@@ -117,7 +179,7 @@ static _FORCE_INLINE_ uint32_t fmix32(uint32_t h) {
return h;
}
-static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, const uint32_t seed = 0x7F07C65) {
+static _FORCE_INLINE_ uint32_t hash_murmur3_buffer(const void *key, int length, const uint32_t seed = HASH_MURMUR3_SEED) {
// Although not required, this is a random prime number.
const uint8_t *data = (const uint8_t *)key;
const int nblocks = length / 4;
@@ -133,11 +195,11 @@ static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, cons
uint32_t k1 = blocks[i];
k1 *= c1;
- k1 = rotl32(k1, 15);
+ k1 = hash_rotl32(k1, 15);
k1 *= c2;
h1 ^= k1;
- h1 = rotl32(h1, 13);
+ h1 = hash_rotl32(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
@@ -155,14 +217,14 @@ static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, cons
case 1:
k1 ^= tail[0];
k1 *= c1;
- k1 = rotl32(k1, 15);
+ k1 = hash_rotl32(k1, 15);
k1 *= c2;
h1 ^= k1;
};
// Finalize with additional bit mixing.
h1 ^= length;
- return fmix32(h1);
+ return hash_fmix32(h1);
}
static _FORCE_INLINE_ uint32_t hash_djb2_one_float(double p_in, uint32_t p_prev = 5381) {
@@ -184,7 +246,7 @@ static _FORCE_INLINE_ uint32_t hash_djb2_one_float(double p_in, uint32_t p_prev
}
template <class T>
-static _FORCE_INLINE_ uint32_t make_uint32_t(T p_in) {
+static _FORCE_INLINE_ uint32_t hash_make_uint32_t(T p_in) {
union {
T t;
uint32_t _u32;
@@ -213,11 +275,11 @@ static _FORCE_INLINE_ uint64_t hash_djb2_one_float_64(double p_in, uint64_t p_pr
}
static _FORCE_INLINE_ uint64_t hash_djb2_one_64(uint64_t p_in, uint64_t p_prev = 5381) {
- return ((p_prev << 5) + p_prev) + p_in;
+ return ((p_prev << 5) + p_prev) ^ p_in;
}
template <class T>
-static _FORCE_INLINE_ uint64_t make_uint64_t(T p_in) {
+static _FORCE_INLINE_ uint64_t hash_make_uint64_t(T p_in) {
union {
T t;
uint64_t _u64;
@@ -241,9 +303,9 @@ struct HashMapHasherDefault {
static _FORCE_INLINE_ uint32_t hash(const String &p_string) { return p_string.hash(); }
static _FORCE_INLINE_ uint32_t hash(const char *p_cstr) { return hash_djb2(p_cstr); }
- static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return fmix32(p_wchar); }
- static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return fmix32(p_uchar); }
- static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return fmix32(p_uchar); }
+ static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return hash_fmix32(p_wchar); }
+ static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return hash_fmix32(p_uchar); }
+ static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return hash_fmix32(p_uchar); }
static _FORCE_INLINE_ uint32_t hash(const RID &p_rid) { return hash_one_uint64(p_rid.get_id()); }
static _FORCE_INLINE_ uint32_t hash(const StringName &p_string_name) { return p_string_name.hash(); }
static _FORCE_INLINE_ uint32_t hash(const NodePath &p_path) { return p_path.hash(); }
@@ -251,21 +313,59 @@ struct HashMapHasherDefault {
static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) { return hash_one_uint64(p_int); }
static _FORCE_INLINE_ uint32_t hash(const int64_t p_int) { return hash_one_uint64(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_djb2_one_float(p_float); }
- static _FORCE_INLINE_ uint32_t hash(const double p_double) { return hash_djb2_one_float(p_double); }
- static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const int16_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return fmix32(p_int); }
- static _FORCE_INLINE_ uint32_t hash(const Vector2i &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector2i)); }
- static _FORCE_INLINE_ uint32_t hash(const Vector3i &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector3i)); }
- static _FORCE_INLINE_ uint32_t hash(const Vector2 &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector2)); }
- static _FORCE_INLINE_ uint32_t hash(const Vector3 &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector3)); }
- static _FORCE_INLINE_ uint32_t hash(const Rect2i &p_rect) { return hash_murmur3_32(&p_rect, sizeof(Rect2i)); }
- static _FORCE_INLINE_ uint32_t hash(const Rect2 &p_rect) { return hash_murmur3_32(&p_rect, sizeof(Rect2)); }
- static _FORCE_INLINE_ uint32_t hash(const AABB &p_aabb) { return hash_murmur3_32(&p_aabb, sizeof(AABB)); }
+ static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_murmur3_one_float(p_float); }
+ static _FORCE_INLINE_ uint32_t hash(const double p_double) { return hash_murmur3_one_double(p_double); }
+ static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const int16_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return hash_fmix32(p_int); }
+ static _FORCE_INLINE_ uint32_t hash(const Vector2i &p_vec) {
+ uint32_t h = hash_murmur3_one_32(p_vec.x);
+ h = hash_murmur3_one_32(p_vec.y, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const Vector3i &p_vec) {
+ uint32_t h = hash_murmur3_one_32(p_vec.x);
+ h = hash_murmur3_one_32(p_vec.y, h);
+ h = hash_murmur3_one_32(p_vec.z, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const Vector2 &p_vec) {
+ uint32_t h = hash_murmur3_one_real(p_vec.x);
+ h = hash_murmur3_one_real(p_vec.y, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const Vector3 &p_vec) {
+ uint32_t h = hash_murmur3_one_real(p_vec.x);
+ h = hash_murmur3_one_real(p_vec.y, h);
+ h = hash_murmur3_one_real(p_vec.z, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const Rect2i &p_rect) {
+ uint32_t h = hash_murmur3_one_32(p_rect.position.x);
+ h = hash_murmur3_one_32(p_rect.position.y, h);
+ h = hash_murmur3_one_32(p_rect.size.x, h);
+ h = hash_murmur3_one_32(p_rect.size.y, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const Rect2 &p_rect) {
+ uint32_t h = hash_murmur3_one_real(p_rect.position.x);
+ h = hash_murmur3_one_real(p_rect.position.y, h);
+ h = hash_murmur3_one_real(p_rect.size.x, h);
+ h = hash_murmur3_one_real(p_rect.size.y, h);
+ return hash_fmix32(h);
+ }
+ static _FORCE_INLINE_ uint32_t hash(const AABB &p_aabb) {
+ uint32_t h = hash_murmur3_one_real(p_aabb.position.x);
+ h = hash_murmur3_one_real(p_aabb.position.y, h);
+ h = hash_murmur3_one_real(p_aabb.position.z, h);
+ h = hash_murmur3_one_real(p_aabb.size.x, h);
+ h = hash_murmur3_one_real(p_aabb.size.y, h);
+ h = hash_murmur3_one_real(p_aabb.size.z, h);
+ return hash_fmix32(h);
+ }
};
template <typename T>
@@ -337,4 +437,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = {
1610612741,
};
+// Computed with elem_i = UINT64_C (0 x FFFFFFFF FFFFFFFF ) / d_i + 1, where d_i is the i-th element of the above array.
+const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = {
+ 3689348814741910324,
+ 1418980313362273202,
+ 802032351030850071,
+ 392483916461905354,
+ 190172619316593316,
+ 95578984837873325,
+ 47420935922132524,
+ 23987963684927896,
+ 11955116055547344,
+ 5991147799191151,
+ 2998982941588287,
+ 1501077717772769,
+ 750081082979285,
+ 375261795343686,
+ 187625172388393,
+ 93822606204624,
+ 46909513691883,
+ 23456218233098,
+ 11728086747027,
+ 5864041509391,
+ 2932024948977,
+ 1466014921160,
+ 733007198436,
+ 366503839517,
+ 183251896093,
+ 91625960335,
+ 45812983922,
+ 22906489714,
+ 11453246088
+};
+
+/**
+ * Fastmod computes ( n mod d ) given the precomputed c much faster than n % d.
+ * The implementation of fastmod is based on the following paper by Daniel Lemire et al.
+ * Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries
+ * https://arxiv.org/abs/1902.01961
+ */
+static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) {
+#if defined(_MSC_VER)
+ // Returns the upper 64 bits of the product of two 64-bit unsigned integers.
+ // This intrinsic function is required since MSVC does not support unsigned 128-bit integers.
+#if defined(_M_X64) || defined(_M_ARM64)
+ return __umulh(c * n, d);
+#else
+ // Fallback to the slower method for 32-bit platforms.
+ return n % d;
+#endif // _M_X64 || _M_ARM64
+#else
+#ifdef __SIZEOF_INT128__
+ // Prevent compiler warning, because we know what we are doing.
+ uint64_t lowbits = c * n;
+ __extension__ typedef unsigned __int128 uint128;
+ return static_cast<uint64_t>(((uint128)lowbits * d) >> 64);
+#else
+ // Fallback to the slower method if no 128-bit unsigned integer type is available.
+ return n % d;
+#endif // __SIZEOF_INT128__
+#endif // _MSC_VER
+}
+
#endif // HASHFUNCS_H
diff --git a/core/templates/local_vector.h b/core/templates/local_vector.h
index f4e0748c27..8d687effcf 100644
--- a/core/templates/local_vector.h
+++ b/core/templates/local_vector.h
@@ -38,7 +38,9 @@
#include <initializer_list>
-template <class T, class U = uint32_t, bool force_trivial = false>
+// If tight, it grows strictly as much as needed.
+// Otherwise, it grows exponentially (the default and what you want in most cases).
+template <class T, class U = uint32_t, bool force_trivial = false, bool tight = false>
class LocalVector {
private:
U count = 0;
@@ -121,7 +123,7 @@ public:
_FORCE_INLINE_ bool is_empty() const { return count == 0; }
_FORCE_INLINE_ U get_capacity() const { return capacity; }
_FORCE_INLINE_ void reserve(U p_size) {
- p_size = nearest_power_of_2_templated(p_size);
+ p_size = tight ? p_size : nearest_power_of_2_templated(p_size);
if (p_size > capacity) {
capacity = p_size;
data = (T *)memrealloc(data, capacity * sizeof(T));
@@ -262,4 +264,7 @@ public:
}
};
+template <class T, class U = uint32_t, bool force_trivial = false>
+using TightLocalVector = LocalVector<T, U, force_trivial, true>;
+
#endif // LOCAL_VECTOR_H