Diffstat (limited to 'core/templates')
-rw-r--r-- | core/templates/bin_sorted_array.h | 2
-rw-r--r-- | core/templates/cowdata.h | 13
-rw-r--r-- | core/templates/hash_map.h | 33
-rw-r--r-- | core/templates/hash_set.h | 33
-rw-r--r-- | core/templates/hashfuncs.h | 240
-rw-r--r-- | core/templates/local_vector.h | 20
-rw-r--r-- | core/templates/lru.h | 7
-rw-r--r-- | core/templates/paged_allocator.h | 10
-rw-r--r-- | core/templates/paged_array.h | 12
-rw-r--r-- | core/templates/rb_map.h | 2
-rw-r--r-- | core/templates/rb_set.h | 2
-rw-r--r-- | core/templates/rid_owner.h | 20
-rw-r--r-- | core/templates/safe_refcount.h | 5
-rw-r--r-- | core/templates/thread_work_pool.cpp | 81
-rw-r--r-- | core/templates/thread_work_pool.h | 157
-rw-r--r-- | core/templates/vector.h | 4
16 files changed, 307 insertions, 334 deletions
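
Editor's note: the heaviest churn is in hashfuncs.h and the two open-addressing tables (hash_map.h, hash_set.h), which swap hash % capacity for Lemire's fastmod driven by the new precomputed hash_table_size_primes_inv table, replace the GCC-only __has_trivial_* builtins with <type_traits>, and remove thread_work_pool.{h,cpp}. As a standalone sanity check of the fastmod identity the patch relies on below — with c = ceil(2^64 / d), the high 64 bits of ((c * n) mod 2^64) * d equal n % d for 32-bit n — here is a minimal sketch; the names fastmod_sketch, d and c are invented for illustration, and it assumes a GCC/Clang-style unsigned __int128 rather than the engine's platform #ifdefs.

#include <cstdint>
#include <cstdio>

// Same computation as the fastmod() helper added to hashfuncs.h, written
// without the MSVC/__umulh fallbacks.
static uint32_t fastmod_sketch(uint32_t n, uint64_t c, uint32_t d) {
	uint64_t lowbits = c * n; // wraps modulo 2^64 on purpose
	return (uint32_t)(((unsigned __int128)lowbits * d) >> 64);
}

int main() {
	const uint32_t d = 53; // one of the prime capacities in hash_table_size_primes
	const uint64_t c = UINT64_C(0xFFFFFFFFFFFFFFFF) / d + 1; // how hash_table_size_primes_inv entries are computed
	for (uint32_t n = 0; n < 5000000; n++) {
		if (fastmod_sketch(n, c, d) != n % d) {
			printf("mismatch at n=%u\n", n);
			return 1;
		}
	}
	printf("fastmod matches %% for every n tested (d=%u)\n", d);
	return 0;
}
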
diff --git a/core/templates/bin_sorted_array.h b/core/templates/bin_sorted_array.h index d928bd7a82..38beb9c04d 100644 --- a/core/templates/bin_sorted_array.h +++ b/core/templates/bin_sorted_array.h @@ -178,4 +178,4 @@ public: } }; -#endif //BIN_SORTED_ARRAY_H +#endif // BIN_SORTED_ARRAY_H diff --git a/core/templates/cowdata.h b/core/templates/cowdata.h index e760fc2176..f98b2308c9 100644 --- a/core/templates/cowdata.h +++ b/core/templates/cowdata.h @@ -36,6 +36,7 @@ #include "core/templates/safe_refcount.h" #include <string.h> +#include <type_traits> template <class T> class Vector; @@ -158,6 +159,7 @@ public: return _ptr[p_index]; } + template <bool p_ensure_zero = false> Error resize(int p_size); _FORCE_INLINE_ void remove_at(int p_index) { @@ -204,7 +206,7 @@ void CowData<T>::_unref(void *p_data) { } // clean up - if (!__has_trivial_destructor(T)) { + if (!std::is_trivially_destructible<T>::value) { uint32_t *count = _get_size(); T *data = (T *)(count + 1); @@ -239,7 +241,7 @@ uint32_t CowData<T>::_copy_on_write() { T *_data = (T *)(mem_new); // initialize new elements - if (__has_trivial_copy(T)) { + if (std::is_trivially_copyable<T>::value) { memcpy(mem_new, _ptr, current_size * sizeof(T)); } else { @@ -257,6 +259,7 @@ uint32_t CowData<T>::_copy_on_write() { } template <class T> +template <bool p_ensure_zero> Error CowData<T>::resize(int p_size) { ERR_FAIL_COND_V(p_size < 0, ERR_INVALID_PARAMETER); @@ -302,16 +305,18 @@ Error CowData<T>::resize(int p_size) { // construct the newly created elements - if (!__has_trivial_constructor(T)) { + if (!std::is_trivially_constructible<T>::value) { for (int i = *_get_size(); i < p_size; i++) { memnew_placement(&_ptr[i], T); } + } else if (p_ensure_zero) { + memset((void *)(_ptr + current_size), 0, (p_size - current_size) * sizeof(T)); } *_get_size() = p_size; } else if (p_size < current_size) { - if (!__has_trivial_destructor(T)) { + if (!std::is_trivially_destructible<T>::value) { // deinitialize no longer needed elements for (uint32_t i = p_size; i < *_get_size(); i++) { T *t = &_ptr[i]; diff --git a/core/templates/hash_map.h b/core/templates/hash_map.h index e5f73171a2..191f21a3dd 100644 --- a/core/templates/hash_map.h +++ b/core/templates/hash_map.h @@ -91,9 +91,9 @@ private: return hash; } - _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const { - uint32_t original_pos = p_hash % p_capacity; - return (p_pos - original_pos + p_capacity) % p_capacity; + static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) { + const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity); + return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity); } bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const { @@ -101,9 +101,10 @@ private: return false; // Failed lookups, no elements } - uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; uint32_t hash = _hash(p_key); - uint32_t pos = hash % capacity; + uint32_t pos = fastmod(hash, capacity_inv, capacity); uint32_t distance = 0; while (true) { @@ -111,7 +112,7 @@ private: return false; } - if (distance > _get_probe_length(pos, hashes[pos], capacity)) { + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { return false; } @@ -120,17 +121,18 @@ private: return 
true; } - pos = (pos + 1) % capacity; + pos = fastmod((pos + 1), capacity_inv, capacity); distance++; } } void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) { - uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; uint32_t hash = p_hash; HashMapElement<TKey, TValue> *value = p_value; uint32_t distance = 0; - uint32_t pos = hash % capacity; + uint32_t pos = fastmod(hash, capacity_inv, capacity); while (true) { if (hashes[pos] == EMPTY_HASH) { @@ -143,14 +145,14 @@ private: } // Not an empty slot, let's check the probing length of the existing one. - uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity); + uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv); if (existing_probe_len < distance) { SWAP(hash, hashes[pos]); SWAP(value, elements[pos]); distance = existing_probe_len; } - pos = (pos + 1) % capacity; + pos = fastmod((pos + 1), capacity_inv, capacity); distance++; } } @@ -316,13 +318,14 @@ public: return false; } - uint32_t capacity = hash_table_size_primes[capacity_index]; - uint32_t next_pos = (pos + 1) % capacity; - while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) { + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity); + while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) { SWAP(hashes[next_pos], hashes[pos]); SWAP(elements[next_pos], elements[pos]); pos = next_pos; - next_pos = (pos + 1) % capacity; + next_pos = fastmod((pos + 1), capacity_inv, capacity); } hashes[pos] = EMPTY_HASH; diff --git a/core/templates/hash_set.h b/core/templates/hash_set.h index 2318067dcc..7b3a5d46f8 100644 --- a/core/templates/hash_set.h +++ b/core/templates/hash_set.h @@ -74,9 +74,9 @@ private: return hash; } - _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const { - uint32_t original_pos = p_hash % p_capacity; - return (p_pos - original_pos + p_capacity) % p_capacity; + static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) { + const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity); + return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity); } bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const { @@ -84,9 +84,10 @@ private: return false; // Failed lookups, no elements } - uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; uint32_t hash = _hash(p_key); - uint32_t pos = hash % capacity; + uint32_t pos = fastmod(hash, capacity_inv, capacity); uint32_t distance = 0; while (true) { @@ -94,7 +95,7 @@ private: return false; } - if (distance > _get_probe_length(pos, hashes[pos], capacity)) { + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { return false; } @@ -103,17 +104,18 @@ private: return true; } - pos = (pos + 1) % capacity; + pos = fastmod(pos + 1, capacity_inv, capacity); distance++; } } uint32_t _insert_with_hash(uint32_t 
p_hash, uint32_t p_index) { - uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; uint32_t hash = p_hash; uint32_t index = p_index; uint32_t distance = 0; - uint32_t pos = hash % capacity; + uint32_t pos = fastmod(hash, capacity_inv, capacity); while (true) { if (hashes[pos] == EMPTY_HASH) { @@ -124,7 +126,7 @@ private: } // Not an empty slot, let's check the probing length of the existing one. - uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity); + uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv); if (existing_probe_len < distance) { key_to_hash[index] = pos; SWAP(hash, hashes[pos]); @@ -132,7 +134,7 @@ private: distance = existing_probe_len; } - pos = (pos + 1) % capacity; + pos = fastmod(pos + 1, capacity_inv, capacity); distance++; } } @@ -265,9 +267,10 @@ public: uint32_t key_pos = pos; pos = key_to_hash[pos]; //make hash pos - uint32_t capacity = hash_table_size_primes[capacity_index]; - uint32_t next_pos = (pos + 1) % capacity; - while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) { + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity); + while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) { uint32_t kpos = hash_to_key[pos]; uint32_t kpos_next = hash_to_key[next_pos]; SWAP(key_to_hash[kpos], key_to_hash[kpos_next]); @@ -275,7 +278,7 @@ public: SWAP(hash_to_key[next_pos], hash_to_key[pos]); pos = next_pos; - next_pos = (pos + 1) % capacity; + next_pos = fastmod(pos + 1, capacity_inv, capacity); } hashes[pos] = EMPTY_HASH; diff --git a/core/templates/hashfuncs.h b/core/templates/hashfuncs.h index 98ff7fa4ce..d85cdf7adc 100644 --- a/core/templates/hashfuncs.h +++ b/core/templates/hashfuncs.h @@ -40,6 +40,8 @@ #include "core/math/vector2i.h" #include "core/math/vector3.h" #include "core/math/vector3i.h" +#include "core/math/vector4.h" +#include "core/math/vector4i.h" #include "core/object/object_id.h" #include "core/string/node_path.h" #include "core/string/string_name.h" @@ -62,7 +64,7 @@ static _FORCE_INLINE_ uint32_t hash_djb2(const char *p_cstr) { uint32_t c; while ((c = *chr++)) { - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + hash = ((hash << 5) + hash) ^ c; /* hash * 33 ^ c */ } return hash; @@ -72,14 +74,14 @@ static _FORCE_INLINE_ uint32_t hash_djb2_buffer(const uint8_t *p_buff, int p_len uint32_t hash = p_prev; for (int i = 0; i < p_len; i++) { - hash = ((hash << 5) + hash) + p_buff[i]; /* hash * 33 + c */ + hash = ((hash << 5) + hash) ^ p_buff[i]; /* hash * 33 + c */ } return hash; } static _FORCE_INLINE_ uint32_t hash_djb2_one_32(uint32_t p_in, uint32_t p_prev = 5381) { - return ((p_prev << 5) + p_prev) + p_in; + return ((p_prev << 5) + p_prev) ^ p_in; } /** @@ -100,14 +102,76 @@ static _FORCE_INLINE_ uint32_t hash_one_uint64(const uint64_t p_int) { return uint32_t(v); } +#define HASH_MURMUR3_SEED 0x7F07C65 // Murmurhash3 32-bit version. // All MurmurHash versions are public domain software, and the author disclaims all copyright to their code. 
-static _FORCE_INLINE_ uint32_t rotl32(uint32_t x, int8_t r) { +static _FORCE_INLINE_ uint32_t hash_murmur3_one_32(uint32_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) { + p_in *= 0xcc9e2d51; + p_in = (p_in << 15) | (p_in >> 17); + p_in *= 0x1b873593; + + p_seed ^= p_in; + p_seed = (p_seed << 13) | (p_seed >> 19); + p_seed = p_seed * 5 + 0xe6546b64; + + return p_seed; +} + +static _FORCE_INLINE_ uint32_t hash_murmur3_one_float(float p_in, uint32_t p_seed = HASH_MURMUR3_SEED) { + union { + float f; + uint32_t i; + } u; + + // Normalize +/- 0.0 and NaN values so they hash the same. + if (p_in == 0.0f) { + u.f = 0.0; + } else if (Math::is_nan(p_in)) { + u.f = NAN; + } else { + u.f = p_in; + } + + return hash_murmur3_one_32(u.i, p_seed); +} + +static _FORCE_INLINE_ uint32_t hash_murmur3_one_64(uint64_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) { + p_seed = hash_murmur3_one_32(p_in & 0xFFFFFFFF, p_seed); + return hash_murmur3_one_32(p_in >> 32, p_seed); +} + +static _FORCE_INLINE_ uint32_t hash_murmur3_one_double(double p_in, uint32_t p_seed = HASH_MURMUR3_SEED) { + union { + double d; + uint64_t i; + } u; + + // Normalize +/- 0.0 and NaN values so they hash the same. + if (p_in == 0.0f) { + u.d = 0.0; + } else if (Math::is_nan(p_in)) { + u.d = NAN; + } else { + u.d = p_in; + } + + return hash_murmur3_one_64(u.i, p_seed); +} + +static _FORCE_INLINE_ uint32_t hash_murmur3_one_real(real_t p_in, uint32_t p_seed = HASH_MURMUR3_SEED) { +#ifdef REAL_T_IS_DOUBLE + return hash_murmur3_one_double(p_in, p_seed); +#else + return hash_murmur3_one_float(p_in, p_seed); +#endif +} + +static _FORCE_INLINE_ uint32_t hash_rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); } -static _FORCE_INLINE_ uint32_t fmix32(uint32_t h) { +static _FORCE_INLINE_ uint32_t hash_fmix32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; @@ -117,7 +181,7 @@ static _FORCE_INLINE_ uint32_t fmix32(uint32_t h) { return h; } -static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, const uint32_t seed = 0x7F07C65) { +static _FORCE_INLINE_ uint32_t hash_murmur3_buffer(const void *key, int length, const uint32_t seed = HASH_MURMUR3_SEED) { // Although not required, this is a random prime number. const uint8_t *data = (const uint8_t *)key; const int nblocks = length / 4; @@ -133,11 +197,11 @@ static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, cons uint32_t k1 = blocks[i]; k1 *= c1; - k1 = rotl32(k1, 15); + k1 = hash_rotl32(k1, 15); k1 *= c2; h1 ^= k1; - h1 = rotl32(h1, 13); + h1 = hash_rotl32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } @@ -155,14 +219,14 @@ static _FORCE_INLINE_ uint32_t hash_murmur3_32(const void *key, int length, cons case 1: k1 ^= tail[0]; k1 *= c1; - k1 = rotl32(k1, 15); + k1 = hash_rotl32(k1, 15); k1 *= c2; h1 ^= k1; }; // Finalize with additional bit mixing. 
h1 ^= length; - return fmix32(h1); + return hash_fmix32(h1); } static _FORCE_INLINE_ uint32_t hash_djb2_one_float(double p_in, uint32_t p_prev = 5381) { @@ -184,7 +248,7 @@ static _FORCE_INLINE_ uint32_t hash_djb2_one_float(double p_in, uint32_t p_prev } template <class T> -static _FORCE_INLINE_ uint32_t make_uint32_t(T p_in) { +static _FORCE_INLINE_ uint32_t hash_make_uint32_t(T p_in) { union { T t; uint32_t _u32; @@ -213,11 +277,11 @@ static _FORCE_INLINE_ uint64_t hash_djb2_one_float_64(double p_in, uint64_t p_pr } static _FORCE_INLINE_ uint64_t hash_djb2_one_64(uint64_t p_in, uint64_t p_prev = 5381) { - return ((p_prev << 5) + p_prev) + p_in; + return ((p_prev << 5) + p_prev) ^ p_in; } template <class T> -static _FORCE_INLINE_ uint64_t make_uint64_t(T p_in) { +static _FORCE_INLINE_ uint64_t hash_make_uint64_t(T p_in) { union { T t; uint64_t _u64; @@ -241,9 +305,9 @@ struct HashMapHasherDefault { static _FORCE_INLINE_ uint32_t hash(const String &p_string) { return p_string.hash(); } static _FORCE_INLINE_ uint32_t hash(const char *p_cstr) { return hash_djb2(p_cstr); } - static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return fmix32(p_wchar); } - static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return fmix32(p_uchar); } - static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return fmix32(p_uchar); } + static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return hash_fmix32(p_wchar); } + static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return hash_fmix32(p_uchar); } + static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return hash_fmix32(p_uchar); } static _FORCE_INLINE_ uint32_t hash(const RID &p_rid) { return hash_one_uint64(p_rid.get_id()); } static _FORCE_INLINE_ uint32_t hash(const StringName &p_string_name) { return p_string_name.hash(); } static _FORCE_INLINE_ uint32_t hash(const NodePath &p_path) { return p_path.hash(); } @@ -251,21 +315,73 @@ struct HashMapHasherDefault { static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) { return hash_one_uint64(p_int); } static _FORCE_INLINE_ uint32_t hash(const int64_t p_int) { return hash_one_uint64(p_int); } - static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_djb2_one_float(p_float); } - static _FORCE_INLINE_ uint32_t hash(const double p_double) { return hash_djb2_one_float(p_double); } - static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const int16_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return fmix32(p_int); } - static _FORCE_INLINE_ uint32_t hash(const Vector2i &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector2i)); } - static _FORCE_INLINE_ uint32_t hash(const Vector3i &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector3i)); } - static _FORCE_INLINE_ uint32_t hash(const Vector2 &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector2)); } - static _FORCE_INLINE_ uint32_t hash(const Vector3 &p_vec) { return hash_murmur3_32(&p_vec, sizeof(Vector3)); } - static _FORCE_INLINE_ uint32_t hash(const Rect2i &p_rect) { return hash_murmur3_32(&p_rect, sizeof(Rect2i)); } - static _FORCE_INLINE_ uint32_t hash(const Rect2 &p_rect) { return hash_murmur3_32(&p_rect, sizeof(Rect2)); 
} - static _FORCE_INLINE_ uint32_t hash(const AABB &p_aabb) { return hash_murmur3_32(&p_aabb, sizeof(AABB)); } + static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_murmur3_one_float(p_float); } + static _FORCE_INLINE_ uint32_t hash(const double p_double) { return hash_murmur3_one_double(p_double); } + static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const int16_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return hash_fmix32(p_int); } + static _FORCE_INLINE_ uint32_t hash(const Vector2i &p_vec) { + uint32_t h = hash_murmur3_one_32(p_vec.x); + h = hash_murmur3_one_32(p_vec.y, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Vector3i &p_vec) { + uint32_t h = hash_murmur3_one_32(p_vec.x); + h = hash_murmur3_one_32(p_vec.y, h); + h = hash_murmur3_one_32(p_vec.z, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Vector4i &p_vec) { + uint32_t h = hash_murmur3_one_32(p_vec.x); + h = hash_murmur3_one_32(p_vec.y, h); + h = hash_murmur3_one_32(p_vec.z, h); + h = hash_murmur3_one_32(p_vec.w, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Vector2 &p_vec) { + uint32_t h = hash_murmur3_one_real(p_vec.x); + h = hash_murmur3_one_real(p_vec.y, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Vector3 &p_vec) { + uint32_t h = hash_murmur3_one_real(p_vec.x); + h = hash_murmur3_one_real(p_vec.y, h); + h = hash_murmur3_one_real(p_vec.z, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Vector4 &p_vec) { + uint32_t h = hash_murmur3_one_real(p_vec.x); + h = hash_murmur3_one_real(p_vec.y, h); + h = hash_murmur3_one_real(p_vec.z, h); + h = hash_murmur3_one_real(p_vec.w, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Rect2i &p_rect) { + uint32_t h = hash_murmur3_one_32(p_rect.position.x); + h = hash_murmur3_one_32(p_rect.position.y, h); + h = hash_murmur3_one_32(p_rect.size.x, h); + h = hash_murmur3_one_32(p_rect.size.y, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const Rect2 &p_rect) { + uint32_t h = hash_murmur3_one_real(p_rect.position.x); + h = hash_murmur3_one_real(p_rect.position.y, h); + h = hash_murmur3_one_real(p_rect.size.x, h); + h = hash_murmur3_one_real(p_rect.size.y, h); + return hash_fmix32(h); + } + static _FORCE_INLINE_ uint32_t hash(const AABB &p_aabb) { + uint32_t h = hash_murmur3_one_real(p_aabb.position.x); + h = hash_murmur3_one_real(p_aabb.position.y, h); + h = hash_murmur3_one_real(p_aabb.position.z, h); + h = hash_murmur3_one_real(p_aabb.size.x, h); + h = hash_murmur3_one_real(p_aabb.size.y, h); + h = hash_murmur3_one_real(p_aabb.size.z, h); + return hash_fmix32(h); + } }; template <typename T> @@ -337,4 +453,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = { 1610612741, }; +// Computed with elem_i = UINT64_C (0 x FFFFFFFF FFFFFFFF ) / d_i + 1, where d_i is the i-th element of the above array. 
+const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = { + 3689348814741910324, + 1418980313362273202, + 802032351030850071, + 392483916461905354, + 190172619316593316, + 95578984837873325, + 47420935922132524, + 23987963684927896, + 11955116055547344, + 5991147799191151, + 2998982941588287, + 1501077717772769, + 750081082979285, + 375261795343686, + 187625172388393, + 93822606204624, + 46909513691883, + 23456218233098, + 11728086747027, + 5864041509391, + 2932024948977, + 1466014921160, + 733007198436, + 366503839517, + 183251896093, + 91625960335, + 45812983922, + 22906489714, + 11453246088 +}; + +/** + * Fastmod computes ( n mod d ) given the precomputed c much faster than n % d. + * The implementation of fastmod is based on the following paper by Daniel Lemire et al. + * Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries + * https://arxiv.org/abs/1902.01961 + */ +static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) { +#if defined(_MSC_VER) + // Returns the upper 64 bits of the product of two 64-bit unsigned integers. + // This intrinsic function is required since MSVC does not support unsigned 128-bit integers. +#if defined(_M_X64) || defined(_M_ARM64) + return __umulh(c * n, d); +#else + // Fallback to the slower method for 32-bit platforms. + return n % d; +#endif // _M_X64 || _M_ARM64 +#else +#ifdef __SIZEOF_INT128__ + // Prevent compiler warning, because we know what we are doing. + uint64_t lowbits = c * n; + __extension__ typedef unsigned __int128 uint128; + return static_cast<uint64_t>(((uint128)lowbits * d) >> 64); +#else + // Fallback to the slower method if no 128-bit unsigned integer type is available. + return n % d; +#endif // __SIZEOF_INT128__ +#endif // _MSC_VER +} + #endif // HASHFUNCS_H diff --git a/core/templates/local_vector.h b/core/templates/local_vector.h index f4e0748c27..49690f2373 100644 --- a/core/templates/local_vector.h +++ b/core/templates/local_vector.h @@ -37,8 +37,11 @@ #include "core/templates/vector.h" #include <initializer_list> +#include <type_traits> -template <class T, class U = uint32_t, bool force_trivial = false> +// If tight, it grows strictly as much as needed. +// Otherwise, it grows exponentially (the default and what you want in most cases). +template <class T, class U = uint32_t, bool force_trivial = false, bool tight = false> class LocalVector { private: U count = 0; @@ -65,7 +68,7 @@ public: CRASH_COND_MSG(!data, "Out of memory"); } - if (!__has_trivial_constructor(T) && !force_trivial) { + if (!std::is_trivially_constructible<T>::value && !force_trivial) { memnew_placement(&data[count++], T(p_elem)); } else { data[count++] = p_elem; @@ -78,7 +81,7 @@ public: for (U i = p_index; i < count; i++) { data[i] = data[i + 1]; } - if (!__has_trivial_destructor(T) && !force_trivial) { + if (!std::is_trivially_destructible<T>::value && !force_trivial) { data[count].~T(); } } @@ -91,7 +94,7 @@ public: if (count > p_index) { data[p_index] = data[count]; } - if (!__has_trivial_destructor(T) && !force_trivial) { + if (!std::is_trivially_destructible<T>::value && !force_trivial) { data[count].~T(); } } @@ -121,7 +124,7 @@ public: _FORCE_INLINE_ bool is_empty() const { return count == 0; } _FORCE_INLINE_ U get_capacity() const { return capacity; } _FORCE_INLINE_ void reserve(U p_size) { - p_size = nearest_power_of_2_templated(p_size); + p_size = tight ? 
p_size : nearest_power_of_2_templated(p_size); if (p_size > capacity) { capacity = p_size; data = (T *)memrealloc(data, capacity * sizeof(T)); @@ -132,7 +135,7 @@ public: _FORCE_INLINE_ U size() const { return count; } void resize(U p_size) { if (p_size < count) { - if (!__has_trivial_destructor(T) && !force_trivial) { + if (!std::is_trivially_destructible<T>::value && !force_trivial) { for (U i = p_size; i < count; i++) { data[i].~T(); } @@ -149,7 +152,7 @@ public: data = (T *)memrealloc(data, capacity * sizeof(T)); CRASH_COND_MSG(!data, "Out of memory"); } - if (!__has_trivial_constructor(T) && !force_trivial) { + if (!std::is_trivially_constructible<T>::value && !force_trivial) { for (U i = count; i < p_size; i++) { memnew_placement(&data[i], T); } @@ -262,4 +265,7 @@ public: } }; +template <class T, class U = uint32_t, bool force_trivial = false> +using TightLocalVector = LocalVector<T, U, force_trivial, true>; + #endif // LOCAL_VECTOR_H diff --git a/core/templates/lru.h b/core/templates/lru.h index 48ba318b12..3a78de61db 100644 --- a/core/templates/lru.h +++ b/core/templates/lru.h @@ -35,7 +35,7 @@ #include "hash_map.h" #include "list.h" -template <class TKey, class TData> +template <class TKey, class TData, class Hasher = HashMapHasherDefault, class Comparator = HashMapComparatorDefault<TKey>> class LRUCache { private: struct Pair { @@ -52,7 +52,7 @@ private: typedef typename List<Pair>::Element *Element; List<Pair> _list; - HashMap<TKey, Element> _map; + HashMap<TKey, Element, Hasher, Comparator> _map; size_t capacity; public: @@ -102,6 +102,7 @@ public: } _FORCE_INLINE_ size_t get_capacity() const { return capacity; } + _FORCE_INLINE_ size_t get_size() const { return _map.size(); } void set_capacity(size_t p_capacity) { if (capacity > 0) { @@ -123,4 +124,4 @@ public: } }; -#endif +#endif // LRU_H diff --git a/core/templates/paged_allocator.h b/core/templates/paged_allocator.h index cf5911a847..43aab052fd 100644 --- a/core/templates/paged_allocator.h +++ b/core/templates/paged_allocator.h @@ -31,11 +31,14 @@ #ifndef PAGED_ALLOCATOR_H #define PAGED_ALLOCATOR_H +#include "core/core_globals.h" #include "core/os/memory.h" #include "core/os/spin_lock.h" +#include "core/string/ustring.h" #include "core/typedefs.h" #include <type_traits> +#include <typeinfo> template <class T, bool thread_safe = false> class PagedAllocator { @@ -132,7 +135,12 @@ public: } ~PagedAllocator() { - ERR_FAIL_COND_MSG(allocs_available < pages_allocated * page_size, "Pages in use exist at exit in PagedAllocator"); + if (allocs_available < pages_allocated * page_size) { + if (CoreGlobals::leak_reporting_enabled) { + ERR_FAIL_COND_MSG(allocs_available < pages_allocated * page_size, String("Pages in use exist at exit in PagedAllocator: ") + String(typeid(T).name())); + } + return; + } reset(); } }; diff --git a/core/templates/paged_array.h b/core/templates/paged_array.h index 33d2757bec..f1ede556e6 100644 --- a/core/templates/paged_array.h +++ b/core/templates/paged_array.h @@ -35,6 +35,8 @@ #include "core/os/spin_lock.h" #include "core/typedefs.h" +#include <type_traits> + // PagedArray is used mainly for filling a very large array from multiple threads efficiently and without causing major fragmentation // PageArrayPool manages central page allocation in a thread safe matter @@ -197,7 +199,7 @@ public: uint32_t page = count >> page_size_shift; uint32_t offset = count & page_size_mask; - if (!__has_trivial_constructor(T)) { + if (!std::is_trivially_constructible<T>::value) { 
memnew_placement(&page_data[page][offset], T(p_value)); } else { page_data[page][offset] = p_value; @@ -209,7 +211,7 @@ public: _FORCE_INLINE_ void pop_back() { ERR_FAIL_COND(count == 0); - if (!__has_trivial_destructor(T)) { + if (!std::is_trivially_destructible<T>::value) { uint32_t page = (count - 1) >> page_size_shift; uint32_t offset = (count - 1) & page_size_mask; page_data[page][offset].~T(); @@ -226,7 +228,7 @@ public: void clear() { //destruct if needed - if (!__has_trivial_destructor(T)) { + if (!std::is_trivially_destructible<T>::value) { for (uint64_t i = 0; i < count; i++) { uint32_t page = i >> page_size_shift; uint32_t offset = i & page_size_mask; @@ -309,13 +311,13 @@ public: uint32_t to_copy = MIN(page_size - new_remainder, remainder); for (uint32_t i = 0; i < to_copy; i++) { - if (!__has_trivial_constructor(T)) { + if (!std::is_trivially_constructible<T>::value) { memnew_placement(&dst_page[i + new_remainder], T(remainder_page[i + remainder - to_copy])); } else { dst_page[i + new_remainder] = remainder_page[i + remainder - to_copy]; } - if (!__has_trivial_destructor(T)) { + if (!std::is_trivially_destructible<T>::value) { remainder_page[i + remainder - to_copy].~T(); } } diff --git a/core/templates/rb_map.h b/core/templates/rb_map.h index c732ccd485..3393e6dd3e 100644 --- a/core/templates/rb_map.h +++ b/core/templates/rb_map.h @@ -758,4 +758,4 @@ public: } }; -#endif // MAP_H +#endif // RB_MAP_H diff --git a/core/templates/rb_set.h b/core/templates/rb_set.h index 226ea979c9..e87ea544fd 100644 --- a/core/templates/rb_set.h +++ b/core/templates/rb_set.h @@ -708,4 +708,4 @@ public: } }; -#endif // SET_H +#endif // RB_SET_H diff --git a/core/templates/rid_owner.h b/core/templates/rid_owner.h index 9c74b41818..320faebe98 100644 --- a/core/templates/rid_owner.h +++ b/core/templates/rid_owner.h @@ -79,7 +79,7 @@ class RID_Alloc : public RID_AllocBase { const char *description = nullptr; - SpinLock spin_lock; + mutable SpinLock spin_lock; _FORCE_INLINE_ RID _allocate_rid() { if (THREAD_SAFE) { @@ -220,7 +220,7 @@ public: memnew_placement(mem, T(p_value)); } - _FORCE_INLINE_ bool owns(const RID &p_rid) { + _FORCE_INLINE_ bool owns(const RID &p_rid) const { if (THREAD_SAFE) { spin_lock.lock(); } @@ -292,7 +292,7 @@ public: _FORCE_INLINE_ uint32_t get_rid_count() const { return alloc_count; } - void get_owned_list(List<RID> *p_owned) { + void get_owned_list(List<RID> *p_owned) const { if (THREAD_SAFE) { spin_lock.lock(); } @@ -308,7 +308,7 @@ public: } //used for fast iteration in the elements or RIDs - void fill_owned_buffer(RID *p_rid_buffer) { + void fill_owned_buffer(RID *p_rid_buffer) const { if (THREAD_SAFE) { spin_lock.lock(); } @@ -402,7 +402,7 @@ public: *ptr = p_new_ptr; } - _FORCE_INLINE_ bool owns(const RID &p_rid) { + _FORCE_INLINE_ bool owns(const RID &p_rid) const { return alloc.owns(p_rid); } @@ -414,11 +414,11 @@ public: return alloc.get_rid_count(); } - _FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) { + _FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) const { return alloc.get_owned_list(p_owned); } - void fill_owned_buffer(RID *p_rid_buffer) { + void fill_owned_buffer(RID *p_rid_buffer) const { alloc.fill_owned_buffer(p_rid_buffer); } @@ -458,7 +458,7 @@ public: return alloc.get_or_null(p_rid); } - _FORCE_INLINE_ bool owns(const RID &p_rid) { + _FORCE_INLINE_ bool owns(const RID &p_rid) const { return alloc.owns(p_rid); } @@ -470,10 +470,10 @@ public: return alloc.get_rid_count(); } - _FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) { + 
_FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) const { return alloc.get_owned_list(p_owned); } - void fill_owned_buffer(RID *p_rid_buffer) { + void fill_owned_buffer(RID *p_rid_buffer) const { alloc.fill_owned_buffer(p_rid_buffer); } diff --git a/core/templates/safe_refcount.h b/core/templates/safe_refcount.h index 76f76be96a..1f6551762e 100644 --- a/core/templates/safe_refcount.h +++ b/core/templates/safe_refcount.h @@ -111,7 +111,8 @@ public: if (tmp >= p_value) { return tmp; // already greater, or equal } - if (value.compare_exchange_weak(tmp, p_value, std::memory_order_release)) { + + if (value.compare_exchange_weak(tmp, p_value, std::memory_order_acq_rel)) { return p_value; } } @@ -123,7 +124,7 @@ public: if (c == 0) { return 0; } - if (value.compare_exchange_weak(c, c + 1, std::memory_order_release)) { + if (value.compare_exchange_weak(c, c + 1, std::memory_order_acq_rel)) { return c + 1; } } diff --git a/core/templates/thread_work_pool.cpp b/core/templates/thread_work_pool.cpp deleted file mode 100644 index a75fd06b9b..0000000000 --- a/core/templates/thread_work_pool.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/*************************************************************************/ -/* thread_work_pool.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/*************************************************************************/ - -#include "thread_work_pool.h" - -#include "core/os/os.h" - -void ThreadWorkPool::_thread_function(void *p_user) { - ThreadData *thread = static_cast<ThreadData *>(p_user); - while (true) { - thread->start.wait(); - if (thread->exit.load()) { - break; - } - thread->work->work(); - thread->completed.post(); - } -} - -void ThreadWorkPool::init(int p_thread_count) { - ERR_FAIL_COND(threads != nullptr); - if (p_thread_count < 0) { - p_thread_count = OS::get_singleton()->get_default_thread_pool_size(); - } - - thread_count = p_thread_count; - threads = memnew_arr(ThreadData, thread_count); - - for (uint32_t i = 0; i < thread_count; i++) { - threads[i].exit.store(false); - threads[i].thread.start(&ThreadWorkPool::_thread_function, &threads[i]); - } -} - -void ThreadWorkPool::finish() { - if (threads == nullptr) { - return; - } - - for (uint32_t i = 0; i < thread_count; i++) { - threads[i].exit.store(true); - threads[i].start.post(); - } - for (uint32_t i = 0; i < thread_count; i++) { - threads[i].thread.wait_to_finish(); - } - - memdelete_arr(threads); - threads = nullptr; -} - -ThreadWorkPool::~ThreadWorkPool() { - finish(); -} diff --git a/core/templates/thread_work_pool.h b/core/templates/thread_work_pool.h deleted file mode 100644 index b0cebf04f1..0000000000 --- a/core/templates/thread_work_pool.h +++ /dev/null @@ -1,157 +0,0 @@ -/*************************************************************************/ -/* thread_work_pool.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/*************************************************************************/ - -#ifndef THREAD_WORK_POOL_H -#define THREAD_WORK_POOL_H - -#include "core/os/memory.h" -#include "core/os/semaphore.h" -#include "core/os/thread.h" - -#include <atomic> - -class ThreadWorkPool { - std::atomic<uint32_t> index; - - struct BaseWork { - std::atomic<uint32_t> *index = nullptr; - uint32_t max_elements = 0; - virtual void work() = 0; - virtual ~BaseWork() = default; - }; - - template <class C, class M, class U> - struct Work : public BaseWork { - C *instance; - M method; - U userdata; - virtual void work() override { - while (true) { - uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed); - if (work_index >= max_elements) { - break; - } - (instance->*method)(work_index, userdata); - } - } - }; - - struct ThreadData { - Thread thread; - Semaphore start; - Semaphore completed; - std::atomic<bool> exit; - BaseWork *work = nullptr; - }; - - ThreadData *threads = nullptr; - uint32_t thread_count = 0; - uint32_t threads_working = 0; - BaseWork *current_work = nullptr; - - static void _thread_function(void *p_user); - -public: - template <class C, class M, class U> - void begin_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) { - ERR_FAIL_COND(!threads); //never initialized - ERR_FAIL_COND(current_work != nullptr); - - index.store(0, std::memory_order_release); - - Work<C, M, U> *w = memnew((Work<C, M, U>)); - w->instance = p_instance; - w->userdata = p_userdata; - w->method = p_method; - w->index = &index; - w->max_elements = p_elements; - - current_work = w; - - threads_working = MIN(p_elements, thread_count); - - for (uint32_t i = 0; i < threads_working; i++) { - threads[i].work = w; - threads[i].start.post(); - } - } - - bool is_working() const { - return current_work != nullptr; - } - - bool is_done_dispatching() const { - ERR_FAIL_COND_V(current_work == nullptr, true); - return index.load(std::memory_order_acquire) >= current_work->max_elements; - } - - uint32_t get_work_index() const { - ERR_FAIL_COND_V(current_work == nullptr, 0); - uint32_t idx = index.load(std::memory_order_acquire); - return MIN(idx, current_work->max_elements); - } - - void end_work() { - ERR_FAIL_COND(current_work == nullptr); - for (uint32_t i = 0; i < threads_working; i++) { - threads[i].completed.wait(); - threads[i].work = nullptr; - } - - threads_working = 0; - memdelete(current_work); - current_work = nullptr; - } - - template <class C, class M, class U> - void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) { - switch (p_elements) { - case 0: - // Nothing to do, so do nothing. - break; - case 1: - // No value in pushing the work to another thread if it's a single job - // and we're going to wait for it to finish. Just run it right here. - (p_instance->*p_method)(0, p_userdata); - break; - default: - // Multiple jobs to do; commence threaded business. 
- begin_work(p_elements, p_instance, p_method, p_userdata); - end_work(); - } - } - - _FORCE_INLINE_ int get_thread_count() const { return thread_count; } - void init(int p_thread_count = -1); - void finish(); - ~ThreadWorkPool(); -}; - -#endif // THREAD_POOL_H diff --git a/core/templates/vector.h b/core/templates/vector.h index 2ac7c7630a..51595a75f5 100644 --- a/core/templates/vector.h +++ b/core/templates/vector.h @@ -89,6 +89,7 @@ public: _FORCE_INLINE_ void set(int p_index, const T &p_elem) { _cowdata.set(p_index, p_elem); } _FORCE_INLINE_ int size() const { return _cowdata.size(); } Error resize(int p_size) { return _cowdata.resize(p_size); } + Error resize_zeroed(int p_size) { return _cowdata.template resize<true>(p_size); } _FORCE_INLINE_ const T &operator[](int p_index) const { return _cowdata.get(p_index); } Error insert(int p_pos, T p_val) { return _cowdata.insert(p_pos, p_val); } int find(const T &p_val, int p_from = 0) const { return _cowdata.find(p_val, p_from); } @@ -145,6 +146,9 @@ public: Vector<uint8_t> to_byte_array() const { Vector<uint8_t> ret; + if (is_empty()) { + return ret; + } ret.resize(size() * sizeof(T)); memcpy(ret.ptrw(), ptr(), sizeof(T) * size()); return ret; |
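
Editor's note: after this patch, hashfuncs.h favors composing per-member hashes with the new incremental MurmurHash3 helpers and finalizing once with hash_fmix32, rather than hashing raw struct memory — the pattern applied above to Vector2/Vector3/Rect2/AABB. A hypothetical user-defined hasher following that pattern, usable with HashMap and the newly parameterized LRUCache; GridCell and GridCellHasher are invented for illustration and are not part of this changeset, and GridCell would additionally need an equality comparator for real use.

#include "core/templates/hash_map.h"
#include "core/templates/hashfuncs.h"
#include "core/templates/lru.h"

struct GridCell {
	int32_t x = 0;
	int32_t y = 0;
	float weight = 0.0f;
};

struct GridCellHasher {
	static _FORCE_INLINE_ uint32_t hash(const GridCell &p_cell) {
		// Feed each member through the one-at-a-time helpers, chaining the seed...
		uint32_t h = hash_murmur3_one_32((uint32_t)p_cell.x);
		h = hash_murmur3_one_32((uint32_t)p_cell.y, h);
		h = hash_murmur3_one_float(p_cell.weight, h); // normalizes +/-0.0 and NaN before hashing
		// ...then finalize exactly once, as the built-in Vector2/Rect2 hashers do.
		return hash_fmix32(h);
	}
};

// Illustrative usage with containers touched by this diff:
// HashMap<GridCell, int, GridCellHasher> cell_map;
// LRUCache<GridCell, int, GridCellHasher> cell_cache; // LRUCache now accepts an optional Hasher/Comparator
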