31 files changed, 658 insertions, 326 deletions
diff --git a/core/math/basis.cpp b/core/math/basis.cpp
index e34c1c1315..84f9d12bb1 100644
--- a/core/math/basis.cpp
+++ b/core/math/basis.cpp
@@ -37,7 +37,7 @@
 	(elements[row1][col1] * elements[row2][col2] - elements[row1][col2] * elements[row2][col1])
 
 void Basis::from_z(const Vector3 &p_z) {
-	if (Math::abs(p_z.z) > Math_SQRT12) {
+	if (Math::abs(p_z.z) > (real_t)Math_SQRT12) {
 		// choose p in y-z plane
 		real_t a = p_z[1] * p_z[1] + p_z[2] * p_z[2];
 		real_t k = 1.0f / Math::sqrt(a);
@@ -153,7 +153,7 @@ Basis Basis::diagonalize() {
 
 	int ite = 0;
 	Basis acc_rot;
-	while (off_matrix_norm_2 > CMP_EPSILON2 && ite++ < ite_max) {
+	while (off_matrix_norm_2 > (real_t)CMP_EPSILON2 && ite++ < ite_max) {
 		real_t el01_2 = elements[0][1] * elements[0][1];
 		real_t el02_2 = elements[0][2] * elements[0][2];
 		real_t el12_2 = elements[1][2] * elements[1][2];
@@ -463,8 +463,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 
 			Vector3 euler;
 			real_t sy = elements[0][2];
-			if (sy < (1.0f - CMP_EPSILON)) {
-				if (sy > -(1.0f - CMP_EPSILON)) {
+			if (sy < (1.0f - (real_t)CMP_EPSILON)) {
+				if (sy > -(1.0f - (real_t)CMP_EPSILON)) {
 					// is this a pure Y rotation?
 					if (elements[1][0] == 0 && elements[0][1] == 0 && elements[1][2] == 0 && elements[2][1] == 0 && elements[1][1] == 1) {
 						// return the simplest form (human friendlier in editor and scripts)
@@ -498,8 +498,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 
 			Vector3 euler;
 			real_t sz = elements[0][1];
-			if (sz < (1.0f - CMP_EPSILON)) {
-				if (sz > -(1.0f - CMP_EPSILON)) {
+			if (sz < (1.0f - (real_t)CMP_EPSILON)) {
+				if (sz > -(1.0f - (real_t)CMP_EPSILON)) {
 					euler.x = Math::atan2(elements[2][1], elements[1][1]);
 					euler.y = Math::atan2(elements[0][2], elements[0][0]);
 					euler.z = Math::asin(-sz);
@@ -529,8 +529,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 
 			real_t m12 = elements[1][2];
 
-			if (m12 < (1 - CMP_EPSILON)) {
-				if (m12 > -(1 - CMP_EPSILON)) {
+			if (m12 < (1 - (real_t)CMP_EPSILON)) {
+				if (m12 > -(1 - (real_t)CMP_EPSILON)) {
 					// is this a pure X rotation?
 					if (elements[1][0] == 0 && elements[0][1] == 0 && elements[0][2] == 0 && elements[2][0] == 0 && elements[0][0] == 1) {
 						// return the simplest form (human friendlier in editor and scripts)
@@ -565,8 +565,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 
 			Vector3 euler;
 			real_t sz = elements[1][0];
-			if (sz < (1.0f - CMP_EPSILON)) {
-				if (sz > -(1.0f - CMP_EPSILON)) {
+			if (sz < (1.0f - (real_t)CMP_EPSILON)) {
+				if (sz > -(1.0f - (real_t)CMP_EPSILON)) {
 					euler.x = Math::atan2(-elements[1][2], elements[1][1]);
 					euler.y = Math::atan2(-elements[2][0], elements[0][0]);
 					euler.z = Math::asin(sz);
@@ -593,8 +593,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 			//        -cx*sy            sx                    cx*cy
 			Vector3 euler;
 			real_t sx = elements[2][1];
-			if (sx < (1.0f - CMP_EPSILON)) {
-				if (sx > -(1.0f - CMP_EPSILON)) {
+			if (sx < (1.0f - (real_t)CMP_EPSILON)) {
+				if (sx > -(1.0f - (real_t)CMP_EPSILON)) {
 					euler.x = Math::asin(sx);
 					euler.y = Math::atan2(-elements[2][0], elements[2][2]);
 					euler.z = Math::atan2(-elements[0][1], elements[1][1]);
@@ -621,8 +621,8 @@ Vector3 Basis::get_euler(EulerOrder p_order) const {
 			//        -sy               cy*sx                 cy*cx
 			Vector3 euler;
 			real_t sy = elements[2][0];
-			if (sy < (1.0f - CMP_EPSILON)) {
-				if (sy > -(1.0f - CMP_EPSILON)) {
+			if (sy < (1.0f - (real_t)CMP_EPSILON)) {
+				if (sy > -(1.0f - (real_t)CMP_EPSILON)) {
 					euler.x = Math::atan2(elements[2][1], elements[2][2]);
 					euler.y = Math::asin(-sy);
 					euler.z = Math::atan2(elements[1][0], elements[0][0]);
diff --git a/core/math/bvh.h b/core/math/bvh.h
index a8e3cc7bbe..e686e27445 100644
--- a/core/math/bvh.h
+++ b/core/math/bvh.h
@@ -46,21 +46,35 @@
 // Layer masks are implemented in the renderers as a later step, and light_cull_mask appears to be
 // implemented in GLES3 but not GLES2. Layer masks are not yet implemented for directional lights.
 
+// In the physics, the pairable_type is based on 1 << p_object->get_type() where:
+// TYPE_AREA,
+// TYPE_BODY
+// and pairable_mask is either 0 if static, or set to all if non static
+
 #include "bvh_tree.h"
+#include "core/os/mutex.h"
 
-#define BVHTREE_CLASS BVH_Tree<T, 2, MAX_ITEMS, USE_PAIRS, Bounds, Point>
+#define BVHTREE_CLASS BVH_Tree<T, NUM_TREES, 2, MAX_ITEMS, USER_PAIR_TEST_FUNCTION, USER_CULL_TEST_FUNCTION, USE_PAIRS, BOUNDS, POINT>
+#define BVH_LOCKED_FUNCTION BVHLockedFunction _lock_guard(&_mutex, BVH_THREAD_SAFE &&_thread_safe); // must be a named object: an unnamed temporary would unlock at the end of this statement
 
-template <class T, bool USE_PAIRS = false, int MAX_ITEMS = 32, class Bounds = AABB, class Point = Vector3>
+template <class T, int NUM_TREES = 1, bool USE_PAIRS = false, int MAX_ITEMS = 32, class USER_PAIR_TEST_FUNCTION = BVH_DummyPairTestFunction<T>, class USER_CULL_TEST_FUNCTION = BVH_DummyCullTestFunction<T>, class BOUNDS = AABB, class POINT = Vector3, bool BVH_THREAD_SAFE = true>
 class BVH_Manager {
 public:
 	// note we are using uint32_t instead of BVHHandle, losing type safety, but this
 	// is for compatibility with octree
 	typedef void *(*PairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int);
 	typedef void (*UnpairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int, void *);
+	typedef void *(*CheckPairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int, void *);
+
+	// allow locally toggling thread safety if the template has been compiled with BVH_THREAD_SAFE
+	void params_set_thread_safe(bool p_enable) {
+		_thread_safe = p_enable;
+	}
 
 	// these 2 are crucial for fine tuning, and can be applied manually
 	// see the variable declarations for more info.
 	void params_set_node_expansion(real_t p_value) {
+		BVH_LOCKED_FUNCTION
 		if (p_value >= 0.0) {
 			tree._node_expansion = p_value;
 			tree._auto_node_expansion = false;
@@ -70,43 +84,40 @@ public:
 	}
 
 	void params_set_pairing_expansion(real_t p_value) {
-		if (p_value >= 0.0) {
-			tree._pairing_expansion = p_value;
-			tree._auto_pairing_expansion = false;
-		} else {
-			tree._auto_pairing_expansion = true;
-		}
+		BVH_LOCKED_FUNCTION
+		tree.params_set_pairing_expansion(p_value);
 	}
 
 	void set_pair_callback(PairCallback p_callback, void *p_userdata) {
+		BVH_LOCKED_FUNCTION
 		pair_callback = p_callback;
 		pair_callback_userdata = p_userdata;
 	}
 	void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) {
+		BVH_LOCKED_FUNCTION
 		unpair_callback = p_callback;
 		unpair_callback_userdata = p_userdata;
 	}
+	void set_check_pair_callback(CheckPairCallback p_callback, void *p_userdata) {
+		BVH_LOCKED_FUNCTION
+		check_pair_callback = p_callback;
+		check_pair_callback_userdata = p_userdata;
+	}
+
+	BVHHandle create(T *p_userdata, bool p_active = true, uint32_t p_tree_id = 0, uint32_t p_tree_collision_mask = 1, const BOUNDS &p_aabb = BOUNDS(), int p_subindex = 0) {
+		BVH_LOCKED_FUNCTION
 
-	BVHHandle create(T *p_userdata, bool p_active, const Bounds &p_aabb = Bounds(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) {
 		// not sure if absolutely necessary to flush collisions here. It will cost performance to, instead
 		// of waiting for update, so only uncomment this if there are bugs.
 		if (USE_PAIRS) {
 			//_check_for_collisions();
 		}
 
-#ifdef TOOLS_ENABLED
-		if (!USE_PAIRS) {
-			if (p_pairable) {
-				WARN_PRINT_ONCE("creating pairable item in BVH with USE_PAIRS set to false");
-			}
-		}
-#endif
-
-		BVHHandle h = tree.item_add(p_userdata, p_active, p_aabb, p_subindex, p_pairable, p_pairable_type, p_pairable_mask);
+		BVHHandle h = tree.item_add(p_userdata, p_active, p_aabb, p_subindex, p_tree_id, p_tree_collision_mask);
 
 		if (USE_PAIRS) {
 			// for safety initialize the expanded AABB
-			Bounds &expanded_aabb = tree._pairs[h.id()].expanded_aabb;
+			BOUNDS &expanded_aabb = tree._pairs[h.id()].expanded_aabb;
 			expanded_aabb = p_aabb;
 			expanded_aabb.grow_by(tree._pairing_expansion);
 
@@ -123,12 +134,18 @@ public:
 	////////////////////////////////////////////////////
 	// wrapper versions that use uint32_t instead of handle
 	// for backward compatibility. Less type safe
-	void move(uint32_t p_handle, const Bounds &p_aabb) {
+	void move(uint32_t p_handle, const BOUNDS &p_aabb) {
 		BVHHandle h;
 		h.set(p_handle);
 		move(h, p_aabb);
 	}
 
+	void recheck_pairs(uint32_t p_handle) {
+		BVHHandle h;
+		h.set(p_handle);
+		recheck_pairs(h);
+	}
+
 	void erase(uint32_t p_handle) {
 		BVHHandle h;
 		h.set(p_handle);
@@ -141,7 +158,7 @@ public:
 		force_collision_check(h);
 	}
 
-	bool activate(uint32_t p_handle, const Bounds &p_aabb, bool p_delay_collision_check = false) {
+	bool activate(uint32_t p_handle, const BOUNDS &p_aabb, bool p_delay_collision_check = false) {
 		BVHHandle h;
 		h.set(p_handle);
 		return activate(h, p_aabb, p_delay_collision_check);
@@ -153,16 +170,16 @@ public:
 		return deactivate(h);
 	}
 
-	void set_pairable(uint32_t p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_force_collision_check = true) {
+	void set_tree(uint32_t p_handle, uint32_t p_tree_id, uint32_t p_tree_collision_mask, bool p_force_collision_check = true) {
 		BVHHandle h;
 		h.set(p_handle);
-		set_pairable(h, p_pairable, p_pairable_type, p_pairable_mask, p_force_collision_check);
+		set_tree(h, p_tree_id, p_tree_collision_mask, p_force_collision_check);
 	}
 
-	bool is_pairable(uint32_t p_handle) const {
+	uint32_t get_tree_id(uint32_t p_handle) const {
 		BVHHandle h;
 		h.set(p_handle);
-		return item_is_pairable(h);
+		return item_get_tree_id(h);
 	}
 	int get_subindex(uint32_t p_handle) const {
 		BVHHandle h;
@@ -178,7 +195,8 @@ public:
 
 	////////////////////////////////////////////////////
 
-	void move(BVHHandle p_handle, const Bounds &p_aabb) {
+	void move(BVHHandle p_handle, const BOUNDS &p_aabb) {
+		BVH_LOCKED_FUNCTION
 		if (tree.item_move(p_handle, p_aabb)) {
 			if (USE_PAIRS) {
 				_add_changed_item(p_handle, p_aabb);
@@ -186,7 +204,12 @@ public:
 		}
 	}
 
+	void recheck_pairs(BVHHandle p_handle) {
+		force_collision_check(p_handle);
+	}
+
 	void erase(BVHHandle p_handle) {
+		BVH_LOCKED_FUNCTION
 		// call unpair and remove all references to the item
 		// before deleting from the tree
 		if (USE_PAIRS) {
@@ -200,11 +223,12 @@ public:
 
 	// use in conjunction with activate if you have deferred the collision check, and
 	// set pairable has never been called.
-	// (deferred collision checks are a workaround for rendering server for historical reasons)
+	// (deferred collision checks are a workaround for visual server for historical reasons)
 	void force_collision_check(BVHHandle p_handle) {
+		BVH_LOCKED_FUNCTION
 		if (USE_PAIRS) {
 			// the aabb should already be up to date in the BVH
-			Bounds aabb;
+			BOUNDS aabb;
 			item_get_AABB(p_handle, aabb);
 
 			// add it as changed even if aabb not different
@@ -218,7 +242,8 @@ public:
 	// these should be read as set_visible for render trees,
 	// but generically this makes items add or remove from the
 	// tree internally, to speed things up by ignoring inactive items
-	bool activate(BVHHandle p_handle, const Bounds &p_aabb, bool p_delay_collision_check = false) {
+	bool activate(BVHHandle p_handle, const BOUNDS &p_aabb, bool p_delay_collision_check = false) {
+		BVH_LOCKED_FUNCTION
 		// sending the aabb here prevents the need for the BVH to maintain
 		// a redundant copy of the aabb.
 		// returns success
@@ -242,6 +267,7 @@ public:
 	}
 
 	bool deactivate(BVHHandle p_handle) {
+		BVH_LOCKED_FUNCTION
 		// returns success
 		if (tree.item_deactivate(p_handle)) {
 			// call unpair and remove all references to the item
@@ -258,12 +284,14 @@ public:
 		return false;
 	}
 
-	bool get_active(BVHHandle p_handle) const {
+	bool get_active(BVHHandle p_handle) {
+		BVH_LOCKED_FUNCTION
 		return tree.item_get_active(p_handle);
 	}
 
 	// call e.g. once per frame (this does a trickle optimize)
 	void update() {
+		BVH_LOCKED_FUNCTION
 		tree.update();
 		_check_for_collisions();
 #ifdef BVH_INTEGRITY_CHECKS
@@ -273,24 +301,26 @@ public:
 
 	// this can be called more frequently than per frame if necessary
 	void update_collisions() {
+		BVH_LOCKED_FUNCTION
 		_check_for_collisions();
 	}
 
 	// prefer calling this directly as type safe
-	void set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_force_collision_check = true) {
+	void set_tree(const BVHHandle &p_handle, uint32_t p_tree_id, uint32_t p_tree_collision_mask, bool p_force_collision_check = true) {
+		BVH_LOCKED_FUNCTION
 		// Returns true if the pairing state has changed.
-		bool state_changed = tree.item_set_pairable(p_handle, p_pairable, p_pairable_type, p_pairable_mask);
+		bool state_changed = tree.item_set_tree(p_handle, p_tree_id, p_tree_collision_mask);
 
 		if (USE_PAIRS) {
 			// not sure if absolutely necessary to flush collisions here. It will cost performance to, instead
 			// of waiting for update, so only uncomment this if there are bugs.
 			//_check_for_collisions();
 
-			if ((p_force_collision_check || state_changed) && get_active(p_handle)) {
+			if ((p_force_collision_check || state_changed) && tree.item_get_active(p_handle)) {
 				// when the pairable state changes, we need to force a collision check because newly pairable
 				// items may be in collision, and unpairable items might move out of collision.
 				// We cannot depend on waiting for the next update, because that may come much later.
-				Bounds aabb;
+				BOUNDS aabb;
 				item_get_AABB(p_handle, aabb);
 
 				// passing false disables the optimization which prevents collision checks if
@@ -307,32 +337,33 @@ public:
 	}
 
 	// cull tests
-	int cull_aabb(const Bounds &p_aabb, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+	int cull_aabb(const BOUNDS &p_aabb, T **p_result_array, int p_result_max, const T *p_tester, uint32_t p_tree_collision_mask = 0xFFFFFFFF, int *p_subindex_array = nullptr) {
+		BVH_LOCKED_FUNCTION
 		typename BVHTREE_CLASS::CullParams params;
 
 		params.result_count_overall = 0;
 		params.result_max = p_result_max;
 		params.result_array = p_result_array;
 		params.subindex_array = p_subindex_array;
-		params.mask = p_mask;
-		params.pairable_type = 0;
-		params.test_pairable_only = false;
+		params.tree_collision_mask = p_tree_collision_mask;
 		params.abb.from(p_aabb);
+		params.tester = p_tester;
 
 		tree.cull_aabb(params);
 
 		return params.result_count_overall;
 	}
 
-	int cull_segment(const Point &p_from, const Point &p_to, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+	int cull_segment(const POINT &p_from, const POINT &p_to, T **p_result_array, int p_result_max, const T *p_tester, uint32_t p_tree_collision_mask = 0xFFFFFFFF, int *p_subindex_array = nullptr) {
+		BVH_LOCKED_FUNCTION
 		typename BVHTREE_CLASS::CullParams params;
 
 		params.result_count_overall = 0;
 		params.result_max = p_result_max;
 		params.result_array = p_result_array;
 		params.subindex_array = p_subindex_array;
-		params.mask = p_mask;
-		params.pairable_type = 0;
+		params.tester = p_tester;
+		params.tree_collision_mask = p_tree_collision_mask;
 
 		params.segment.from = p_from;
 		params.segment.to = p_to;
@@ -342,15 +373,16 @@ public:
 		return params.result_count_overall;
 	}
 
-	int cull_point(const Point &p_point, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) {
+	int cull_point(const POINT &p_point, T **p_result_array, int p_result_max, const T *p_tester, uint32_t p_tree_collision_mask = 0xFFFFFFFF, int *p_subindex_array = nullptr) {
+		BVH_LOCKED_FUNCTION
 		typename BVHTREE_CLASS::CullParams params;
 
 		params.result_count_overall = 0;
 		params.result_max = p_result_max;
 		params.result_array = p_result_array;
 		params.subindex_array = p_subindex_array;
-		params.mask = p_mask;
-		params.pairable_type = 0;
+		params.tester = p_tester;
+		params.tree_collision_mask = p_tree_collision_mask;
 
 		params.point = p_point;
 
@@ -358,7 +390,8 @@ public:
 		return params.result_count_overall;
 	}
 
-	int cull_convex(const Vector<Plane> &p_convex, T **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) {
+	int cull_convex(const Vector<Plane> &p_convex, T **p_result_array, int p_result_max, const T *p_tester, uint32_t p_tree_collision_mask = 0xFFFFFFFF) {
+		BVH_LOCKED_FUNCTION
 		if (!p_convex.size()) {
 			return 0;
 		}
@@ -373,8 +406,8 @@ public:
 		params.result_max = p_result_max;
 		params.result_array = p_result_array;
 		params.subindex_array = nullptr;
-		params.mask = p_mask;
-		params.pairable_type = 0;
+		params.tester = p_tester;
+		params.tree_collision_mask = p_tree_collision_mask;
 
 		params.hull.planes = &p_convex[0];
 		params.hull.num_planes = p_convex.size();
@@ -394,7 +427,7 @@ private:
 			return;
 		}
 
-		Bounds bb;
+		BOUNDS bb;
 
 		typename BVHTREE_CLASS::CullParams params;
 
@@ -402,28 +435,23 @@ private:
 		params.result_max = INT_MAX;
 		params.result_array = nullptr;
 		params.subindex_array = nullptr;
-		params.mask = 0xFFFFFFFF;
-		params.pairable_type = 0;
 
 		for (unsigned int n = 0; n < changed_items.size(); n++) {
 			const BVHHandle &h = changed_items[n];
 
 			// use the expanded aabb for pairing
-			const Bounds &expanded_aabb = tree._pairs[h.id()].expanded_aabb;
+			const BOUNDS &expanded_aabb = tree._pairs[h.id()].expanded_aabb;
 			BVHABB_CLASS abb;
 			abb.from(expanded_aabb);
 
+			tree.item_fill_cullparams(h, params);
+
 			// find all the existing paired aabbs that are no longer
 			// paired, and send callbacks
 			_find_leavers(h, abb, p_full_check);
 
 			uint32_t changed_item_ref_id = h.id();
 
-			// set up the test from this item.
-			// this includes whether to test the non pairable tree,
-			// and the item mask.
-			tree.item_fill_cullparams(h, params);
-
 			params.abb = abb;
 
 			params.result_count_overall = 0; // might not be needed
@@ -456,7 +484,7 @@ private:
 	}
 
 public:
-	void item_get_AABB(BVHHandle p_handle, Bounds &r_aabb) {
+	void item_get_AABB(BVHHandle p_handle, BOUNDS &r_aabb) {
 		BVHABB_CLASS abb;
 		tree.item_get_ABB(p_handle, abb);
 		abb.to(r_aabb);
@@ -464,7 +492,7 @@ public:
 
 private:
 	// supplemental funcs
-	bool item_is_pairable(BVHHandle p_handle) const { return _get_extra(p_handle).pairable; }
+	uint32_t item_get_tree_id(BVHHandle p_handle) const { return _get_extra(p_handle).tree_id; }
 	T *item_get_userdata(BVHHandle p_handle) const { return _get_extra(p_handle).userdata; }
 	int item_get_subindex(BVHHandle p_handle) const { return _get_extra(p_handle).subindex; }
 
@@ -485,12 +513,35 @@ private:
 		void *ud_from = pairs_from.remove_pair_to(p_to);
 		pairs_to.remove_pair_to(p_from);
 
+#ifdef BVH_VERBOSE_PAIRING
+		print_line("_unpair " + itos(p_from.id()) + " from " + itos(p_to.id()));
+#endif
+
 		// callback
 		if (unpair_callback) {
 			unpair_callback(pair_callback_userdata, p_from, exa.userdata, exa.subindex, p_to, exb.userdata, exb.subindex, ud_from);
 		}
 	}
 
+	void *_recheck_pair(BVHHandle p_from, BVHHandle p_to, void *p_pair_data) {
+		tree._handle_sort(p_from, p_to);
+
+		typename BVHTREE_CLASS::ItemExtra &exa = tree._extra[p_from.id()];
+		typename BVHTREE_CLASS::ItemExtra &exb = tree._extra[p_to.id()];
+
+		// if the userdata is the same, no collisions should occur
+		if ((exa.userdata == exb.userdata) && exa.userdata) {
+			return p_pair_data;
+		}
+
+		// callback
+		if (check_pair_callback) {
+			return check_pair_callback(check_pair_callback_userdata, p_from, exa.userdata, exa.subindex, p_to, exb.userdata, exb.subindex, p_pair_data);
+		}
+
+		return p_pair_data;
+	}
+
 	// returns true if unpair
 	bool _find_leavers_process_pair(typename BVHTREE_CLASS::ItemPairs &p_pairs_from, const BVHABB_CLASS &p_abb_from, BVHHandle p_from, BVHHandle p_to, bool p_full_check) {
 		BVHABB_CLASS abb_to;
@@ -498,8 +549,8 @@ private:
 
 		// do they overlap?
 		if (p_abb_from.intersects(abb_to)) {
-			// the full check for pairable / non pairable and mask changes is extra expense
-			// this need not be done in most cases (for speed) except in the case where set_pairable is called
+			// the full check for pairable / non pairable (i.e. tree_id and tree_masks) and mask changes is extra expense
+			// this need not be done in most cases (for speed) except in the case where set_tree is called
 			// where the masks etc of the objects in question may have changed
 			if (!p_full_check) {
 				return false;
@@ -507,12 +558,13 @@ private:
 			const typename BVHTREE_CLASS::ItemExtra &exa = _get_extra(p_from);
 			const typename BVHTREE_CLASS::ItemExtra &exb = _get_extra(p_to);
 
-			// one of the two must be pairable to still pair
-			// if neither are pairable, we always unpair
-			if (exa.pairable || exb.pairable) {
+			// Checking tree_ids and tree_collision_masks
+			if (exa.are_item_trees_compatible(exb)) {
+				bool pair_allowed = USER_PAIR_TEST_FUNCTION::user_pair_check(exa.userdata, exb.userdata);
+
 				// the masks must still be compatible to pair
-				// i.e. if there is a hit between the two, then they should stay paired
-				if (tree._cull_pairing_mask_test_hit(exa.pairable_mask, exa.pairable_type, exb.pairable_mask, exb.pairable_type)) {
+				// i.e. if there is a hit between the two and they intersect, then they should stay paired
+				if (pair_allowed) {
 					return false;
 				}
 			}
@@ -550,6 +602,11 @@ private:
 		const typename BVHTREE_CLASS::ItemExtra &exa = _get_extra(p_ha);
 		const typename BVHTREE_CLASS::ItemExtra &exb = _get_extra(p_hb);
 
+		// user collision callback
+		if (!USER_PAIR_TEST_FUNCTION::user_pair_check(exa.userdata, exb.userdata)) {
+			return;
+		}
+
 		// if the userdata is the same, no collisions should occur
 		if ((exa.userdata == exb.userdata) && exa.userdata) {
 			return;
@@ -573,6 +630,10 @@ private:
 		// callback
 		void *callback_userdata = nullptr;
 
+#ifdef BVH_VERBOSE_PAIRING
+		print_line("_pair " + itos(p_ha.id()) + " to " + itos(p_hb.id()));
+#endif
+
 		if (pair_callback) {
 			callback_userdata = pair_callback(pair_callback_userdata, p_ha, exa.userdata, exa.subindex, p_hb, exb.userdata, exb.subindex);
 		}
@@ -594,6 +655,32 @@ private:
 		}
 	}
 
+	// Send pair callbacks again for all existing pairs for the given handle.
+	void _recheck_pairs(BVHHandle p_handle) {
+		typename BVHTREE_CLASS::ItemPairs &from = tree._pairs[p_handle.id()];
+
+		// checking pair for every partner.
+		for (unsigned int n = 0; n < from.extended_pairs.size(); n++) {
+			typename BVHTREE_CLASS::ItemPairs::Link &pair = from.extended_pairs[n];
+			BVHHandle h_to = pair.handle;
+			void *new_pair_data = _recheck_pair(p_handle, h_to, pair.userdata);
+
+			if (new_pair_data != pair.userdata) {
+				pair.userdata = new_pair_data;
+
+				// Update pair data for the second item.
+				typename BVHTREE_CLASS::ItemPairs &to = tree._pairs[h_to.id()];
+				for (unsigned int to_index = 0; to_index < to.extended_pairs.size(); to_index++) {
+					typename BVHTREE_CLASS::ItemPairs::Link &to_pair = to.extended_pairs[to_index];
+					if (to_pair.handle == p_handle) {
+						to_pair.userdata = new_pair_data;
+						break;
+					}
+				}
+			}
+		}
+	}
+
 private:
 	const typename BVHTREE_CLASS::ItemExtra &_get_extra(BVHHandle p_handle) const {
 		return tree._extra[p_handle.id()];
@@ -607,19 +694,24 @@ private:
 		_tick++;
 	}
 
-	void _add_changed_item(BVHHandle p_handle, const Bounds &aabb, bool p_check_aabb = true) {
+	void _add_changed_item(BVHHandle p_handle, const BOUNDS &aabb, bool p_check_aabb = true) {
 		// Note that non pairable items can pair with pairable,
 		// so all types must be added to the list
 
+#ifdef BVH_EXPAND_LEAF_AABBS
+		// if using expanded AABB in the leaf, the redundancy check will already have been made
+		BOUNDS &expanded_aabb = tree._pairs[p_handle.id()].expanded_aabb;
+		item_get_AABB(p_handle, expanded_aabb);
+#else
 		// aabb check with expanded aabb. This greatly decreases processing
 		// at the cost of slightly less accurate pairing checks
 		// Note this pairing AABB is separate from the AABB in the actual tree
-		Bounds &expanded_aabb = tree._pairs[p_handle.id()].expanded_aabb;
+		BOUNDS &expanded_aabb = tree._pairs[p_handle.id()].expanded_aabb;
 
 		// passing p_check_aabb false disables the optimization which prevents collision checks if
 		// the aabb hasn't changed. This is needed where set_pairable has been called, but the position
 		// has not changed.
-		if (p_check_aabb && expanded_aabb.encloses(aabb)) {
+		if (p_check_aabb && tree.expanded_aabb_encloses_not_shrink(expanded_aabb, aabb)) {
 			return;
 		}
 
@@ -627,6 +719,7 @@ private:
 		// this tick, because it is vital that the AABB is kept up to date
 		expanded_aabb = aabb;
 		expanded_aabb.grow_by(tree._pairing_expansion);
+#endif
 
 		// this code is to ensure that changed items only appear once on the updated list
 		// collision checking them multiple times is not needed, and repeats the same thing
@@ -670,8 +763,10 @@ private:
 
 	PairCallback pair_callback;
 	UnpairCallback unpair_callback;
+	CheckPairCallback check_pair_callback = nullptr; // defaulted here: _recheck_pair() tests this pointer before calling it
 	void *pair_callback_userdata;
 	void *unpair_callback_userdata;
+	void *check_pair_callback_userdata = nullptr; // defaulted here so it is never read uninitialized
 
 	BVHTREE_CLASS tree;
 
@@ -680,6 +775,38 @@ private:
 	LocalVector<BVHHandle, uint32_t, true> changed_items;
 	uint32_t _tick;
 
+	class BVHLockedFunction { // scoped lock helper: may lock a mutex in the constructor, unlocks it in the destructor
+	public:
+		BVHLockedFunction(Mutex *p_mutex, bool p_thread_safe) { // p_thread_safe == false skips locking entirely
+			// will be compiled out if not set in template
+			if (p_thread_safe) {
+				_mutex = p_mutex;
+
+				if (_mutex->try_lock() != OK) { // try first, so that contention can be reported
+					WARN_PRINT("Info : multithread BVH access detected (benign)");
+					_mutex->lock(); // try_lock() did not succeed, fall back to lock()
+				}
+
+			} else {
+				_mutex = nullptr; // tells the destructor there is nothing to unlock
+			}
+		}
+		~BVHLockedFunction() {
+			// will be compiled out if not set in template
+			if (_mutex) {
+				_mutex->unlock();
+			}
+		}
+
+	private:
+		Mutex *_mutex; // the mutex locked by the constructor, or nullptr when locking was skipped
+	};
+
+	Mutex _mutex;
+
+	// local toggle for turning on and off thread safety in project settings
+	bool _thread_safe;
+
 public:
 	BVH_Manager() {
 		_tick = 1; // start from 1 so items with 0 indicate never updated
@@ -687,6 +814,7 @@ public:
 		unpair_callback = nullptr;
 		pair_callback_userdata = nullptr;
 		unpair_callback_userdata = nullptr;
+		_thread_safe = BVH_THREAD_SAFE;
 	}
 };
 
diff --git a/core/math/bvh_abb.h b/core/math/bvh_abb.h
index 009032d34d..8a44f1c4da 100644
--- a/core/math/bvh_abb.h
+++ b/core/math/bvh_abb.h
@@ -32,7 +32,7 @@
 #define BVH_ABB_H
 
 // special optimized version of axis aligned bounding box
-template <class Bounds = AABB, class Point = Vector3>
+template <class BOUNDS = AABB, class POINT = Vector3>
 struct BVH_ABB {
 	struct ConvexHull {
 		// convex hulls (optional)
@@ -43,8 +43,8 @@ struct BVH_ABB {
 	};
 
 	struct Segment {
-		Point from;
-		Point to;
+		POINT from;
+		POINT to;
 	};
 
 	enum IntersectResult {
@@ -54,47 +54,47 @@ struct BVH_ABB {
 	};
 
 	// we store mins with a negative value in order to test them with SIMD
-	Point min;
-	Point neg_max;
+	POINT min;
+	POINT neg_max;
 
 	bool operator==(const BVH_ABB &o) const { return (min == o.min) && (neg_max == o.neg_max); }
 	bool operator!=(const BVH_ABB &o) const { return (*this == o) == false; }
 
-	void set(const Point &_min, const Point &_max) {
+	void set(const POINT &_min, const POINT &_max) {
 		min = _min;
 		neg_max = -_max;
 	}
 
 	// to and from standard AABB
-	void from(const Bounds &p_aabb) {
+	void from(const BOUNDS &p_aabb) {
 		min = p_aabb.position;
 		neg_max = -(p_aabb.position + p_aabb.size);
 	}
 
-	void to(Bounds &r_aabb) const {
+	void to(BOUNDS &r_aabb) const {
 		r_aabb.position = min;
 		r_aabb.size = calculate_size();
 	}
 
 	void merge(const BVH_ABB &p_o) {
-		for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) {
+		for (int axis = 0; axis < POINT::AXIS_COUNT; ++axis) {
 			neg_max[axis] = MIN(neg_max[axis], p_o.neg_max[axis]);
 			min[axis] = MIN(min[axis], p_o.min[axis]);
 		}
 	}
 
-	Point calculate_size() const {
+	POINT calculate_size() const {
 		return -neg_max - min;
 	}
 
-	Point calculate_centre() const {
-		return Point((calculate_size() * 0.5) + min);
+	POINT calculate_centre() const {
+		return POINT((calculate_size() * 0.5) + min);
 	}
 
 	real_t get_proximity_to(const BVH_ABB &p_b) const {
-		const Point d = (min - neg_max) - (p_b.min - p_b.neg_max);
+		const POINT d = (min - neg_max) - (p_b.min - p_b.neg_max);
 		real_t proximity = 0.0;
-		for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) {
+		for (int axis = 0; axis < POINT::AXIS_COUNT; ++axis) {
 			proximity += Math::abs(d[axis]);
 		}
 		return proximity;
@@ -104,7 +104,7 @@ struct BVH_ABB {
 		return (get_proximity_to(p_a) < get_proximity_to(p_b) ? 0 : 1);
 	}
 
-	uint32_t find_cutting_planes(const BVH_ABB::ConvexHull &p_hull, uint32_t *p_plane_ids) const {
+	uint32_t find_cutting_planes(const typename BVH_ABB::ConvexHull &p_hull, uint32_t *p_plane_ids) const {
 		uint32_t count = 0;
 
 		for (int n = 0; n < p_hull.num_planes; n++) {
@@ -162,7 +162,7 @@ struct BVH_ABB {
 	}
 
 	bool intersects_convex_partial(const ConvexHull &p_hull) const {
-		Bounds bb;
+		BOUNDS bb;
 		to(bb);
 		return bb.intersects_convex_shape(p_hull.planes, p_hull.num_planes, p_hull.points, p_hull.num_points);
 	}
@@ -182,7 +182,7 @@ struct BVH_ABB {
 
 	bool is_within_convex(const ConvexHull &p_hull) const {
 		// use half extents routine
-		Bounds bb;
+		BOUNDS bb;
 		to(bb);
 		return bb.inside_convex_shape(p_hull.planes, p_hull.num_planes);
 	}
@@ -197,12 +197,12 @@ struct BVH_ABB {
 	}
 
 	bool intersects_segment(const Segment &p_s) const {
-		Bounds bb;
+		BOUNDS bb;
 		to(bb);
 		return bb.intersects_segment(p_s.from, p_s.to);
 	}
 
-	bool intersects_point(const Point &p_pt) const {
+	bool intersects_point(const POINT &p_pt) const {
 		if (_any_lessthan(-p_pt, neg_max)) {
 			return false;
 		}
@@ -212,6 +212,7 @@ struct BVH_ABB {
 		return true;
 	}
 
+	// Very hot in profiling, make sure optimized
 	bool intersects(const BVH_ABB &p_o) const {
 		if (_any_morethan(p_o.min, -neg_max)) {
 			return false;
@@ -222,6 +223,17 @@ struct BVH_ABB {
 		return true;
 	}
 
+	// for pre-swizzled tester (this object)
+	bool intersects_swizzled(const BVH_ABB &p_o) const {
+		if (_any_lessthan(min, p_o.min)) {
+			return false;
+		}
+		if (_any_lessthan(neg_max, p_o.neg_max)) {
+			return false;
+		}
+		return true;
+	}
+
 	bool is_other_within(const BVH_ABB &p_o) const {
 		if (_any_lessthan(p_o.neg_max, neg_max)) {
 			return false;
@@ -232,20 +244,20 @@ struct BVH_ABB {
 		return true;
 	}
 
-	void grow(const Point &p_change) {
+	void grow(const POINT &p_change) {
 		neg_max -= p_change;
 		min -= p_change;
 	}
 
 	void expand(real_t p_change) {
-		Point change;
+		POINT change;
 		change.set_all(p_change);
 		grow(change);
 	}
 
 	// Actually surface area metric.
 	float get_area() const {
-		Point d = calculate_size();
+		POINT d = calculate_size();
 		return 2.0f * (d.x * d.y + d.y * d.z + d.z * d.x);
 	}
 
@@ -254,8 +266,8 @@ struct BVH_ABB {
 		min = neg_max;
 	}
 
-	bool _any_morethan(const Point &p_a, const Point &p_b) const {
-		for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) {
+	bool _any_morethan(const POINT &p_a, const POINT &p_b) const {
+		for (int axis = 0; axis < POINT::AXIS_COUNT; ++axis) {
 			if (p_a[axis] > p_b[axis]) {
 				return true;
 			}
@@ -263,8 +275,8 @@ struct BVH_ABB {
 		return false;
 	}
 
-	bool _any_lessthan(const Point &p_a, const Point &p_b) const {
-		for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) {
+	bool _any_lessthan(const POINT &p_a, const POINT &p_b) const {
+		for (int axis = 0; axis < POINT::AXIS_COUNT; ++axis) {
 			if (p_a[axis] < p_b[axis]) {
 				return true;
 			}
diff --git a/core/math/bvh_cull.inc b/core/math/bvh_cull.inc
index ab468bfd29..11f50e41e6 100644
--- a/core/math/bvh_cull.inc
+++ b/core/math/bvh_cull.inc
@@ -9,20 +9,22 @@ struct CullParams {
 	T **result_array;
 	int *subindex_array;
 
-	// nobody truly understands how masks are intended to work.
-	uint32_t mask;
-	uint32_t pairable_type;
+	// We now process masks etc in a user template function,
+	// and these for simplicity assume even for cull tests there is a
+	// testing object (which has masks etc) for the user cull checks.
+	// This means for cull tests on their own, the client will usually
+	// want to create a dummy object, just in order to specify masks etc.
+	const T *tester;
 
 	// optional components for different tests
-	Point point;
+	POINT point;
 	BVHABB_CLASS abb;
 	typename BVHABB_CLASS::ConvexHull hull;
 	typename BVHABB_CLASS::Segment segment;
 
-	// when collision testing, non pairable moving items
-	// only need to be tested against the pairable tree.
-	// collisions with other non pairable items are irrelevant.
-	bool test_pairable_only;
+	// When collision testing, we can specify which tree ids
+	// to collide test against with the tree_collision_mask.
+	uint32_t tree_collision_mask;
 };
 
 private:
@@ -58,11 +60,22 @@ int cull_convex(CullParams &r_params, bool p_translate_hits = true) {
 	_cull_hits.clear();
 	r_params.result_count = 0;
 
+	uint32_t tree_test_mask = 0;
+
 	for (int n = 0; n < NUM_TREES; n++) {
+		tree_test_mask <<= 1;
+		if (!tree_test_mask) {
+			tree_test_mask = 1;
+		}
+
 		if (_root_node_id[n] == BVHCommon::INVALID) {
 			continue;
 		}
 
+		if (!(r_params.tree_collision_mask & tree_test_mask)) {
+			continue;
+		}
+
 		_cull_convex_iterative(_root_node_id[n], r_params);
 	}
 
@@ -77,11 +90,22 @@ int cull_segment(CullParams &r_params, bool p_translate_hits = true) {
 	_cull_hits.clear();
 	r_params.result_count = 0;
 
+	uint32_t tree_test_mask = 0;
+
 	for (int n = 0; n < NUM_TREES; n++) {
+		tree_test_mask <<= 1;
+		if (!tree_test_mask) {
+			tree_test_mask = 1;
+		}
+
 		if (_root_node_id[n] == BVHCommon::INVALID) {
 			continue;
 		}
 
+		if (!(r_params.tree_collision_mask & tree_test_mask)) {
+			continue;
+		}
+
 		_cull_segment_iterative(_root_node_id[n], r_params);
 	}
 
@@ -96,11 +120,22 @@ int cull_point(CullParams &r_params, bool p_translate_hits = true) {
 	_cull_hits.clear();
 	r_params.result_count = 0;
 
+	uint32_t tree_test_mask = 0;
+
 	for (int n = 0; n < NUM_TREES; n++) {
+		tree_test_mask <<= 1;
+		if (!tree_test_mask) {
+			tree_test_mask = 1;
+		}
+
 		if (_root_node_id[n] == BVHCommon::INVALID) {
 			continue;
 		}
 
+		if (!(r_params.tree_collision_mask & tree_test_mask)) {
+			continue;
+		}
+
 		_cull_point_iterative(_root_node_id[n], r_params);
 	}
 
@@ -115,12 +150,20 @@ int cull_aabb(CullParams &r_params, bool p_translate_hits = true) {
 	_cull_hits.clear();
 	r_params.result_count = 0;
 
+	uint32_t tree_test_mask = 0;
+
 	for (int n = 0; n < NUM_TREES; n++) {
+		tree_test_mask <<= 1;
+		if (!tree_test_mask) {
+			tree_test_mask = 1;
+		}
+
 		if (_root_node_id[n] == BVHCommon::INVALID) {
 			continue;
 		}
 
-		if ((n == 0) && r_params.test_pairable_only) {
+		// the tree collision mask determines which trees to collide test against
+		if (!(r_params.tree_collision_mask & tree_test_mask)) {
 			continue;
 		}
 
@@ -142,22 +185,6 @@ bool _cull_hits_full(const CullParams &p) {
 	return (int)_cull_hits.size() >= p.result_max;
 }
 
-// write this logic once for use in all routines
-// double check this as a possible source of bugs in future.
-bool _cull_pairing_mask_test_hit(uint32_t p_maskA, uint32_t p_typeA, uint32_t p_maskB, uint32_t p_typeB) const {
-	// double check this as a possible source of bugs in future.
-	bool A_match_B = p_maskA & p_typeB;
-
-	if (!A_match_B) {
-		bool B_match_A = p_maskB & p_typeA;
-		if (!B_match_A) {
-			return false;
-		}
-	}
-
-	return true;
-}
-
 void _cull_hit(uint32_t p_ref_id, CullParams &p) {
 	// take into account masks etc
 	// this would be more efficient to do before plane checks,
@@ -165,7 +192,8 @@ void _cull_hit(uint32_t p_ref_id, CullParams &p) {
 	if (USE_PAIRS) {
 		const ItemExtra &ex = _extra[p_ref_id];
 
-		if (!_cull_pairing_mask_test_hit(p.mask, p.pairable_type, ex.pairable_mask, ex.pairable_type)) {
+		// user supplied function (for e.g. pairable types and pairable masks in the render tree)
+		if (!USER_CULL_TEST_FUNCTION::user_cull_check(p.tester, ex.userdata)) {
 			return;
 		}
 	}
@@ -294,6 +322,7 @@ bool _cull_point_iterative(uint32_t p_node_id, CullParams &r_params) {
 	return true;
 }
 
+// Note: This is a very hot loop profiling wise. Take care when changing this and profile.
 bool _cull_aabb_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully_within = false) {
 	// our function parameters to keep on a stack
 	struct CullAABBParams {
@@ -336,16 +365,26 @@ bool _cull_aabb_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully
 					_cull_hit(child_id, r_params);
 				}
 			} else {
-				for (int n = 0; n < leaf.num_items; n++) {
+				// This section is the hottest area in profiling, so
+				// it is highly optimized.
+				// get this into a local register and preconverted to correct type
+				int leaf_num_items = leaf.num_items;
+
+				BVHABB_CLASS swizzled_tester;
+				swizzled_tester.min = -r_params.abb.neg_max;
+				swizzled_tester.neg_max = -r_params.abb.min;
+
+				for (int n = 0; n < leaf_num_items; n++) {
 					const BVHABB_CLASS &aabb = leaf.get_aabb(n);
 
-					if (aabb.intersects(r_params.abb)) {
+					if (swizzled_tester.intersects_swizzled(aabb)) {
 						uint32_t child_id = leaf.get_item_ref_id(n);
 
 						// register hit
 						_cull_hit(child_id, r_params);
 					}
 				}
+
 			} // not fully within
 		} else {
 			if (!cap.fully_within) {
diff --git a/core/math/bvh_debug.inc b/core/math/bvh_debug.inc
index 896c36ecf1..2e519ceb3d 100644
--- a/core/math/bvh_debug.inc
+++ b/core/math/bvh_debug.inc
@@ -7,12 +7,12 @@ void _debug_recursive_print_tree(int p_tree_id) const {
 }
 
 String _debug_aabb_to_string(const BVHABB_CLASS &aabb) const {
-	Point size = aabb.calculate_size();
+	POINT size = aabb.calculate_size();
 
 	String sz;
 	float vol = 0.0;
 
-	for (int i = 0; i < Point::AXES_COUNT; ++i) {
+	for (int i = 0; i < POINT::AXIS_COUNT; ++i) {
 		sz += "(";
 		sz += itos(aabb.min[i]);
 		sz += " ~ ";
diff --git a/core/math/bvh_logic.inc b/core/math/bvh_logic.inc
index c65002a9fd..dd3b135bb5 100644
--- a/core/math/bvh_logic.inc
+++ b/core/math/bvh_logic.inc
@@ -42,9 +42,9 @@ BVHABB_CLASS _logic_abb_merge(const BVHABB_CLASS &a, const BVHABB_CLASS &b) {
 
 //--------------------------------------------------------------------------------------------------
 /**
- * @file	q3DynamicAABBTree.h
- * @author	Randy Gaul
- * @date	10/10/2014
+ * @file    q3DynamicAABBTree.h
+ * @author  Randy Gaul
+ * @date    10/10/2014
  *  Copyright (c) 2014 Randy Gaul http://www.randygaul.net
  *  This software is provided 'as-is', without any express or implied
  *  warranty. In no event will the authors be held liable for any damages
@@ -75,11 +75,11 @@ int32_t _logic_balance(int32_t iA, uint32_t p_tree_id) {
 		return iA;
 	}
 
-	/*       A
-	 *     /   \
-	 *    B     C
-	 *   / \   / \
-	 *  D   E F   G
+	/*        A
+	 *      /   \
+	 *     B     C
+	 *    / \   / \
+	 *   D   E F   G
 	 */
 
 	CRASH_COND(A->num_children != 2);
diff --git a/core/math/bvh_misc.inc b/core/math/bvh_misc.inc
index 71aa0e4fe0..9b35a1d36d 100644
--- a/core/math/bvh_misc.inc
+++ b/core/math/bvh_misc.inc
@@ -1,11 +1,7 @@
 
 int _handle_get_tree_id(BVHHandle p_handle) const {
 	if (USE_PAIRS) {
-		int tree = 0;
-		if (_extra[p_handle.id()].pairable) {
-			tree = 1;
-		}
-		return tree;
+		return _extra[p_handle.id()].tree_id;
 	}
 	return 0;
 }
diff --git a/core/math/bvh_pair.inc b/core/math/bvh_pair.inc
index a12acec2b6..7b9c7ce6ae 100644
--- a/core/math/bvh_pair.inc
+++ b/core/math/bvh_pair.inc
@@ -14,10 +14,10 @@ struct ItemPairs {
 	void clear() {
 		num_pairs = 0;
 		extended_pairs.reset();
-		expanded_aabb = Bounds();
+		expanded_aabb = BOUNDS();
 	}
 
-	Bounds expanded_aabb;
+	BOUNDS expanded_aabb;
 
 	// maybe we can just use the number in the vector TODO
 	int32_t num_pairs;
@@ -59,4 +59,14 @@ struct ItemPairs {
 
 		return userdata;
 	}
+
+	// experiment : scale the pairing expansion by the number of pairs.
+	// when the number of pairs is high, the density is high and a lower collision margin is better.
+	// when there are few local pairs, a larger margin is more optimal.
+	real_t scale_expansion_margin(real_t p_margin) const {
+		real_t x = real_t(num_pairs) * (1.0 / 9.0);
+		x = MIN(x, 1.0);
+		x = 1.0 - x;
+		return p_margin * x;
+	}
 };
diff --git a/core/math/bvh_public.inc b/core/math/bvh_public.inc
index 2c1e406712..36b0bfeb13 100644
--- a/core/math/bvh_public.inc
+++ b/core/math/bvh_public.inc
@@ -1,5 +1,5 @@
 public:
-BVHHandle item_add(T *p_userdata, bool p_active, const Bounds &p_aabb, int32_t p_subindex, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_invisible = false) {
+BVHHandle item_add(T *p_userdata, bool p_active, const BOUNDS &p_aabb, int32_t p_subindex, uint32_t p_tree_id, uint32_t p_tree_collision_mask, bool p_invisible = false) {
 #ifdef BVH_VERBOSE_TREE
 	VERBOSE_PRINT("\nitem_add BEFORE");
 	_debug_recursive_print_tree(0);
@@ -9,6 +9,13 @@ BVHHandle item_add(T *p_userdata, bool p_active, const Bounds &p_aabb, int32_t p
 	BVHABB_CLASS abb;
 	abb.from(p_aabb);
 
+	// NOTE that we do not expand the AABB for the first create even if
+	// leaf expansion is switched on. This is for two reasons:
+	// (1) We don't know if this object will move in future, in which case a non-expanded
+	// bound would be better...
+	// (2) We don't yet know how many objects will be paired, which is used to modify
+	// the expansion margin.
+
 	// handle to be filled with the new item ref
 	BVHHandle handle;
 
@@ -40,29 +47,17 @@ BVHHandle item_add(T *p_userdata, bool p_active, const Bounds &p_aabb, int32_t p
 	extra->active_ref_id = _active_refs.size();
 	_active_refs.push_back(ref_id);
 
-	if (USE_PAIRS) {
-		extra->pairable_mask = p_pairable_mask;
-		extra->pairable_type = p_pairable_type;
-		extra->pairable = p_pairable;
-	} else {
-		// just for safety, in case this gets queried etc
-		extra->pairable = 0;
-		p_pairable = false;
-	}
+	extra->tree_id = p_tree_id;
+	extra->tree_collision_mask = p_tree_collision_mask;
 
 	// assign to handle to return
 	handle.set_id(ref_id);
 
-	uint32_t tree_id = 0;
-	if (p_pairable) {
-		tree_id = 1;
-	}
-
-	create_root_node(tree_id);
+	create_root_node(p_tree_id);
 
 	// we must choose where to add to tree
 	if (p_active) {
-		ref->tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb);
+		ref->tnode_id = _logic_choose_item_add_node(_root_node_id[p_tree_id], abb);
 
 		bool refit = _node_add_item(ref->tnode_id, ref_id, abb);
 
@@ -70,7 +65,7 @@ BVHHandle item_add(T *p_userdata, bool p_active, const Bounds &p_aabb, int32_t p
 			// only need to refit from the parent
 			const TNode &add_node = _nodes[ref->tnode_id];
 			if (add_node.parent_id != BVHCommon::INVALID) {
-				refit_upward_and_balance(add_node.parent_id, tree_id);
+				refit_upward_and_balance(add_node.parent_id, p_tree_id);
 			}
 		}
 	} else {
@@ -103,7 +98,7 @@ void _debug_print_refs() {
 }
 
 // returns false if noop
-bool item_move(BVHHandle p_handle, const Bounds &p_aabb) {
+bool item_move(BVHHandle p_handle, const BOUNDS &p_aabb) {
 	uint32_t ref_id = p_handle.id();
 
 	// get the reference
@@ -115,10 +110,19 @@ bool item_move(BVHHandle p_handle, const Bounds &p_aabb) {
 	BVHABB_CLASS abb;
 	abb.from(p_aabb);
 
+#ifdef BVH_EXPAND_LEAF_AABBS
+	if (USE_PAIRS) {
+		// scale the pairing expansion by the number of pairs.
+		abb.expand(_pairs[ref_id].scale_expansion_margin(_pairing_expansion));
+	} else {
+		abb.expand(_pairing_expansion);
+	}
+#endif
+
 	BVH_ASSERT(ref.tnode_id != BVHCommon::INVALID);
 	TNode &tnode = _nodes[ref.tnode_id];
 
-	// does it fit within the current aabb?
+	// does it fit within the current leaf aabb?
 	if (tnode.aabb.is_other_within(abb)) {
 		// do nothing .. fast path .. not moved enough to need refit
 
@@ -129,9 +133,24 @@ bool item_move(BVHHandle p_handle, const Bounds &p_aabb) {
 		BVHABB_CLASS &leaf_abb = leaf.get_aabb(ref.item_id);
 
 		// no change?
+#ifdef BVH_EXPAND_LEAF_AABBS
+		BOUNDS leaf_aabb;
+		leaf_abb.to(leaf_aabb);
+
+		// This test should pass in a lot of cases, and by returning false we can avoid
+		// collision pairing checks later, which greatly reduces processing.
+		if (expanded_aabb_encloses_not_shrink(leaf_aabb, p_aabb)) {
+			return false;
+		}
+#else
 		if (leaf_abb == abb) {
 			return false;
 		}
+#endif
+
+#ifdef BVH_VERBOSE_MOVES
+		print_line("item_move " + itos(p_handle.id()) + "(within tnode aabb) : " + _debug_aabb_to_string(abb));
+#endif
 
 		leaf_abb = abb;
 		_integrity_check_all();
@@ -139,6 +158,10 @@ bool item_move(BVHHandle p_handle, const Bounds &p_aabb) {
 		return true;
 	}
 
+#ifdef BVH_VERBOSE_MOVES
+	print_line("item_move " + itos(p_handle.id()) + "(outside tnode aabb) : " + _debug_aabb_to_string(abb));
+#endif
+
 	uint32_t tree_id = _handle_get_tree_id(p_handle);
 
 	// remove and reinsert
@@ -206,7 +229,7 @@ void item_remove(BVHHandle p_handle) {
 }
 
 // returns success
-bool item_activate(BVHHandle p_handle, const Bounds &p_aabb) {
+bool item_activate(BVHHandle p_handle, const BOUNDS &p_aabb) {
 	uint32_t ref_id = p_handle.id();
 	ItemRef &ref = _refs[ref_id];
 	if (ref.is_active()) {
@@ -260,12 +283,14 @@ void item_fill_cullparams(BVHHandle p_handle, CullParams &r_params) const {
 	uint32_t ref_id = p_handle.id();
 	const ItemExtra &extra = _extra[ref_id];
 
-	// testing from a non pairable item, we only want to test pairable items
-	r_params.test_pairable_only = extra.pairable == 0;
+	// which trees does this item want to collide detect against?
+	r_params.tree_collision_mask = extra.tree_collision_mask;
 
-	// we take into account the mask of the item testing from
-	r_params.mask = extra.pairable_mask;
-	r_params.pairable_type = extra.pairable_type;
+	// The testing user defined object is passed to the user defined cull check function
+	// for masks etc. This is usually a dummy object of type T with masks set.
+	// However, if not using the cull_check callback (i.e. returning true), you can pass
+	// a nullptr instead of dummy object, as it will not be used.
+	r_params.tester = extra.userdata;
 }
 
 bool item_is_pairable(const BVHHandle &p_handle) {
@@ -285,7 +310,7 @@ void item_get_ABB(const BVHHandle &p_handle, BVHABB_CLASS &r_abb) {
 	r_abb = leaf.get_aabb(ref.item_id);
 }
 
-bool item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) {
+bool item_set_tree(const BVHHandle &p_handle, uint32_t p_tree_id, uint32_t p_tree_collision_mask) {
 	// change tree?
 	uint32_t ref_id = p_handle.id();
 
@@ -293,13 +318,15 @@ bool item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pa
 	ItemRef &ref = _refs[ref_id];
 
 	bool active = ref.is_active();
-	bool pairable_changed = (ex.pairable != 0) != p_pairable;
-	bool state_changed = pairable_changed || (ex.pairable_type != p_pairable_type) || (ex.pairable_mask != p_pairable_mask);
+	bool tree_changed = ex.tree_id != p_tree_id;
+	bool mask_changed = ex.tree_collision_mask != p_tree_collision_mask;
+	bool state_changed = tree_changed | mask_changed;
 
-	ex.pairable_type = p_pairable_type;
-	ex.pairable_mask = p_pairable_mask;
+	// Keep an eye on this for bugs of not noticing changes to objects,
+	// especially when changing client user masks that will not be detected as a change
+	// in the BVH. You may need to force a collision check in this case with recheck_pairs().
 
-	if (active && pairable_changed) {
+	if (active && (tree_changed | mask_changed)) {
 		// record abb
 		TNode &tnode = _nodes[ref.tnode_id];
 		TLeaf &leaf = _node_get_leaf(tnode);
@@ -313,7 +340,8 @@ bool item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pa
 
 		// we must set the pairable AFTER getting the current tree
 		// because the pairable status determines which tree
-		ex.pairable = p_pairable;
+		ex.tree_id = p_tree_id;
+		ex.tree_collision_mask = p_tree_collision_mask;
 
 		// add to new tree
 		tree_id = _handle_get_tree_id(p_handle);
@@ -333,7 +361,8 @@ bool item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pa
 		}
 	} else {
 		// always keep this up to date
-		ex.pairable = p_pairable;
+		ex.tree_id = p_tree_id;
+		ex.tree_collision_mask = p_tree_collision_mask;
 	}
 
 	return state_changed;
@@ -403,7 +432,7 @@ void update() {
 
 		// if there are no nodes, do nothing, but if there are...
 		if (bound_valid) {
-			Bounds bb;
+			BOUNDS bb;
 			world_bound.to(bb);
 			real_t size = bb.get_longest_axis_size();
 
@@ -421,3 +450,50 @@ void update() {
 	}
 #endif
 }
+
+void params_set_pairing_expansion(real_t p_value) {
+	if (p_value < 0.0) {
+#ifdef BVH_ALLOW_AUTO_EXPANSION
+		_auto_pairing_expansion = true;
+#endif
+		return;
+	}
+#ifdef BVH_ALLOW_AUTO_EXPANSION
+	_auto_pairing_expansion = false;
+#endif
+
+	_pairing_expansion = p_value;
+
+	// calculate shrinking threshold
+	const real_t fudge_factor = 1.1;
+	_aabb_shrinkage_threshold = _pairing_expansion * POINT::AXIS_COUNT * 2.0 * fudge_factor;
+}
+
+// This routine is not just an enclose check, it also checks for special case of shrinkage
+bool expanded_aabb_encloses_not_shrink(const BOUNDS &p_expanded_aabb, const BOUNDS &p_aabb) const {
+	if (!p_expanded_aabb.encloses(p_aabb)) {
+		return false;
+	}
+
+	// Check for special case of shrinkage. If the aabb has shrunk
+	// significantly we want to create a new expanded bound, because
+	// the previous expanded bound will have diverged significantly.
+	const POINT &exp_size = p_expanded_aabb.size;
+	const POINT &new_size = p_aabb.size;
+
+	real_t exp_l = 0.0;
+	real_t new_l = 0.0;
+
+	for (int i = 0; i < POINT::AXIS_COUNT; ++i) {
+		exp_l += exp_size[i];
+		new_l += new_size[i];
+	}
+
+	// Only report "enclosed" while the shrinkage stays below the threshold.
+	real_t diff = exp_l - new_l;
+	if (diff < _aabb_shrinkage_threshold) {
+		return true;
+	}
+
+	return false;
+}
diff --git a/core/math/bvh_split.inc b/core/math/bvh_split.inc
index f19ee8a7da..ff07166d4a 100644
--- a/core/math/bvh_split.inc
+++ b/core/math/bvh_split.inc
@@ -25,16 +25,16 @@ void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, u
 		return;
 	}
 
-	Point centre = full_bound.calculate_centre();
-	Point size = full_bound.calculate_size();
+	POINT centre = full_bound.calculate_centre();
+	POINT size = full_bound.calculate_size();
 
-	int order[Point::AXIS_COUNT];
+	int order[POINT::AXIS_COUNT];
 
 	order[0] = size.min_axis_index();
-	order[Point::AXIS_COUNT - 1] = size.max_axis_index();
+	order[POINT::AXIS_COUNT - 1] = size.max_axis_index();
 
-	static_assert(Point::AXIS_COUNT <= 3);
-	if (Point::AXIS_COUNT == 3) {
+	static_assert(POINT::AXIS_COUNT <= 3, "BVH POINT::AXIS_COUNT has unexpected size");
+	if (POINT::AXIS_COUNT == 3) {
 		order[1] = 3 - (order[0] + order[2]);
 	}
 
@@ -58,7 +58,7 @@ void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, u
 
 	// detect when split on longest axis failed
 	int min_threshold = MAX_ITEMS / 4;
-	int min_group_size[Point::AXIS_COUNT];
+	int min_group_size[POINT::AXIS_COUNT];
 	min_group_size[0] = MIN(num_a, num_b);
 	if (min_group_size[0] < min_threshold) {
 		// slow but sure .. first move everything back into a
@@ -68,7 +68,7 @@ void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, u
 		num_b = 0;
 
 		// now calculate the best split
-		for (int axis = 1; axis < Point::AXIS_COUNT; axis++) {
+		for (int axis = 1; axis < POINT::AXIS_COUNT; axis++) {
 			split_axis = order[axis];
 			int count = 0;
 
@@ -86,7 +86,7 @@ void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, u
 		// best axis
 		int best_axis = 0;
 		int best_min = min_group_size[0];
-		for (int axis = 1; axis < Point::AXIS_COUNT; axis++) {
+		for (int axis = 1; axis < POINT::AXIS_COUNT; axis++) {
 			if (min_group_size[axis] > best_min) {
 				best_min = min_group_size[axis];
 				best_axis = axis;
diff --git a/core/math/bvh_structs.inc b/core/math/bvh_structs.inc
index 1d1e0e6468..b0d9ae3615 100644
--- a/core/math/bvh_structs.inc
+++ b/core/math/bvh_structs.inc
@@ -14,25 +14,38 @@ struct ItemRef {
 // extra info kept in separate parallel list to the references,
 // as this is less used as keeps cache better
 struct ItemExtra {
-	uint32_t last_updated_tick;
-	uint32_t pairable;
-	uint32_t pairable_mask;
-	uint32_t pairable_type;
+	// Before doing user defined pairing checks (especially in the find_leavers function),
+	// we may want to check that two items have compatible tree ids and tree masks,
+	// as if they are incompatible they should not pair / collide.
+	bool are_item_trees_compatible(const ItemExtra &p_other) const {
+		uint32_t other_type = 1 << p_other.tree_id;
+		if (tree_collision_mask & other_type) {
+			return true;
+		}
+		uint32_t our_type = 1 << tree_id;
+		if (p_other.tree_collision_mask & our_type) {
+			return true;
+		}
+		return false;
+	}
+
+	// There can be multiple user defined trees
+	uint32_t tree_id;
 
+	// Defines which trees this item should collision check against.
+	// 1 << tree_id, and normally items would collide against their own
+	// tree (but not always).
+	uint32_t tree_collision_mask;
+
+	uint32_t last_updated_tick;
 	int32_t subindex;
 
+	T *userdata;
+
 	// the active reference is a separate list of which references
 	// are active so that we can slowly iterate through it over many frames for
 	// slow optimize.
 	uint32_t active_ref_id;
-
-	T *userdata;
-};
-
-// this is an item OR a child node depending on whether a leaf node
-struct Item {
-	BVHABB_CLASS aabb;
-	uint32_t item_ref_id;
 };
 
 // tree leaf
@@ -133,13 +146,13 @@ struct TNode {
 
 // instead of using linked list we maintain
 // item references (for quick lookup)
-PooledList<ItemRef, true> _refs;
-PooledList<ItemExtra, true> _extra;
+PooledList<ItemRef, uint32_t, true> _refs;
+PooledList<ItemExtra, uint32_t, true> _extra;
 PooledList<ItemPairs> _pairs;
 
 // these 2 are not in sync .. nodes != leaves!
-PooledList<TNode, true> _nodes;
-PooledList<TLeaf, true> _leaves;
+PooledList<TNode, uint32_t, true> _nodes;
+PooledList<TLeaf, uint32_t, true> _leaves;
 
 // we can maintain an un-ordered list of which references are active,
 // in order to do a slow incremental optimize of the tree over each frame.
@@ -152,15 +165,11 @@ uint32_t _current_active_ref = 0;
 // for pairing collision detection
 LocalVector<uint32_t, uint32_t, true> _cull_hits;
 
-// we now have multiple root nodes, allowing us to store
-// more than 1 tree. This can be more efficient, while sharing the same
-// common lists
-enum { NUM_TREES = 2,
-};
-
-// Tree 0 - Non pairable
-// Tree 1 - Pairable
-// This is more efficient because in physics we only need check non pairable against the pairable tree.
+// We can now have a user definable number of trees.
+// This allows using e.g. a non-pairable and pairable tree,
+// which can be more efficient for example, if we only need check non pairable against the pairable tree.
+// It also may be more efficient in terms of separating static from dynamic objects, by reducing housekeeping.
+// However this is a trade off, as there is a cost of traversing two trees.
 uint32_t _root_node_id[NUM_TREES];
 
 // these values may need tweaking according to the project
@@ -177,4 +186,14 @@ bool _auto_node_expansion = true;
 // larger values gives more 'sticky' pairing, and is less likely to exhibit tunneling
 // we can either use auto mode, where the expansion is based on the root node size, or specify manually
 real_t _pairing_expansion = 0.1;
+
+#ifdef BVH_ALLOW_AUTO_EXPANSION
 bool _auto_pairing_expansion = true;
+#endif
+
+// when using an expanded bound, we must detect the condition where a new AABB
+// is significantly smaller than the expanded bound, as this is a special case where we
+// should override the optimization and create a new expanded bound.
+// This threshold is derived from the _pairing_expansion, and should be recalculated
+// if _pairing_expansion is changed.
+real_t _aabb_shrinkage_threshold = 0.0;
diff --git a/core/math/bvh_tree.h b/core/math/bvh_tree.h
index c948d83456..da9b307778 100644
--- a/core/math/bvh_tree.h
+++ b/core/math/bvh_tree.h
@@ -48,12 +48,17 @@
 #include "core/templates/pooled_list.h"
 #include <limits.h>
 
-#define BVHABB_CLASS BVH_ABB<Bounds, Point>
+#define BVHABB_CLASS BVH_ABB<BOUNDS, POINT>
+
+// not sure if this is better yet so making optional
+#define BVH_EXPAND_LEAF_AABBS
 
 // never do these checks in release
 #if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
 //#define BVH_VERBOSE
 //#define BVH_VERBOSE_TREE
+//#define BVH_VERBOSE_PAIRING
+//#define BVH_VERBOSE_MOVES
 
 //#define BVH_VERBOSE_FRAME
 //#define BVH_CHECKS
@@ -148,7 +153,25 @@ public:
 	}
 };
 
-template <class T, int MAX_CHILDREN, int MAX_ITEMS, bool USE_PAIRS = false, class Bounds = AABB, class Point = Vector3>
+template <class T>
+class BVH_DummyPairTestFunction {
+public:
+	static bool user_collision_check(T *p_a, T *p_b) {
+		// return false if no collision, decided by masks etc
+		return true;
+	}
+};
+
+template <class T>
+class BVH_DummyCullTestFunction {
+public:
+	static bool user_cull_check(T *p_a, T *p_b) {
+		// return false if no collision
+		return true;
+	}
+};
+
+template <class T, int NUM_TREES, int MAX_CHILDREN, int MAX_ITEMS, class USER_PAIR_TEST_FUNCTION = BVH_DummyPairTestFunction<T>, class USER_CULL_TEST_FUNCTION = BVH_DummyCullTestFunction<T>, bool USE_PAIRS = false, class BOUNDS = AABB, class POINT = Vector3>
 class BVH_Tree {
 	friend class BVH;
 
@@ -165,6 +188,11 @@ public:
 		// (as these ids are stored as negative numbers in the node)
 		uint32_t dummy_leaf_id;
 		_leaves.request(dummy_leaf_id);
+
+		// In many cases you may want to change this default in the client code,
+		// or expose this value to the user.
+		// This default may make sense for a typically scaled 3d game, but maybe not for 2d on a pixel scale.
+		params_set_pairing_expansion(0.1);
 	}
 
 private:
@@ -234,7 +262,7 @@ private:
 				change_root_node(sibling_id, p_tree_id);
 
 				// delete the old root node as no longer needed
-				_nodes.free(p_parent_id);
+				node_free_node_and_leaf(p_parent_id);
 			}
 
 			return;
@@ -247,7 +275,19 @@ private:
 		}
 
 		// put the node on the free list to recycle
-		_nodes.free(p_parent_id);
+		node_free_node_and_leaf(p_parent_id);
+	}
+
+	// A node can either be a node, or a node AND a leaf combo.
+	// Both must be deleted to prevent a leak.
+	void node_free_node_and_leaf(uint32_t p_node_id) {
+		TNode &node = _nodes[p_node_id];
+		if (node.is_leaf()) {
+			int leaf_id = node.get_leaf_id();
+			_leaves.free(leaf_id);
+		}
+
+		_nodes.free(p_node_id);
 	}
 
 	void change_root_node(uint32_t p_new_root_id, uint32_t p_tree_id) {
@@ -339,7 +379,7 @@ private:
 				refit_upward(parent_id);
 
 				// put the node on the free list to recycle
-				_nodes.free(owner_node_id);
+				node_free_node_and_leaf(owner_node_id);
 			}
 
 			// else if no parent, it is the root node. Do not delete
diff --git a/core/math/camera_matrix.cpp b/core/math/camera_matrix.cpp
index f5d746ef0f..f4392c74b7 100644
--- a/core/math/camera_matrix.cpp
+++ b/core/math/camera_matrix.cpp
@@ -436,9 +436,7 @@ void CameraMatrix::invert() {
 	int pvt_i[4], pvt_j[4]; /* Locations of pivot matrix */
 	real_t pvt_val; /* Value of current pivot element */
 	real_t hold; /* Temporary storage */
-	real_t determinat; /* Determinant */
-
-	determinat = 1.0;
+	real_t determinant = 1.0f;
 	for (k = 0; k < 4; k++) {
 		/** Locate k'th pivot element **/
 		pvt_val = matrix[k][k]; /** Initialize for search **/
@@ -446,7 +444,7 @@ void CameraMatrix::invert() {
 		pvt_j[k] = k;
 		for (i = k; i < 4; i++) {
 			for (j = k; j < 4; j++) {
-				if (Math::absd(matrix[i][j]) > Math::absd(pvt_val)) {
+				if (Math::abs(matrix[i][j]) > Math::abs(pvt_val)) {
 					pvt_i[k] = i;
 					pvt_j[k] = j;
 					pvt_val = matrix[i][j];
@@ -455,9 +453,9 @@ void CameraMatrix::invert() {
 		}
 
 		/** Product of pivots, gives determinant when finished **/
-		determinat *= pvt_val;
-		if (Math::absd(determinat) < 1e-7) {
-			return; //(false);  /** Matrix is singular (zero determinant). **/
+		determinant *= pvt_val;
+		if (Math::is_zero_approx(determinant)) {
+			return; /** Matrix is singular (zero determinant). **/
 		}
 
 		/** "Interchange" rows (with sign change stuff) **/
diff --git a/core/math/camera_matrix.h b/core/math/camera_matrix.h
index 285d2ae384..f1aea5e4e8 100644
--- a/core/math/camera_matrix.h
+++ b/core/math/camera_matrix.h
@@ -33,6 +33,7 @@
 
 #include "core/math/math_defs.h"
 #include "core/math/vector3.h"
+#include "core/templates/vector.h"
 
 struct AABB;
 struct Plane;
diff --git a/core/math/color.h b/core/math/color.h
index 429807e4a6..b90a0f33a2 100644
--- a/core/math/color.h
+++ b/core/math/color.h
@@ -138,7 +138,7 @@ struct _NO_DISCARD_ Color {
 
 		float cMax = MAX(cRed, MAX(cGreen, cBlue));
 
-		float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / Math_LN2)) + 1.0f + B;
+		float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
 
 		float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
 
diff --git a/core/math/face3.cpp b/core/math/face3.cpp
index 9c968df19b..5bc1bc25e6 100644
--- a/core/math/face3.cpp
+++ b/core/math/face3.cpp
@@ -42,7 +42,7 @@ int Face3::split_by_plane(const Plane &p_plane, Face3 p_res[3], bool p_is_point_
 	int below_count = 0;
 
 	for (int i = 0; i < 3; i++) {
-		if (p_plane.has_point(vertex[i], CMP_EPSILON)) { // point is in plane
+		if (p_plane.has_point(vertex[i], (real_t)CMP_EPSILON)) { // point is in plane
 
 			ERR_FAIL_COND_V(above_count >= 4, 0);
 			above[above_count++] = vertex[i];
@@ -117,7 +117,7 @@ bool Face3::intersects_segment(const Vector3 &p_from, const Vector3 &p_dir, Vect
 
 bool Face3::is_degenerate() const {
 	Vector3 normal = vec3_cross(vertex[0] - vertex[1], vertex[0] - vertex[2]);
-	return (normal.length_squared() < CMP_EPSILON2);
+	return (normal.length_squared() < (real_t)CMP_EPSILON2);
 }
 
 Face3::Side Face3::get_side_of(const Face3 &p_face, ClockDirection p_clock_dir) const {
diff --git a/core/math/geometry_2d.cpp b/core/math/geometry_2d.cpp
index b1af91c49c..46b7d99b43 100644
--- a/core/math/geometry_2d.cpp
+++ b/core/math/geometry_2d.cpp
@@ -218,10 +218,10 @@ Vector<Vector<Point2>> Geometry2D::_polypaths_do_operation(PolyBooleanOperation
 
 	// Need to scale points (Clipper's requirement for robust computation).
 	for (int i = 0; i != p_polypath_a.size(); ++i) {
-		path_a << IntPoint(p_polypath_a[i].x * SCALE_FACTOR, p_polypath_a[i].y * SCALE_FACTOR);
+		path_a << IntPoint(p_polypath_a[i].x * (real_t)SCALE_FACTOR, p_polypath_a[i].y * (real_t)SCALE_FACTOR);
 	}
 	for (int i = 0; i != p_polypath_b.size(); ++i) {
-		path_b << IntPoint(p_polypath_b[i].x * SCALE_FACTOR, p_polypath_b[i].y * SCALE_FACTOR);
+		path_b << IntPoint(p_polypath_b[i].x * (real_t)SCALE_FACTOR, p_polypath_b[i].y * (real_t)SCALE_FACTOR);
 	}
 	Clipper clp;
 	clp.AddPath(path_a, ptSubject, !is_a_open); // Forward compatible with Clipper 10.0.0.
@@ -246,8 +246,8 @@ Vector<Vector<Point2>> Geometry2D::_polypaths_do_operation(PolyBooleanOperation
 
 		for (Paths::size_type j = 0; j < scaled_path.size(); ++j) {
 			polypath.push_back(Point2(
-					static_cast<real_t>(scaled_path[j].X) / SCALE_FACTOR,
-					static_cast<real_t>(scaled_path[j].Y) / SCALE_FACTOR));
+					static_cast<real_t>(scaled_path[j].X) / (real_t)SCALE_FACTOR,
+					static_cast<real_t>(scaled_path[j].Y) / (real_t)SCALE_FACTOR));
 		}
 		polypaths.push_back(polypath);
 	}
@@ -290,17 +290,17 @@ Vector<Vector<Point2>> Geometry2D::_polypath_offset(const Vector<Point2> &p_poly
 			et = etOpenRound;
 			break;
 	}
-	ClipperOffset co(2.0, 0.25f * SCALE_FACTOR); // Defaults from ClipperOffset.
+	ClipperOffset co(2.0, 0.25f * (real_t)SCALE_FACTOR); // Defaults from ClipperOffset.
 	Path path;
 
 	// Need to scale points (Clipper's requirement for robust computation).
 	for (int i = 0; i != p_polypath.size(); ++i) {
-		path << IntPoint(p_polypath[i].x * SCALE_FACTOR, p_polypath[i].y * SCALE_FACTOR);
+		path << IntPoint(p_polypath[i].x * (real_t)SCALE_FACTOR, p_polypath[i].y * (real_t)SCALE_FACTOR);
 	}
 	co.AddPath(path, jt, et);
 
 	Paths paths;
-	co.Execute(paths, p_delta * SCALE_FACTOR); // Inflate/deflate.
+	co.Execute(paths, p_delta * (real_t)SCALE_FACTOR); // Inflate/deflate.
 
 	// Have to scale points down now.
 	Vector<Vector<Point2>> polypaths;
@@ -312,8 +312,8 @@ Vector<Vector<Point2>> Geometry2D::_polypath_offset(const Vector<Point2> &p_poly
 
 		for (Paths::size_type j = 0; j < scaled_path.size(); ++j) {
 			polypath.push_back(Point2(
-					static_cast<real_t>(scaled_path[j].X) / SCALE_FACTOR,
-					static_cast<real_t>(scaled_path[j].Y) / SCALE_FACTOR));
+					static_cast<real_t>(scaled_path[j].X) / (real_t)SCALE_FACTOR,
+					static_cast<real_t>(scaled_path[j].Y) / (real_t)SCALE_FACTOR));
 		}
 		polypaths.push_back(polypath);
 	}
diff --git a/core/math/geometry_2d.h b/core/math/geometry_2d.h
index 4fdb8ee36a..62786d69be 100644
--- a/core/math/geometry_2d.h
+++ b/core/math/geometry_2d.h
@@ -51,20 +51,20 @@ public:
 		real_t f = d2.dot(r);
 		real_t s, t;
 		// Check if either or both segments degenerate into points.
-		if (a <= CMP_EPSILON && e <= CMP_EPSILON) {
+		if (a <= (real_t)CMP_EPSILON && e <= (real_t)CMP_EPSILON) {
 			// Both segments degenerate into points.
 			c1 = p1;
 			c2 = p2;
 			return Math::sqrt((c1 - c2).dot(c1 - c2));
 		}
-		if (a <= CMP_EPSILON) {
+		if (a <= (real_t)CMP_EPSILON) {
 			// First segment degenerates into a point.
 			s = 0.0;
 			t = f / e; // s = 0 => t = (b*s + f) / e = f / e
 			t = CLAMP(t, 0.0f, 1.0f);
 		} else {
 			real_t c = d1.dot(r);
-			if (e <= CMP_EPSILON) {
+			if (e <= (real_t)CMP_EPSILON) {
 				// Second segment degenerates into a point.
 				t = 0.0;
 				s = CLAMP(-c / a, 0.0f, 1.0f); // t = 0 => s = (b*t - c) / a = -c / a
@@ -185,7 +185,7 @@ public:
 		D = Vector2(D.x * Bn.x + D.y * Bn.y, D.y * Bn.x - D.x * Bn.y);
 
 		// Fail if C x B and D x B have the same sign (segments don't intersect).
-		if ((C.y < -CMP_EPSILON && D.y < -CMP_EPSILON) || (C.y > CMP_EPSILON && D.y > CMP_EPSILON)) {
+		if ((C.y < (real_t)-CMP_EPSILON && D.y < (real_t)-CMP_EPSILON) || (C.y > (real_t)CMP_EPSILON && D.y > (real_t)CMP_EPSILON)) {
 			return false;
 		}
 
@@ -198,7 +198,7 @@ public:
 		real_t ABpos = D.x + (C.x - D.x) * D.y / (D.y - C.y);
 
 		// Fail if segment C-D crosses line A-B outside of segment A-B.
-		if (ABpos < 0 || ABpos > 1.0f) {
+		if ((ABpos < 0) || (ABpos > 1)) {
 			return false;
 		}
 
diff --git a/core/math/geometry_3d.cpp b/core/math/geometry_3d.cpp
index 7eeb37df46..bd22bffb1f 100644
--- a/core/math/geometry_3d.cpp
+++ b/core/math/geometry_3d.cpp
@@ -879,7 +879,7 @@ Vector<Vector3> Geometry3D::compute_convex_mesh_points(const Plane *p_planes, in
 					for (int n = 0; n < p_plane_count; n++) {
 						if (n != i && n != j && n != k) {
 							real_t dp = p_planes[n].normal.dot(convex_shape_point);
-							if (dp - p_planes[n].d > CMP_EPSILON) {
+							if (dp - p_planes[n].d > (real_t)CMP_EPSILON) {
 								excluded = true;
 								break;
 							}
diff --git a/core/math/geometry_3d.h b/core/math/geometry_3d.h
index 482c7ea604..59c56906f4 100644
--- a/core/math/geometry_3d.h
+++ b/core/math/geometry_3d.h
@@ -76,7 +76,7 @@ public:
 		real_t tc, tN, tD = D; // tc = tN / tD, default tD = D >= 0
 
 		// Compute the line parameters of the two closest points.
-		if (D < CMP_EPSILON) { // The lines are almost parallel.
+		if (D < (real_t)CMP_EPSILON) { // The lines are almost parallel.
 			sN = 0.0f; // Force using point P0 on segment S1
 			sD = 1.0f; // to prevent possible division by 0.0 later.
 			tN = e;
@@ -142,7 +142,7 @@ public:
 		Vector3 s = p_from - p_v0;
 		real_t u = f * s.dot(h);
 
-		if (u < 0.0f || u > 1.0f) {
+		if ((u < 0.0f) || (u > 1.0f)) {
 			return false;
 		}
 
@@ -150,7 +150,7 @@ public:
 
 		real_t v = f * p_dir.dot(q);
 
-		if (v < 0.0f || u + v > 1.0f) {
+		if ((v < 0.0f) || (u + v > 1.0f)) {
 			return false;
 		}
 
@@ -183,7 +183,7 @@ public:
 		Vector3 s = p_from - p_v0;
 		real_t u = f * s.dot(h);
 
-		if (u < 0.0f || u > 1.0f) {
+		if ((u < 0.0f) || (u > 1.0f)) {
 			return false;
 		}
 
@@ -191,7 +191,7 @@ public:
 
 		real_t v = f * rel.dot(q);
 
-		if (v < 0.0f || u + v > 1.0f) {
+		if ((v < 0.0f) || (u + v > 1.0f)) {
 			return false;
 		}
 
@@ -199,7 +199,7 @@ public:
 		// the intersection point is on the line.
 		real_t t = f * e2.dot(q);
 
-		if (t > CMP_EPSILON && t <= 1.0f) { // Ray intersection.
+		if (t > (real_t)CMP_EPSILON && t <= 1.0f) { // Ray intersection.
 			if (r_res) {
 				*r_res = p_from + rel * t;
 			}
@@ -213,7 +213,7 @@ public:
 		Vector3 sphere_pos = p_sphere_pos - p_from;
 		Vector3 rel = (p_to - p_from);
 		real_t rel_l = rel.length();
-		if (rel_l < CMP_EPSILON) {
+		if (rel_l < (real_t)CMP_EPSILON) {
 			return false; // Both points are the same.
 		}
 		Vector3 normal = rel / rel_l;
@@ -229,7 +229,7 @@ public:
 		real_t inters_d2 = p_sphere_radius * p_sphere_radius - ray_distance * ray_distance;
 		real_t inters_d = sphere_d;
 
-		if (inters_d2 >= CMP_EPSILON) {
+		if (inters_d2 >= (real_t)CMP_EPSILON) {
 			inters_d -= Math::sqrt(inters_d2);
 		}
 
@@ -253,7 +253,7 @@ public:
 	static inline bool segment_intersects_cylinder(const Vector3 &p_from, const Vector3 &p_to, real_t p_height, real_t p_radius, Vector3 *r_res = nullptr, Vector3 *r_norm = nullptr, int p_cylinder_axis = 2) {
 		Vector3 rel = (p_to - p_from);
 		real_t rel_l = rel.length();
-		if (rel_l < CMP_EPSILON) {
+		if (rel_l < (real_t)CMP_EPSILON) {
 			return false; // Both points are the same.
 		}
 
@@ -269,7 +269,7 @@ public:
 
 		Vector3 axis_dir;
 
-		if (crs_l < CMP_EPSILON) {
+		if (crs_l < (real_t)CMP_EPSILON) {
 			Vector3 side_axis;
 			side_axis[(p_cylinder_axis + 1) % 3] = 1.0f; // Any side axis OK.
 			axis_dir = side_axis;
@@ -285,7 +285,7 @@ public:
 
 		// Convert to 2D.
 		real_t w2 = p_radius * p_radius - dist * dist;
-		if (w2 < CMP_EPSILON) {
+		if (w2 < (real_t)CMP_EPSILON) {
 			return false; // Avoid numerical error.
 		}
 		Size2 size(Math::sqrt(w2), p_height * 0.5f);
@@ -366,7 +366,7 @@ public:
 		Vector3 rel = p_to - p_from;
 		real_t rel_l = rel.length();
 
-		if (rel_l < CMP_EPSILON) {
+		if (rel_l < (real_t)CMP_EPSILON) {
 			return false;
 		}
 
@@ -379,7 +379,7 @@ public:
 
 			real_t den = p.normal.dot(dir);
 
-			if (Math::abs(den) <= CMP_EPSILON) {
+			if (Math::abs(den) <= (real_t)CMP_EPSILON) {
 				continue; // Ignore parallel plane.
 			}
 
@@ -564,11 +564,11 @@ public:
 
 		for (int a = 0; a < polygon.size(); a++) {
 			real_t dist = p_plane.distance_to(polygon[a]);
-			if (dist < -CMP_POINT_IN_PLANE_EPSILON) {
+			if (dist < (real_t)-CMP_POINT_IN_PLANE_EPSILON) {
 				location_cache[a] = LOC_INSIDE;
 				inside_count++;
 			} else {
-				if (dist > CMP_POINT_IN_PLANE_EPSILON) {
+				if (dist > (real_t)CMP_POINT_IN_PLANE_EPSILON) {
 					location_cache[a] = LOC_OUTSIDE;
 					outside_count++;
 				} else {
diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h
index 47e5ab2709..8c0b87cf4a 100644
--- a/core/math/math_funcs.h
+++ b/core/math/math_funcs.h
@@ -64,7 +64,7 @@ public:
 	static _ALWAYS_INLINE_ float sinc(float p_x) { return p_x == 0 ? 1 : ::sin(p_x) / p_x; }
 	static _ALWAYS_INLINE_ double sinc(double p_x) { return p_x == 0 ? 1 : ::sin(p_x) / p_x; }
 
-	static _ALWAYS_INLINE_ float sincn(float p_x) { return sinc(Math_PI * p_x); }
+	static _ALWAYS_INLINE_ float sincn(float p_x) { return sinc((float)Math_PI * p_x); }
 	static _ALWAYS_INLINE_ double sincn(double p_x) { return sinc(Math_PI * p_x); }
 
 	static _ALWAYS_INLINE_ double cosh(double p_x) { return ::cosh(p_x); }
@@ -187,7 +187,7 @@ public:
 
 	static _ALWAYS_INLINE_ double fposmod(double p_x, double p_y) {
 		double value = Math::fmod(p_x, p_y);
-		if ((value < 0 && p_y > 0) || (value > 0 && p_y < 0)) {
+		if (((value < 0) && (p_y > 0)) || ((value > 0) && (p_y < 0))) {
 			value += p_y;
 		}
 		value += 0.0;
@@ -195,7 +195,7 @@ public:
 	}
 	static _ALWAYS_INLINE_ float fposmod(float p_x, float p_y) {
 		float value = Math::fmod(p_x, p_y);
-		if ((value < 0 && p_y > 0) || (value > 0 && p_y < 0)) {
+		if (((value < 0) && (p_y > 0)) || ((value > 0) && (p_y < 0))) {
 			value += p_y;
 		}
 		value += 0.0f;
@@ -220,17 +220,17 @@ public:
 
 	static _ALWAYS_INLINE_ int64_t posmod(int64_t p_x, int64_t p_y) {
 		int64_t value = p_x % p_y;
-		if ((value < 0 && p_y > 0) || (value > 0 && p_y < 0)) {
+		if (((value < 0) && (p_y > 0)) || ((value > 0) && (p_y < 0))) {
 			value += p_y;
 		}
 		return value;
 	}
 
 	static _ALWAYS_INLINE_ double deg2rad(double p_y) { return p_y * (Math_PI / 180.0); }
-	static _ALWAYS_INLINE_ float deg2rad(float p_y) { return p_y * (Math_PI / 180.0); }
+	static _ALWAYS_INLINE_ float deg2rad(float p_y) { return p_y * (float)(Math_PI / 180.0); }
 
 	static _ALWAYS_INLINE_ double rad2deg(double p_y) { return p_y * (180.0 / Math_PI); }
-	static _ALWAYS_INLINE_ float rad2deg(float p_y) { return p_y * (180.0 / Math_PI); }
+	static _ALWAYS_INLINE_ float rad2deg(float p_y) { return p_y * (float)(180.0 / Math_PI); }
 
 	static _ALWAYS_INLINE_ double lerp(double p_from, double p_to, double p_weight) { return p_from + (p_to - p_from) * p_weight; }
 	static _ALWAYS_INLINE_ float lerp(float p_from, float p_to, float p_weight) { return p_from + (p_to - p_from) * p_weight; }
@@ -285,10 +285,10 @@ public:
 	static _ALWAYS_INLINE_ float move_toward(float p_from, float p_to, float p_delta) { return abs(p_to - p_from) <= p_delta ? p_to : p_from + SIGN(p_to - p_from) * p_delta; }
 
 	static _ALWAYS_INLINE_ double linear2db(double p_linear) { return Math::log(p_linear) * 8.6858896380650365530225783783321; }
-	static _ALWAYS_INLINE_ float linear2db(float p_linear) { return Math::log(p_linear) * 8.6858896380650365530225783783321; }
+	static _ALWAYS_INLINE_ float linear2db(float p_linear) { return Math::log(p_linear) * (float)8.6858896380650365530225783783321; }
 
 	static _ALWAYS_INLINE_ double db2linear(double p_db) { return Math::exp(p_db * 0.11512925464970228420089957273422); }
-	static _ALWAYS_INLINE_ float db2linear(float p_db) { return Math::exp(p_db * 0.11512925464970228420089957273422); }
+	static _ALWAYS_INLINE_ float db2linear(float p_db) { return Math::exp(p_db * (float)0.11512925464970228420089957273422); }
 
 	static _ALWAYS_INLINE_ double round(double p_val) { return ::round(p_val); }
 	static _ALWAYS_INLINE_ float round(float p_val) { return ::roundf(p_val); }
@@ -345,9 +345,9 @@ public:
 			return true;
 		}
 		// Then check for approximate equality.
-		float tolerance = CMP_EPSILON * abs(a);
-		if (tolerance < CMP_EPSILON) {
-			tolerance = CMP_EPSILON;
+		float tolerance = (float)CMP_EPSILON * abs(a);
+		if (tolerance < (float)CMP_EPSILON) {
+			tolerance = (float)CMP_EPSILON;
 		}
 		return abs(a - b) < tolerance;
 	}
@@ -362,7 +362,7 @@ public:
 	}
 
 	static _ALWAYS_INLINE_ bool is_zero_approx(float s) {
-		return abs(s) < CMP_EPSILON;
+		return abs(s) < (float)CMP_EPSILON;
 	}
 
 	static _ALWAYS_INLINE_ bool is_equal_approx(double a, double b) {
diff --git a/core/math/plane.cpp b/core/math/plane.cpp
index 0ce8aed51c..6881ad4014 100644
--- a/core/math/plane.cpp
+++ b/core/math/plane.cpp
@@ -106,7 +106,7 @@ bool Plane::intersects_ray(const Vector3 &p_from, const Vector3 &p_dir, Vector3
 	real_t dist = (normal.dot(p_from) - d) / den;
 	//printf("dist is %i\n",dist);
 
-	if (dist > CMP_EPSILON) { //this is a ray, before the emitting pos (p_from) doesn't exist
+	if (dist > (real_t)CMP_EPSILON) { //this is a ray, before the emitting pos (p_from) doesn't exist
 
 		return false;
 	}
@@ -129,7 +129,7 @@ bool Plane::intersects_segment(const Vector3 &p_begin, const Vector3 &p_end, Vec
 	real_t dist = (normal.dot(p_begin) - d) / den;
 	//printf("dist is %i\n",dist);
 
-	if (dist < -CMP_EPSILON || dist > (1.0f + CMP_EPSILON)) {
+	if (dist < (real_t)-CMP_EPSILON || dist > (1.0f + (real_t)CMP_EPSILON)) {
 		return false;
 	}
 
diff --git a/core/math/quaternion.cpp b/core/math/quaternion.cpp
index ade252d628..0a650a8578 100644
--- a/core/math/quaternion.cpp
+++ b/core/math/quaternion.cpp
@@ -129,7 +129,7 @@ Quaternion Quaternion::slerp(const Quaternion &p_to, const real_t &p_weight) con
 
 	// calculate coefficients
 
-	if ((1.0f - cosom) > CMP_EPSILON) {
+	if ((1.0f - cosom) > (real_t)CMP_EPSILON) {
 		// standard case (slerp)
 		omega = Math::acos(cosom);
 		sinom = Math::sin(omega);
diff --git a/core/math/quaternion.h b/core/math/quaternion.h
index f8a2c6456e..38729ac3df 100644
--- a/core/math/quaternion.h
+++ b/core/math/quaternion.h
@@ -145,7 +145,7 @@ struct _NO_DISCARD_ Quaternion {
 		Vector3 c = v0.cross(v1);
 		real_t d = v0.dot(v1);
 
-		if (d < -1.0f + CMP_EPSILON) {
+		if (d < -1.0f + (real_t)CMP_EPSILON) {
 			x = 0;
 			y = 1;
 			z = 0;
diff --git a/core/math/random_pcg.h b/core/math/random_pcg.h
index 974dbbfc2e..65fcf67664 100644
--- a/core/math/random_pcg.h
+++ b/core/math/random_pcg.h
@@ -129,7 +129,7 @@ public:
 		return p_mean + p_deviation * (cos(Math_TAU * randd()) * sqrt(-2.0 * log(randd()))); // Box-Muller transform
 	}
 	_FORCE_INLINE_ float randfn(float p_mean, float p_deviation) {
-		return p_mean + p_deviation * (cos(Math_TAU * randf()) * sqrt(-2.0 * log(randf()))); // Box-Muller transform
+		return p_mean + p_deviation * (cos((float)Math_TAU * randf()) * sqrt(-2.0 * log(randf()))); // Box-Muller transform
 	}
 
 	double random(double p_from, double p_to);
diff --git a/core/math/transform_2d.cpp b/core/math/transform_2d.cpp
index 55c1f06ff5..71953e4130 100644
--- a/core/math/transform_2d.cpp
+++ b/core/math/transform_2d.cpp
@@ -71,12 +71,12 @@ void Transform2D::rotate(const real_t p_phi) {
 
 real_t Transform2D::get_skew() const {
 	real_t det = basis_determinant();
-	return Math::acos(elements[0].normalized().dot(SIGN(det) * elements[1].normalized())) - Math_PI * 0.5f;
+	return Math::acos(elements[0].normalized().dot(SIGN(det) * elements[1].normalized())) - (real_t)Math_PI * 0.5f;
 }
 
 void Transform2D::set_skew(const real_t p_angle) {
 	real_t det = basis_determinant();
-	elements[1] = SIGN(det) * elements[0].rotated((Math_PI * 0.5f + p_angle)).normalized() * elements[1].length();
+	elements[1] = SIGN(det) * elements[0].rotated(((real_t)Math_PI * 0.5f + p_angle)).normalized() * elements[1].length();
 }
 
 real_t Transform2D::get_rotation() const {
diff --git a/core/math/vector2.cpp b/core/math/vector2.cpp
index ed4266b115..a27227905c 100644
--- a/core/math/vector2.cpp
+++ b/core/math/vector2.cpp
@@ -163,7 +163,7 @@ Vector2 Vector2::move_toward(const Vector2 &p_to, const real_t p_delta) const {
 	Vector2 v = *this;
 	Vector2 vd = p_to - v;
 	real_t len = vd.length();
-	return len <= p_delta || len < CMP_EPSILON ? p_to : v + vd / len * p_delta;
+	return len <= p_delta || len < (real_t)CMP_EPSILON ? p_to : v + vd / len * p_delta;
 }
 
 // slide returns the component of the vector along the given plane, specified by its normal vector.
diff --git a/core/math/vector3.cpp b/core/math/vector3.cpp
index 998c437a22..87b2ac7104 100644
--- a/core/math/vector3.cpp
+++ b/core/math/vector3.cpp
@@ -31,6 +31,9 @@
 #include "vector3.h"
 
 #include "core/math/basis.h"
+#include "core/math/vector2.h"
+#include "core/math/vector3i.h"
+#include "core/string/ustring.h"
 
 void Vector3::rotate(const Vector3 &p_axis, const real_t p_phi) {
 	*this = Basis(p_axis, p_phi).xform(*this);
@@ -94,7 +97,32 @@ Vector3 Vector3::move_toward(const Vector3 &p_to, const real_t p_delta) const {
 	Vector3 v = *this;
 	Vector3 vd = p_to - v;
 	real_t len = vd.length();
-	return len <= p_delta || len < CMP_EPSILON ? p_to : v + vd / len * p_delta;
+	return len <= p_delta || len < (real_t)CMP_EPSILON ? p_to : v + vd / len * p_delta;
+}
+
+Vector2 Vector3::octahedron_encode() const {
+	Vector3 n = *this;
+	n /= Math::abs(n.x) + Math::abs(n.y) + Math::abs(n.z);
+	Vector2 o;
+	if (n.z >= 0.0f) {
+		o.x = n.x;
+		o.y = n.y;
+	} else {
+		o.x = (1.0f - Math::abs(n.y)) * (n.x >= 0.0f ? 1.0f : -1.0f);
+		o.y = (1.0f - Math::abs(n.x)) * (n.y >= 0.0f ? 1.0f : -1.0f);
+	}
+	o.x = o.x * 0.5f + 0.5f;
+	o.y = o.y * 0.5f + 0.5f;
+	return o;
+}
+
+Vector3 Vector3::octahedron_decode(const Vector2 &p_oct) {
+	Vector2 f(p_oct.x * 2.0f - 1.0f, p_oct.y * 2.0f - 1.0f);
+	Vector3 n(f.x, f.y, 1.0f - Math::abs(f.x) - Math::abs(f.y));
+	real_t t = CLAMP(-n.z, 0.0f, 1.0f); // real_t rather than float: no implicit narrowing of -n.z.
+	n.x += n.x >= 0 ? -t : t;
+	n.y += n.y >= 0 ? -t : t;
+	return n.normalized();
 }
 
 Basis Vector3::outer(const Vector3 &p_with) const {
@@ -112,3 +140,7 @@ bool Vector3::is_equal_approx(const Vector3 &p_v) const {
 Vector3::operator String() const {
 	return "(" + String::num_real(x, false) + ", " + String::num_real(y, false) + ", " + String::num_real(z, false) + ")";
 }
+
+Vector3::operator Vector3i() const {
+	return Vector3i(x, y, z);
+}
diff --git a/core/math/vector3.h b/core/math/vector3.h
index c1da159e00..89b0095741 100644
--- a/core/math/vector3.h
+++ b/core/math/vector3.h
@@ -31,12 +31,13 @@
 #ifndef VECTOR3_H
 #define VECTOR3_H
 
+#include "core/error/error_macros.h"
 #include "core/math/math_funcs.h"
-#include "core/math/vector2.h"
-#include "core/math/vector3i.h"
-#include "core/string/ustring.h"
 
+class String;
 struct Basis;
+struct Vector2;
+struct Vector3i;
 
 struct _NO_DISCARD_ Vector3 {
 	static const int AXIS_COUNT = 3;
@@ -104,30 +105,8 @@ struct _NO_DISCARD_ Vector3 {
 	Vector3 cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, const real_t p_weight) const;
 	Vector3 move_toward(const Vector3 &p_to, const real_t p_delta) const;
 
-	_FORCE_INLINE_ Vector2 octahedron_encode() const {
-		Vector3 n = *this;
-		n /= Math::abs(n.x) + Math::abs(n.y) + Math::abs(n.z);
-		Vector2 o;
-		if (n.z >= 0.0f) {
-			o.x = n.x;
-			o.y = n.y;
-		} else {
-			o.x = (1.0f - Math::abs(n.y)) * (n.x >= 0.0f ? 1.0f : -1.0f);
-			o.y = (1.0f - Math::abs(n.x)) * (n.y >= 0.0f ? 1.0f : -1.0f);
-		}
-		o.x = o.x * 0.5f + 0.5f;
-		o.y = o.y * 0.5f + 0.5f;
-		return o;
-	}
-
-	static _FORCE_INLINE_ Vector3 octahedron_decode(const Vector2 &p_oct) {
-		Vector2 f(p_oct.x * 2.0f - 1.0f, p_oct.y * 2.0f - 1.0f);
-		Vector3 n(f.x, f.y, 1.0f - Math::abs(f.x) - Math::abs(f.y));
-		float t = CLAMP(-n.z, 0.0f, 1.0f);
-		n.x += n.x >= 0 ? -t : t;
-		n.y += n.y >= 0 ? -t : t;
-		return n.normalized();
-	}
+	Vector2 octahedron_encode() const;
+	static Vector3 octahedron_decode(const Vector2 &p_oct);
 
 	_FORCE_INLINE_ Vector3 cross(const Vector3 &p_with) const;
 	_FORCE_INLINE_ real_t dot(const Vector3 &p_with) const;
@@ -183,16 +162,9 @@ struct _NO_DISCARD_ Vector3 {
 	_FORCE_INLINE_ bool operator>=(const Vector3 &p_v) const;
 
 	operator String() const;
-	_FORCE_INLINE_ operator Vector3i() const {
-		return Vector3i(x, y, z);
-	}
+	operator Vector3i() const;
 
 	_FORCE_INLINE_ Vector3() {}
-	_FORCE_INLINE_ Vector3(const Vector3i &p_ivec) {
-		x = p_ivec.x;
-		y = p_ivec.y;
-		z = p_ivec.z;
-	}
 	_FORCE_INLINE_ Vector3(const real_t p_x, const real_t p_y, const real_t p_z) {
 		x = p_x;
 		y = p_y;
@@ -344,7 +316,7 @@ Vector3 &Vector3::operator*=(const real_t p_scalar) {
 }
 
 // Multiplication operators required to workaround issues with LLVM using implicit conversion
-// to Vector2i instead for integers where it should not.
+// to Vector3i instead for integers where it should not.
 
 _FORCE_INLINE_ Vector3 operator*(const float p_scalar, const Vector3 &p_vec) {
 	return p_vec * p_scalar;
diff --git a/core/math/vector3i.cpp b/core/math/vector3i.cpp
index ac79b3c7ea..b8e74ea6d2 100644
--- a/core/math/vector3i.cpp
+++ b/core/math/vector3i.cpp
@@ -30,6 +30,9 @@
 
 #include "vector3i.h"
 
+#include "core/math/vector3.h"
+#include "core/string/ustring.h"
+
 void Vector3i::set_axis(const int p_axis, const int32_t p_value) {
 	ERR_FAIL_INDEX(p_axis, 3);
 	coord[p_axis] = p_value;
@@ -58,3 +61,7 @@ Vector3i Vector3i::clamp(const Vector3i &p_min, const Vector3i &p_max) const {
 Vector3i::operator String() const {
 	return "(" + itos(x) + ", " + itos(y) + ", " + itos(z) + ")";
 }
+
+Vector3i::operator Vector3() const {
+	return Vector3(x, y, z);
+}
diff --git a/core/math/vector3i.h b/core/math/vector3i.h
index d166de80aa..2a4c7e2e97 100644
--- a/core/math/vector3i.h
+++ b/core/math/vector3i.h
@@ -32,8 +32,9 @@
 #define VECTOR3I_H
 
 #include "core/math/math_funcs.h"
-#include "core/string/ustring.h"
-#include "core/typedefs.h"
+
+class String;
+struct Vector3;
 
 struct _NO_DISCARD_ Vector3i {
 	enum Axis {
@@ -105,6 +106,7 @@ struct _NO_DISCARD_ Vector3i {
 	_FORCE_INLINE_ bool operator>=(const Vector3i &p_v) const;
 
 	operator String() const;
+	operator Vector3() const;
 
 	_FORCE_INLINE_ Vector3i() {}
 	_FORCE_INLINE_ Vector3i(const int32_t p_x, const int32_t p_y, const int32_t p_z) {