Diffstat (limited to 'core/math')
55 files changed, 6539 insertions, 493 deletions
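As a minimal usage sketch (not part of the patch itself) of the user-facing Basis changes in the basis.cpp/basis.h hunks below: the Quat type and the get_quat()/set_quat() accessors are renamed to Quaternion/get_quaternion()/set_quaternion(), and looking_at() and rotate_to_align() are newly added helpers. All variable names here (my_basis, target_dir) are invented for illustration.

// Illustrative sketch only; assumes the renamed API from the hunks below.
// my_basis and target_dir are hypothetical names, not from the patch.
#include "core/math/basis.h"

void example_basis_usage() {
	Basis my_basis;

	// Renamed accessors (previously get_quat() / set_quat()).
	Quaternion q = my_basis.get_quaternion();
	my_basis.set_quaternion(q);

	// New static helper: builds an orientation whose -Z axis looks along the target direction.
	Vector3 target_dir(0, 0, -10);
	Basis oriented = Basis::looking_at(target_dir, Vector3(0, 1, 0));

	// New helper: rotates the basis so that one direction aligns with another.
	my_basis.rotate_to_align(Vector3(1, 0, 0), target_dir.normalized());
	(void)oriented;
}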
diff --git a/core/math/a_star.cpp b/core/math/a_star.cpp index 88e11a630c..322eb7ac61 100644 --- a/core/math/a_star.cpp +++ b/core/math/a_star.cpp @@ -35,18 +35,12 @@ #include "scene/scene_string_names.h" int AStar::get_available_point_id() const { - if (points.is_empty()) { - return 1; - } - - // calculate our new next available point id if bigger than before or next id already contained in set of points. if (points.has(last_free_id)) { - int cur_new_id = last_free_id; + int cur_new_id = last_free_id + 1; while (points.has(cur_new_id)) { cur_new_id++; } - int &non_const = const_cast<int &>(last_free_id); - non_const = cur_new_id; + const_cast<int &>(last_free_id) = cur_new_id; } return last_free_id; diff --git a/core/math/a_star.h b/core/math/a_star.h index 4c61abd91c..44758cb046 100644 --- a/core/math/a_star.h +++ b/core/math/a_star.h @@ -31,7 +31,7 @@ #ifndef A_STAR_H #define A_STAR_H -#include "core/object/reference.h" +#include "core/object/ref_counted.h" #include "core/templates/oa_hash_map.h" /** @@ -40,8 +40,8 @@ @author Juan Linietsky <reduzio@gmail.com> */ -class AStar : public Reference { - GDCLASS(AStar, Reference); +class AStar : public RefCounted { + GDCLASS(AStar, RefCounted); friend class AStar2D; struct Point { @@ -157,8 +157,8 @@ public: ~AStar(); }; -class AStar2D : public Reference { - GDCLASS(AStar2D, Reference); +class AStar2D : public RefCounted { + GDCLASS(AStar2D, RefCounted); AStar astar; bool _solve(AStar::Point *begin_point, AStar::Point *end_point); diff --git a/core/math/aabb.cpp b/core/math/aabb.cpp index 2c721997d8..33aa65f15d 100644 --- a/core/math/aabb.cpp +++ b/core/math/aabb.cpp @@ -392,5 +392,5 @@ Variant AABB::intersects_ray_bind(const Vector3 &p_from, const Vector3 &p_dir) c } AABB::operator String() const { - return String() + position + " - " + size; + return "[P: " + position.operator String() + ", S: " + size + "]"; } diff --git a/core/math/basis.cpp b/core/math/basis.cpp index 50299902eb..5c42213e61 100644 --- a/core/math/basis.cpp +++ b/core/math/basis.cpp @@ -109,7 +109,7 @@ bool Basis::is_diagonal() const { } bool Basis::is_rotation() const { - return Math::is_equal_approx(determinant(), 1, UNIT_EPSILON) && is_orthogonal(); + return Math::is_equal_approx(determinant(), 1, (real_t)UNIT_EPSILON) && is_orthogonal(); } #ifdef MATH_CHECKS @@ -345,12 +345,12 @@ void Basis::rotate(const Vector3 &p_euler) { *this = rotated(p_euler); } -Basis Basis::rotated(const Quat &p_quat) const { - return Basis(p_quat) * (*this); +Basis Basis::rotated(const Quaternion &p_quaternion) const { + return Basis(p_quaternion) * (*this); } -void Basis::rotate(const Quat &p_quat) { - *this = rotated(p_quat); +void Basis::rotate(const Quaternion &p_quaternion) { + *this = rotated(p_quaternion); } Vector3 Basis::get_rotation_euler() const { @@ -367,7 +367,7 @@ Vector3 Basis::get_rotation_euler() const { return m.get_euler(); } -Quat Basis::get_rotation_quat() const { +Quaternion Basis::get_rotation_quaternion() const { // Assumes that the matrix can be decomposed into a proper rotation and scaling matrix as M = R.S, // and returns the Euler angles corresponding to the rotation part, complementing get_scale(). // See the comment in get_scale() for further information. 
@@ -378,7 +378,19 @@ Quat Basis::get_rotation_quat() const { m.scale(Vector3(-1, -1, -1)); } - return m.get_quat(); + return m.get_quaternion(); +} + +void Basis::rotate_to_align(Vector3 p_start_direction, Vector3 p_end_direction) { + // Takes two vectors and rotates the basis from the first vector to the second vector. + // Adopted from: https://gist.github.com/kevinmoran/b45980723e53edeb8a5a43c49f134724 + const Vector3 axis = p_start_direction.cross(p_end_direction).normalized(); + if (axis.length_squared() != 0) { + real_t dot = p_start_direction.dot(p_end_direction); + dot = CLAMP(dot, -1.0, 1.0); + const real_t angle_rads = Math::acos(dot); + set_axis_angle(axis, angle_rads); + } } void Basis::get_rotation_axis_angle(Vector3 &p_axis, real_t &p_angle) const { @@ -756,23 +768,14 @@ bool Basis::operator!=(const Basis &p_matrix) const { } Basis::operator String() const { - String mtx; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - if (i != 0 || j != 0) { - mtx += ", "; - } - - mtx += rtos(elements[j][i]); //matrix is stored transposed for performance, so print it transposed - } - } - - return mtx; + return "[X: " + get_axis(0).operator String() + + ", Y: " + get_axis(1).operator String() + + ", Z: " + get_axis(2).operator String() + "]"; } -Quat Basis::get_quat() const { +Quaternion Basis::get_quaternion() const { #ifdef MATH_CHECKS - ERR_FAIL_COND_V_MSG(!is_rotation(), Quat(), "Basis must be normalized in order to be casted to a Quaternion. Use get_rotation_quat() or call orthonormalized() instead."); + ERR_FAIL_COND_V_MSG(!is_rotation(), Quaternion(), "Basis must be normalized in order to be casted to a Quaternion. Use get_rotation_quaternion() or call orthonormalized() instead."); #endif /* Allow getting a quaternion from an unnormalized transform */ Basis m = *this; @@ -803,7 +806,7 @@ Quat Basis::get_quat() const { temp[k] = (m.elements[k][i] + m.elements[i][k]) * s; } - return Quat(temp[0], temp[1], temp[2], temp[3]); + return Quaternion(temp[0], temp[1], temp[2], temp[3]); } static const Basis _ortho_bases[24] = { @@ -945,13 +948,13 @@ void Basis::get_axis_angle(Vector3 &r_axis, real_t &r_angle) const { r_angle = angle; } -void Basis::set_quat(const Quat &p_quat) { - real_t d = p_quat.length_squared(); +void Basis::set_quaternion(const Quaternion &p_quaternion) { + real_t d = p_quaternion.length_squared(); real_t s = 2.0 / d; - real_t xs = p_quat.x * s, ys = p_quat.y * s, zs = p_quat.z * s; - real_t wx = p_quat.w * xs, wy = p_quat.w * ys, wz = p_quat.w * zs; - real_t xx = p_quat.x * xs, xy = p_quat.x * ys, xz = p_quat.x * zs; - real_t yy = p_quat.y * ys, yz = p_quat.y * zs, zz = p_quat.z * zs; + real_t xs = p_quaternion.x * s, ys = p_quaternion.y * s, zs = p_quaternion.z * s; + real_t wx = p_quaternion.w * xs, wy = p_quaternion.w * ys, wz = p_quaternion.w * zs; + real_t xx = p_quaternion.x * xs, xy = p_quaternion.x * ys, xz = p_quaternion.x * zs; + real_t yy = p_quaternion.y * ys, yz = p_quaternion.y * zs, zz = p_quaternion.z * zs; set(1.0 - (yy + zz), xy - wz, xz + wy, xy + wz, 1.0 - (xx + zz), yz - wx, xz - wy, yz + wx, 1.0 - (xx + yy)); @@ -997,9 +1000,9 @@ void Basis::set_euler_scale(const Vector3 &p_euler, const Vector3 &p_scale) { rotate(p_euler); } -void Basis::set_quat_scale(const Quat &p_quat, const Vector3 &p_scale) { +void Basis::set_quaternion_scale(const Quaternion &p_quaternion, const Vector3 &p_scale) { set_diagonal(p_scale); - rotate(p_quat); + rotate(p_quaternion); } void Basis::set_diagonal(const Vector3 &p_diag) { @@ -1018,8 +1021,8 @@ void 
Basis::set_diagonal(const Vector3 &p_diag) { Basis Basis::slerp(const Basis &p_to, const real_t &p_weight) const { //consider scale - Quat from(*this); - Quat to(p_to); + Quaternion from(*this); + Quaternion to(p_to); Basis b(from.slerp(to, p_weight)); b.elements[0] *= Math::lerp(elements[0].length(), p_to.elements[0].length(), p_weight); @@ -1138,3 +1141,21 @@ void Basis::rotate_sh(real_t *p_values) { p_values[7] = -d3; p_values[8] = d4 * s_scale_dst4; } + +Basis Basis::looking_at(const Vector3 &p_target, const Vector3 &p_up) { +#ifdef MATH_CHECKS + ERR_FAIL_COND_V_MSG(p_target.is_equal_approx(Vector3()), Basis(), "The target vector can't be zero."); + ERR_FAIL_COND_V_MSG(p_up.is_equal_approx(Vector3()), Basis(), "The up vector can't be zero."); +#endif + Vector3 v_z = -p_target.normalized(); + Vector3 v_x = p_up.cross(v_z); +#ifdef MATH_CHECKS + ERR_FAIL_COND_V_MSG(v_x.is_equal_approx(Vector3()), Basis(), "The target vector and up vector can't be parallel to each other."); +#endif + v_x.normalize(); + Vector3 v_y = v_z.cross(v_x); + + Basis basis; + basis.set(v_x, v_y, v_z); + return basis; +} diff --git a/core/math/basis.h b/core/math/basis.h index 56f6227313..9d8ed16e29 100644 --- a/core/math/basis.h +++ b/core/math/basis.h @@ -31,7 +31,7 @@ #ifndef BASIS_H #define BASIS_H -#include "core/math/quat.h" +#include "core/math/quaternion.h" #include "core/math/vector3.h" class Basis { @@ -79,15 +79,17 @@ public: void rotate(const Vector3 &p_euler); Basis rotated(const Vector3 &p_euler) const; - void rotate(const Quat &p_quat); - Basis rotated(const Quat &p_quat) const; + void rotate(const Quaternion &p_quaternion); + Basis rotated(const Quaternion &p_quaternion) const; Vector3 get_rotation_euler() const; void get_rotation_axis_angle(Vector3 &p_axis, real_t &p_angle) const; void get_rotation_axis_angle_local(Vector3 &p_axis, real_t &p_angle) const; - Quat get_rotation_quat() const; + Quaternion get_rotation_quaternion() const; Vector3 get_rotation() const { return get_rotation_euler(); }; + void rotate_to_align(Vector3 p_start_direction, Vector3 p_end_direction); + Vector3 rotref_posscale_decomposition(Basis &rotref) const; Vector3 get_euler_xyz() const; @@ -108,8 +110,8 @@ public: Vector3 get_euler_zyx() const; void set_euler_zyx(const Vector3 &p_euler); - Quat get_quat() const; - void set_quat(const Quat &p_quat); + Quaternion get_quaternion() const; + void set_quaternion(const Quaternion &p_quaternion); Vector3 get_euler() const { return get_euler_yxz(); } void set_euler(const Vector3 &p_euler) { set_euler_yxz(p_euler); } @@ -132,7 +134,7 @@ public: void set_axis_angle_scale(const Vector3 &p_axis, real_t p_phi, const Vector3 &p_scale); void set_euler_scale(const Vector3 &p_euler, const Vector3 &p_scale); - void set_quat_scale(const Quat &p_quat, const Vector3 &p_scale); + void set_quaternion_scale(const Quaternion &p_quaternion, const Vector3 &p_scale); // transposed dot products _FORCE_INLINE_ real_t tdotx(const Vector3 &v) const { @@ -158,8 +160,8 @@ public: _FORCE_INLINE_ Basis operator+(const Basis &p_matrix) const; _FORCE_INLINE_ void operator-=(const Basis &p_matrix); _FORCE_INLINE_ Basis operator-(const Basis &p_matrix) const; - _FORCE_INLINE_ void operator*=(real_t p_val); - _FORCE_INLINE_ Basis operator*(real_t p_val) const; + _FORCE_INLINE_ void operator*=(const real_t p_val); + _FORCE_INLINE_ Basis operator*(const real_t p_val) const; int get_orthogonal_index() const; void set_orthogonal_index(int p_index); @@ -240,10 +242,12 @@ public: #endif Basis diagonalize(); - operator 
Quat() const { return get_quat(); } + operator Quaternion() const { return get_quaternion(); } + + static Basis looking_at(const Vector3 &p_target, const Vector3 &p_up = Vector3(0, 1, 0)); - Basis(const Quat &p_quat) { set_quat(p_quat); }; - Basis(const Quat &p_quat, const Vector3 &p_scale) { set_quat_scale(p_quat, p_scale); } + Basis(const Quaternion &p_quaternion) { set_quaternion(p_quaternion); }; + Basis(const Quaternion &p_quaternion, const Vector3 &p_scale) { set_quaternion_scale(p_quaternion, p_scale); } Basis(const Vector3 &p_euler) { set_euler(p_euler); } Basis(const Vector3 &p_euler, const Vector3 &p_scale) { set_euler_scale(p_euler, p_scale); } @@ -298,13 +302,13 @@ _FORCE_INLINE_ Basis Basis::operator-(const Basis &p_matrix) const { return ret; } -_FORCE_INLINE_ void Basis::operator*=(real_t p_val) { +_FORCE_INLINE_ void Basis::operator*=(const real_t p_val) { elements[0] *= p_val; elements[1] *= p_val; elements[2] *= p_val; } -_FORCE_INLINE_ Basis Basis::operator*(real_t p_val) const { +_FORCE_INLINE_ Basis Basis::operator*(const real_t p_val) const { Basis ret(*this); ret *= p_val; return ret; diff --git a/core/math/bvh.h b/core/math/bvh.h new file mode 100644 index 0000000000..cefbc9b0db --- /dev/null +++ b/core/math/bvh.h @@ -0,0 +1,695 @@ +/*************************************************************************/ +/* bvh.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef BVH_H +#define BVH_H + +// BVH +// This class provides a wrapper around BVH tree, which contains most of the functionality +// for a dynamic BVH with templated leaf size. +// However BVH also adds facilities for pairing, to maintain compatibility with Godot 3.2. +// Pairing is a collision pairing system, on top of the basic BVH. + +// Some notes on the use of BVH / Octree from Godot 3.2. +// This is not well explained elsewhere. +// The rendering tree mask and types that are sent to the BVH are NOT layer masks. 
+// They are INSTANCE_TYPES (defined in visual_server.h), e.g. MESH, MULTIMESH, PARTICLES etc. +// Thus the lights do no cull by layer mask in the BVH. + +// Layer masks are implemented in the renderers as a later step, and light_cull_mask appears to be +// implemented in GLES3 but not GLES2. Layer masks are not yet implemented for directional lights. + +#include "bvh_tree.h" + +#define BVHTREE_CLASS BVH_Tree<T, 2, MAX_ITEMS, USE_PAIRS, Bounds, Point> + +template <class T, bool USE_PAIRS = false, int MAX_ITEMS = 32, class Bounds = AABB, class Point = Vector3> +class BVH_Manager { +public: + // note we are using uint32_t instead of BVHHandle, losing type safety, but this + // is for compatibility with octree + typedef void *(*PairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int); + typedef void (*UnpairCallback)(void *, uint32_t, T *, int, uint32_t, T *, int, void *); + + // these 2 are crucial for fine tuning, and can be applied manually + // see the variable declarations for more info. + void params_set_node_expansion(real_t p_value) { + if (p_value >= 0.0) { + tree._node_expansion = p_value; + tree._auto_node_expansion = false; + } else { + tree._auto_node_expansion = true; + } + } + + void params_set_pairing_expansion(real_t p_value) { + if (p_value >= 0.0) { + tree._pairing_expansion = p_value; + tree._auto_pairing_expansion = false; + } else { + tree._auto_pairing_expansion = true; + } + } + + void set_pair_callback(PairCallback p_callback, void *p_userdata) { + pair_callback = p_callback; + pair_callback_userdata = p_userdata; + } + void set_unpair_callback(UnpairCallback p_callback, void *p_userdata) { + unpair_callback = p_callback; + unpair_callback_userdata = p_userdata; + } + + BVHHandle create(T *p_userdata, bool p_active, const Bounds &p_aabb = Bounds(), int p_subindex = 0, bool p_pairable = false, uint32_t p_pairable_type = 0, uint32_t p_pairable_mask = 1) { + // not sure if absolutely necessary to flush collisions here. It will cost performance to, instead + // of waiting for update, so only uncomment this if there are bugs. + if (USE_PAIRS) { + //_check_for_collisions(); + } + +#ifdef TOOLS_ENABLED + if (!USE_PAIRS) { + if (p_pairable) { + WARN_PRINT_ONCE("creating pairable item in BVH with USE_PAIRS set to false"); + } + } +#endif + + BVHHandle h = tree.item_add(p_userdata, p_active, p_aabb, p_subindex, p_pairable, p_pairable_type, p_pairable_mask); + + if (USE_PAIRS) { + // for safety initialize the expanded AABB + Bounds &expanded_aabb = tree._pairs[h.id()].expanded_aabb; + expanded_aabb = p_aabb; + expanded_aabb.grow_by(tree._pairing_expansion); + + // force a collision check no matter the AABB + if (p_active) { + _add_changed_item(h, p_aabb, false); + _check_for_collisions(true); + } + } + + return h; + } + + //////////////////////////////////////////////////// + // wrapper versions that use uint32_t instead of handle + // for backward compatibility. 
Less type safe + void move(uint32_t p_handle, const Bounds &p_aabb) { + BVHHandle h; + h.set(p_handle); + move(h, p_aabb); + } + + void erase(uint32_t p_handle) { + BVHHandle h; + h.set(p_handle); + erase(h); + } + + void force_collision_check(uint32_t p_handle) { + BVHHandle h; + h.set(p_handle); + force_collision_check(h); + } + + bool activate(uint32_t p_handle, const Bounds &p_aabb, bool p_delay_collision_check = false) { + BVHHandle h; + h.set(p_handle); + return activate(h, p_aabb, p_delay_collision_check); + } + + bool deactivate(uint32_t p_handle) { + BVHHandle h; + h.set(p_handle); + return deactivate(h); + } + + void set_pairable(uint32_t p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_force_collision_check = true) { + BVHHandle h; + h.set(p_handle); + set_pairable(h, p_pairable, p_pairable_type, p_pairable_mask, p_force_collision_check); + } + + bool is_pairable(uint32_t p_handle) const { + BVHHandle h; + h.set(p_handle); + return item_is_pairable(h); + } + int get_subindex(uint32_t p_handle) const { + BVHHandle h; + h.set(p_handle); + return item_get_subindex(h); + } + + T *get(uint32_t p_handle) const { + BVHHandle h; + h.set(p_handle); + return item_get_userdata(h); + } + + //////////////////////////////////////////////////// + + void move(BVHHandle p_handle, const Bounds &p_aabb) { + if (tree.item_move(p_handle, p_aabb)) { + if (USE_PAIRS) { + _add_changed_item(p_handle, p_aabb); + } + } + } + + void erase(BVHHandle p_handle) { + // call unpair and remove all references to the item + // before deleting from the tree + if (USE_PAIRS) { + _remove_changed_item(p_handle); + } + + tree.item_remove(p_handle); + + _check_for_collisions(true); + } + + // use in conjunction with activate if you have deferred the collision check, and + // set pairable has never been called. + // (deferred collision checks are a workaround for visual server for historical reasons) + void force_collision_check(BVHHandle p_handle) { + if (USE_PAIRS) { + // the aabb should already be up to date in the BVH + Bounds aabb; + item_get_AABB(p_handle, aabb); + + // add it as changed even if aabb not different + _add_changed_item(p_handle, aabb, false); + + // force an immediate full collision check, much like calls to set_pairable + _check_for_collisions(true); + } + } + + // these should be read as set_visible for render trees, + // but generically this makes items add or remove from the + // tree internally, to speed things up by ignoring inactive items + bool activate(BVHHandle p_handle, const Bounds &p_aabb, bool p_delay_collision_check = false) { + // sending the aabb here prevents the need for the BVH to maintain + // a redundant copy of the aabb. + // returns success + if (tree.item_activate(p_handle, p_aabb)) { + if (USE_PAIRS) { + // in the special case of the render tree, when setting visibility we are using the combination of + // activate then set_pairable. This would case 2 sets of collision checks. For efficiency here we allow + // deferring to have a single collision check at the set_pairable call. + // Watch for bugs! This may cause bugs if set_pairable is not called. 
+ if (!p_delay_collision_check) { + _add_changed_item(p_handle, p_aabb, false); + + // force an immediate collision check, much like calls to set_pairable + _check_for_collisions(true); + } + } + return true; + } + + return false; + } + + bool deactivate(BVHHandle p_handle) { + // returns success + if (tree.item_deactivate(p_handle)) { + // call unpair and remove all references to the item + // before deleting from the tree + if (USE_PAIRS) { + _remove_changed_item(p_handle); + + // force check for collisions, much like an erase was called + _check_for_collisions(true); + } + return true; + } + + return false; + } + + bool get_active(BVHHandle p_handle) const { + return tree.item_get_active(p_handle); + } + + // call e.g. once per frame (this does a trickle optimize) + void update() { + tree.update(); + _check_for_collisions(); +#ifdef BVH_INTEGRITY_CHECKS + tree.integrity_check_all(); +#endif + } + + // this can be called more frequently than per frame if necessary + void update_collisions() { + _check_for_collisions(); + } + + // prefer calling this directly as type safe + void set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_force_collision_check = true) { + // Returns true if the pairing state has changed. + bool state_changed = tree.item_set_pairable(p_handle, p_pairable, p_pairable_type, p_pairable_mask); + + if (USE_PAIRS) { + // not sure if absolutely necessary to flush collisions here. It will cost performance to, instead + // of waiting for update, so only uncomment this if there are bugs. + //_check_for_collisions(); + + if ((p_force_collision_check || state_changed) && get_active(p_handle)) { + // when the pairable state changes, we need to force a collision check because newly pairable + // items may be in collision, and unpairable items might move out of collision. + // We cannot depend on waiting for the next update, because that may come much later. + Bounds aabb; + item_get_AABB(p_handle, aabb); + + // passing false disables the optimization which prevents collision checks if + // the aabb hasn't changed + _add_changed_item(p_handle, aabb, false); + + // force an immediate collision check (probably just for this one item) + // but it must be a FULL collision check, also checking pairable state and masks. + // This is because AABB intersecting objects may have changed pairable state / mask + // such that they should no longer be paired. E.g. lights. 
+ _check_for_collisions(true); + } // only if active + } + } + + // cull tests + int cull_aabb(const Bounds &p_aabb, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) { + typename BVHTREE_CLASS::CullParams params; + + params.result_count_overall = 0; + params.result_max = p_result_max; + params.result_array = p_result_array; + params.subindex_array = p_subindex_array; + params.mask = p_mask; + params.pairable_type = 0; + params.test_pairable_only = false; + params.abb.from(p_aabb); + + tree.cull_aabb(params); + + return params.result_count_overall; + } + + int cull_segment(const Point &p_from, const Point &p_to, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) { + typename BVHTREE_CLASS::CullParams params; + + params.result_count_overall = 0; + params.result_max = p_result_max; + params.result_array = p_result_array; + params.subindex_array = p_subindex_array; + params.mask = p_mask; + params.pairable_type = 0; + + params.segment.from = p_from; + params.segment.to = p_to; + + tree.cull_segment(params); + + return params.result_count_overall; + } + + int cull_point(const Point &p_point, T **p_result_array, int p_result_max, int *p_subindex_array = nullptr, uint32_t p_mask = 0xFFFFFFFF) { + typename BVHTREE_CLASS::CullParams params; + + params.result_count_overall = 0; + params.result_max = p_result_max; + params.result_array = p_result_array; + params.subindex_array = p_subindex_array; + params.mask = p_mask; + params.pairable_type = 0; + + params.point = p_point; + + tree.cull_point(params); + return params.result_count_overall; + } + + int cull_convex(const Vector<Plane> &p_convex, T **p_result_array, int p_result_max, uint32_t p_mask = 0xFFFFFFFF) { + if (!p_convex.size()) { + return 0; + } + + Vector<Vector3> convex_points = Geometry3D::compute_convex_mesh_points(&p_convex[0], p_convex.size()); + if (convex_points.size() == 0) { + return 0; + } + + typename BVHTREE_CLASS::CullParams params; + params.result_count_overall = 0; + params.result_max = p_result_max; + params.result_array = p_result_array; + params.subindex_array = nullptr; + params.mask = p_mask; + params.pairable_type = 0; + + params.hull.planes = &p_convex[0]; + params.hull.num_planes = p_convex.size(); + params.hull.points = &convex_points[0]; + params.hull.num_points = convex_points.size(); + + tree.cull_convex(params); + + return params.result_count_overall; + } + +private: + // do this after moving etc. + void _check_for_collisions(bool p_full_check = false) { + if (!changed_items.size()) { + // noop + return; + } + + Bounds bb; + + typename BVHTREE_CLASS::CullParams params; + + params.result_count_overall = 0; + params.result_max = INT_MAX; + params.result_array = nullptr; + params.subindex_array = nullptr; + params.mask = 0xFFFFFFFF; + params.pairable_type = 0; + + for (unsigned int n = 0; n < changed_items.size(); n++) { + const BVHHandle &h = changed_items[n]; + + // use the expanded aabb for pairing + const Bounds &expanded_aabb = tree._pairs[h.id()].expanded_aabb; + BVHABB_CLASS abb; + abb.from(expanded_aabb); + + // find all the existing paired aabbs that are no longer + // paired, and send callbacks + _find_leavers(h, abb, p_full_check); + + uint32_t changed_item_ref_id = h.id(); + + // set up the test from this item. + // this includes whether to test the non pairable tree, + // and the item mask. 
+ tree.item_fill_cullparams(h, params); + + params.abb = abb; + + params.result_count_overall = 0; // might not be needed + tree.cull_aabb(params, false); + + for (unsigned int i = 0; i < tree._cull_hits.size(); i++) { + uint32_t ref_id = tree._cull_hits[i]; + + // don't collide against ourself + if (ref_id == changed_item_ref_id) { + continue; + } + +#ifdef BVH_CHECKS + // if neither are pairable, they should ignore each other + // THIS SHOULD NEVER HAPPEN .. now we only test the pairable tree + // if the changed item is not pairable + CRASH_COND(params.test_pairable_only && !tree._extra[ref_id].pairable); +#endif + + // checkmasks is already done in the cull routine. + BVHHandle h_collidee; + h_collidee.set_id(ref_id); + + // find NEW enterers, and send callbacks for them only + _collide(h, h_collidee); + } + } + _reset(); + } + +public: + void item_get_AABB(BVHHandle p_handle, Bounds &r_aabb) { + BVHABB_CLASS abb; + tree.item_get_ABB(p_handle, abb); + abb.to(r_aabb); + } + +private: + // supplemental funcs + bool item_is_pairable(BVHHandle p_handle) const { return _get_extra(p_handle).pairable; } + T *item_get_userdata(BVHHandle p_handle) const { return _get_extra(p_handle).userdata; } + int item_get_subindex(BVHHandle p_handle) const { return _get_extra(p_handle).subindex; } + + void _unpair(BVHHandle p_from, BVHHandle p_to) { + tree._handle_sort(p_from, p_to); + + typename BVHTREE_CLASS::ItemExtra &exa = tree._extra[p_from.id()]; + typename BVHTREE_CLASS::ItemExtra &exb = tree._extra[p_to.id()]; + + // if the userdata is the same, no collisions should occur + if ((exa.userdata == exb.userdata) && exa.userdata) { + return; + } + + typename BVHTREE_CLASS::ItemPairs &pairs_from = tree._pairs[p_from.id()]; + typename BVHTREE_CLASS::ItemPairs &pairs_to = tree._pairs[p_to.id()]; + + void *ud_from = pairs_from.remove_pair_to(p_to); + pairs_to.remove_pair_to(p_from); + + // callback + if (unpair_callback) { + unpair_callback(pair_callback_userdata, p_from, exa.userdata, exa.subindex, p_to, exb.userdata, exb.subindex, ud_from); + } + } + + // returns true if unpair + bool _find_leavers_process_pair(typename BVHTREE_CLASS::ItemPairs &p_pairs_from, const BVHABB_CLASS &p_abb_from, BVHHandle p_from, BVHHandle p_to, bool p_full_check) { + BVHABB_CLASS abb_to; + tree.item_get_ABB(p_to, abb_to); + + // do they overlap? + if (p_abb_from.intersects(abb_to)) { + // the full check for pairable / non pairable and mask changes is extra expense + // this need not be done in most cases (for speed) except in the case where set_pairable is called + // where the masks etc of the objects in question may have changed + if (!p_full_check) { + return false; + } + const typename BVHTREE_CLASS::ItemExtra &exa = _get_extra(p_from); + const typename BVHTREE_CLASS::ItemExtra &exb = _get_extra(p_to); + + // one of the two must be pairable to still pair + // if neither are pairable, we always unpair + if (exa.pairable || exb.pairable) { + // the masks must still be compatible to pair + // i.e. 
if there is a hit between the two, then they should stay paired + if (tree._cull_pairing_mask_test_hit(exa.pairable_mask, exa.pairable_type, exb.pairable_mask, exb.pairable_type)) { + return false; + } + } + } + + _unpair(p_from, p_to); + return true; + } + + // find all the existing paired aabbs that are no longer + // paired, and send callbacks + void _find_leavers(BVHHandle p_handle, const BVHABB_CLASS &expanded_abb_from, bool p_full_check) { + typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_handle.id()]; + + BVHABB_CLASS abb_from = expanded_abb_from; + + // remove from pairing list for every partner + for (unsigned int n = 0; n < p_from.extended_pairs.size(); n++) { + BVHHandle h_to = p_from.extended_pairs[n].handle; + if (_find_leavers_process_pair(p_from, abb_from, p_handle, h_to, p_full_check)) { + // we need to keep the counter n up to date if we deleted a pair + // as the number of items in p_from.extended_pairs will have decreased by 1 + // and we don't want to miss an item + n--; + } + } + } + + // find NEW enterers, and send callbacks for them only + // handle a and b + void _collide(BVHHandle p_ha, BVHHandle p_hb) { + // only have to do this oneway, lower ID then higher ID + tree._handle_sort(p_ha, p_hb); + + const typename BVHTREE_CLASS::ItemExtra &exa = _get_extra(p_ha); + const typename BVHTREE_CLASS::ItemExtra &exb = _get_extra(p_hb); + + // if the userdata is the same, no collisions should occur + if ((exa.userdata == exb.userdata) && exa.userdata) { + return; + } + + typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_ha.id()]; + typename BVHTREE_CLASS::ItemPairs &p_to = tree._pairs[p_hb.id()]; + + // does this pair exist already? + // or only check the one with lower number of pairs for greater speed + if (p_from.num_pairs <= p_to.num_pairs) { + if (p_from.contains_pair_to(p_hb)) { + return; + } + } else { + if (p_to.contains_pair_to(p_ha)) { + return; + } + } + + // callback + void *callback_userdata = nullptr; + + if (pair_callback) { + callback_userdata = pair_callback(pair_callback_userdata, p_ha, exa.userdata, exa.subindex, p_hb, exb.userdata, exb.subindex); + } + + // new pair! .. only really need to store the userdata on the lower handle, but both have storage so... + p_from.add_pair_to(p_hb, callback_userdata); + p_to.add_pair_to(p_ha, callback_userdata); + } + + // if we remove an item, we need to immediately remove the pairs, to prevent reading the pair after deletion + void _remove_pairs_containing(BVHHandle p_handle) { + typename BVHTREE_CLASS::ItemPairs &p_from = tree._pairs[p_handle.id()]; + + // remove from pairing list for every partner. + // can't easily use a for loop here, because removing changes the size of the list + while (p_from.extended_pairs.size()) { + BVHHandle h_to = p_from.extended_pairs[0].handle; + _unpair(p_handle, h_to); + } + } + +private: + const typename BVHTREE_CLASS::ItemExtra &_get_extra(BVHHandle p_handle) const { + return tree._extra[p_handle.id()]; + } + const typename BVHTREE_CLASS::ItemRef &_get_ref(BVHHandle p_handle) const { + return tree._refs[p_handle.id()]; + } + + void _reset() { + changed_items.clear(); + _tick++; + } + + void _add_changed_item(BVHHandle p_handle, const Bounds &aabb, bool p_check_aabb = true) { + // Note that non pairable items can pair with pairable, + // so all types must be added to the list + + // aabb check with expanded aabb. 
This greatly decreases processing + // at the cost of slightly less accurate pairing checks + // Note this pairing AABB is separate from the AABB in the actual tree + Bounds &expanded_aabb = tree._pairs[p_handle.id()].expanded_aabb; + + // passing p_check_aabb false disables the optimization which prevents collision checks if + // the aabb hasn't changed. This is needed where set_pairable has been called, but the position + // has not changed. + if (p_check_aabb && expanded_aabb.encloses(aabb)) { + return; + } + + // ALWAYS update the new expanded aabb, even if already changed once + // this tick, because it is vital that the AABB is kept up to date + expanded_aabb = aabb; + expanded_aabb.grow_by(tree._pairing_expansion); + + // this code is to ensure that changed items only appear once on the updated list + // collision checking them multiple times is not needed, and repeats the same thing + uint32_t &last_updated_tick = tree._extra[p_handle.id()].last_updated_tick; + + if (last_updated_tick == _tick) { + return; // already on changed list + } + + // mark as on list + last_updated_tick = _tick; + + // add to the list + changed_items.push_back(p_handle); + } + + void _remove_changed_item(BVHHandle p_handle) { + // Care has to be taken here for items that are deleted. The ref ID + // could be reused on the same tick for new items. This is probably + // rare but should be taken into consideration + + // callbacks + _remove_pairs_containing(p_handle); + + // remove from changed items (not very efficient yet) + for (int n = 0; n < (int)changed_items.size(); n++) { + if (changed_items[n] == p_handle) { + changed_items.remove_unordered(n); + + // because we are using an unordered remove, + // the last changed item will now be at spot 'n', + // and we need to redo it, so we prevent moving on to + // the next n at the next for iteration. + n--; + } + } + + // reset the last updated tick (may not be necessary but just in case) + tree._extra[p_handle.id()].last_updated_tick = 0; + } + + PairCallback pair_callback; + UnpairCallback unpair_callback; + void *pair_callback_userdata; + void *unpair_callback_userdata; + + BVHTREE_CLASS tree; + + // for collision pairing, + // maintain a list of all items moved etc on each frame / tick + LocalVector<BVHHandle, uint32_t, true> changed_items; + uint32_t _tick; + +public: + BVH_Manager() { + _tick = 1; // start from 1 so items with 0 indicate never updated + pair_callback = nullptr; + unpair_callback = nullptr; + pair_callback_userdata = nullptr; + unpair_callback_userdata = nullptr; + } +}; + +#undef BVHTREE_CLASS + +#endif // BVH_H diff --git a/core/math/bvh_abb.h b/core/math/bvh_abb.h new file mode 100644 index 0000000000..bd9a01a87e --- /dev/null +++ b/core/math/bvh_abb.h @@ -0,0 +1,276 @@ +/*************************************************************************/ +/* bvh_abb.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef BVH_ABB_H +#define BVH_ABB_H + +// special optimized version of axis aligned bounding box +template <class Bounds = AABB, class Point = Vector3> +struct BVH_ABB { + struct ConvexHull { + // convex hulls (optional) + const Plane *planes; + int num_planes; + const Vector3 *points; + int num_points; + }; + + struct Segment { + Point from; + Point to; + }; + + enum IntersectResult { + IR_MISS = 0, + IR_PARTIAL, + IR_FULL, + }; + + // we store mins with a negative value in order to test them with SIMD + Point min; + Point neg_max; + + bool operator==(const BVH_ABB &o) const { return (min == o.min) && (neg_max == o.neg_max); } + bool operator!=(const BVH_ABB &o) const { return (*this == o) == false; } + + void set(const Point &_min, const Point &_max) { + min = _min; + neg_max = -_max; + } + + // to and from standard AABB + void from(const Bounds &p_aabb) { + min = p_aabb.position; + neg_max = -(p_aabb.position + p_aabb.size); + } + + void to(Bounds &r_aabb) const { + r_aabb.position = min; + r_aabb.size = calculate_size(); + } + + void merge(const BVH_ABB &p_o) { + for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) { + neg_max[axis] = MIN(neg_max[axis], p_o.neg_max[axis]); + min[axis] = MIN(min[axis], p_o.min[axis]); + } + } + + Point calculate_size() const { + return -neg_max - min; + } + + Point calculate_centre() const { + return Point((calculate_size() * 0.5) + min); + } + + real_t get_proximity_to(const BVH_ABB &p_b) const { + const Point d = (min - neg_max) - (p_b.min - p_b.neg_max); + real_t proximity = 0.0; + for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) { + proximity += Math::abs(d[axis]); + } + return proximity; + } + + int select_by_proximity(const BVH_ABB &p_a, const BVH_ABB &p_b) const { + return (get_proximity_to(p_a) < get_proximity_to(p_b) ? 0 : 1); + } + + uint32_t find_cutting_planes(const BVH_ABB::ConvexHull &p_hull, uint32_t *p_plane_ids) const { + uint32_t count = 0; + + for (int n = 0; n < p_hull.num_planes; n++) { + const Plane &p = p_hull.planes[n]; + if (intersects_plane(p)) { + p_plane_ids[count++] = n; + } + } + + return count; + } + + bool intersects_plane(const Plane &p_p) const { + Vector3 size = calculate_size(); + Vector3 half_extents = size * 0.5; + Vector3 ofs = min + half_extents; + + // forward side of plane? 
+ Vector3 point_offset( + (p_p.normal.x < 0) ? -half_extents.x : half_extents.x, + (p_p.normal.y < 0) ? -half_extents.y : half_extents.y, + (p_p.normal.z < 0) ? -half_extents.z : half_extents.z); + Vector3 point = point_offset + ofs; + + if (!p_p.is_point_over(point)) { + return false; + } + + point = -point_offset + ofs; + if (p_p.is_point_over(point)) { + return false; + } + + return true; + } + + bool intersects_convex_optimized(const ConvexHull &p_hull, const uint32_t *p_plane_ids, uint32_t p_num_planes) const { + Vector3 size = calculate_size(); + Vector3 half_extents = size * 0.5; + Vector3 ofs = min + half_extents; + + for (unsigned int i = 0; i < p_num_planes; i++) { + const Plane &p = p_hull.planes[p_plane_ids[i]]; + Vector3 point( + (p.normal.x > 0) ? -half_extents.x : half_extents.x, + (p.normal.y > 0) ? -half_extents.y : half_extents.y, + (p.normal.z > 0) ? -half_extents.z : half_extents.z); + point += ofs; + if (p.is_point_over(point)) { + return false; + } + } + + return true; + } + + bool intersects_convex_partial(const ConvexHull &p_hull) const { + Bounds bb; + to(bb); + return bb.intersects_convex_shape(p_hull.planes, p_hull.num_planes, p_hull.points, p_hull.num_points); + } + + IntersectResult intersects_convex(const ConvexHull &p_hull) const { + if (intersects_convex_partial(p_hull)) { + // fully within? very important for tree checks + if (is_within_convex(p_hull)) { + return IR_FULL; + } + + return IR_PARTIAL; + } + + return IR_MISS; + } + + bool is_within_convex(const ConvexHull &p_hull) const { + // use half extents routine + Bounds bb; + to(bb); + return bb.inside_convex_shape(p_hull.planes, p_hull.num_planes); + } + + bool is_point_within_hull(const ConvexHull &p_hull, const Vector3 &p_pt) const { + for (int n = 0; n < p_hull.num_planes; n++) { + if (p_hull.planes[n].distance_to(p_pt) > 0.0f) { + return false; + } + } + return true; + } + + bool intersects_segment(const Segment &p_s) const { + Bounds bb; + to(bb); + return bb.intersects_segment(p_s.from, p_s.to); + } + + bool intersects_point(const Point &p_pt) const { + if (_any_lessthan(-p_pt, neg_max)) { + return false; + } + if (_any_lessthan(p_pt, min)) { + return false; + } + return true; + } + + bool intersects(const BVH_ABB &p_o) const { + if (_any_morethan(p_o.min, -neg_max)) { + return false; + } + if (_any_morethan(min, -p_o.neg_max)) { + return false; + } + return true; + } + + bool is_other_within(const BVH_ABB &p_o) const { + if (_any_lessthan(p_o.neg_max, neg_max)) { + return false; + } + if (_any_lessthan(p_o.min, min)) { + return false; + } + return true; + } + + void grow(const Point &p_change) { + neg_max -= p_change; + min -= p_change; + } + + void expand(real_t p_change) { + Point change; + change.set_all(p_change); + grow(change); + } + + // Actually surface area metric. 
+ float get_area() const { + Point d = calculate_size(); + return 2.0f * (d.x * d.y + d.y * d.z + d.z * d.x); + } + + void set_to_max_opposite_extents() { + neg_max.set_all(FLT_MAX); + min = neg_max; + } + + bool _any_morethan(const Point &p_a, const Point &p_b) const { + for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) { + if (p_a[axis] > p_b[axis]) { + return true; + } + } + return false; + } + + bool _any_lessthan(const Point &p_a, const Point &p_b) const { + for (int axis = 0; axis < Point::AXIS_COUNT; ++axis) { + if (p_a[axis] < p_b[axis]) { + return true; + } + } + return false; + } +}; + +#endif // BVH_ABB_H diff --git a/core/math/bvh_cull.inc b/core/math/bvh_cull.inc new file mode 100644 index 0000000000..cba8ea6cb3 --- /dev/null +++ b/core/math/bvh_cull.inc @@ -0,0 +1,534 @@ +public: +// cull parameters is a convenient way of passing a bunch +// of arguments through the culling functions without +// writing loads of code. Not all members are used for some cull checks +struct CullParams { + int result_count_overall; // both trees + int result_count; // this tree only + int result_max; + T **result_array; + int *subindex_array; + + // nobody truly understands how masks are intended to work. + uint32_t mask; + uint32_t pairable_type; + + // optional components for different tests + Vector3 point; + BVHABB_CLASS abb; + typename BVHABB_CLASS::ConvexHull hull; + typename BVHABB_CLASS::Segment segment; + + // when collision testing, non pairable moving items + // only need to be tested against the pairable tree. + // collisions with other non pairable items are irrelevant. + bool test_pairable_only; +}; + +private: +void _cull_translate_hits(CullParams &p) { + int num_hits = _cull_hits.size(); + int left = p.result_max - p.result_count_overall; + + if (num_hits > left) { + num_hits = left; + } + + int out_n = p.result_count_overall; + + for (int n = 0; n < num_hits; n++) { + uint32_t ref_id = _cull_hits[n]; + + const ItemExtra &ex = _extra[ref_id]; + p.result_array[out_n] = ex.userdata; + + if (p.subindex_array) { + p.subindex_array[out_n] = ex.subindex; + } + + out_n++; + } + + p.result_count = num_hits; + p.result_count_overall += num_hits; +} + +public: +int cull_convex(CullParams &r_params, bool p_translate_hits = true) { + _cull_hits.clear(); + r_params.result_count = 0; + + for (int n = 0; n < NUM_TREES; n++) { + if (_root_node_id[n] == BVHCommon::INVALID) { + continue; + } + + _cull_convex_iterative(_root_node_id[n], r_params); + } + + if (p_translate_hits) { + _cull_translate_hits(r_params); + } + + return r_params.result_count; +} + +int cull_segment(CullParams &r_params, bool p_translate_hits = true) { + _cull_hits.clear(); + r_params.result_count = 0; + + for (int n = 0; n < NUM_TREES; n++) { + if (_root_node_id[n] == BVHCommon::INVALID) { + continue; + } + + _cull_segment_iterative(_root_node_id[n], r_params); + } + + if (p_translate_hits) { + _cull_translate_hits(r_params); + } + + return r_params.result_count; +} + +int cull_point(CullParams &r_params, bool p_translate_hits = true) { + _cull_hits.clear(); + r_params.result_count = 0; + + for (int n = 0; n < NUM_TREES; n++) { + if (_root_node_id[n] == BVHCommon::INVALID) { + continue; + } + + _cull_point_iterative(_root_node_id[n], r_params); + } + + if (p_translate_hits) { + _cull_translate_hits(r_params); + } + + return r_params.result_count; +} + +int cull_aabb(CullParams &r_params, bool p_translate_hits = true) { + _cull_hits.clear(); + r_params.result_count = 0; + + for (int n = 0; n < NUM_TREES; n++) { + if 
(_root_node_id[n] == BVHCommon::INVALID) { + continue; + } + + if ((n == 0) && r_params.test_pairable_only) { + continue; + } + + _cull_aabb_iterative(_root_node_id[n], r_params); + } + + if (p_translate_hits) { + _cull_translate_hits(r_params); + } + + return r_params.result_count; +} + +bool _cull_hits_full(const CullParams &p) { + // instead of checking every hit, we can do a lazy check for this condition. + // it isn't a problem if we write too much _cull_hits because they only the + // result_max amount will be translated and outputted. But we might as + // well stop our cull checks after the maximum has been reached. + return (int)_cull_hits.size() >= p.result_max; +} + +// write this logic once for use in all routines +// double check this as a possible source of bugs in future. +bool _cull_pairing_mask_test_hit(uint32_t p_maskA, uint32_t p_typeA, uint32_t p_maskB, uint32_t p_typeB) const { + // double check this as a possible source of bugs in future. + bool A_match_B = p_maskA & p_typeB; + + if (!A_match_B) { + bool B_match_A = p_maskB & p_typeA; + if (!B_match_A) { + return false; + } + } + + return true; +} + +void _cull_hit(uint32_t p_ref_id, CullParams &p) { + // take into account masks etc + // this would be more efficient to do before plane checks, + // but done here for ease to get started + if (USE_PAIRS) { + const ItemExtra &ex = _extra[p_ref_id]; + + if (!_cull_pairing_mask_test_hit(p.mask, p.pairable_type, ex.pairable_mask, ex.pairable_type)) { + return; + } + } + + _cull_hits.push_back(p_ref_id); +} + +bool _cull_segment_iterative(uint32_t p_node_id, CullParams &r_params) { + // our function parameters to keep on a stack + struct CullSegParams { + uint32_t node_id; + }; + + // most of the iterative functionality is contained in this helper class + BVH_IterativeInfo<CullSegParams> ii; + + // alloca must allocate the stack from this function, it cannot be allocated in the + // helper class + ii.stack = (CullSegParams *)alloca(ii.get_alloca_stacksize()); + + // seed the stack + ii.get_first()->node_id = p_node_id; + + CullSegParams csp; + + // while there are still more nodes on the stack + while (ii.pop(csp)) { + TNode &tnode = _nodes[csp.node_id]; + + if (tnode.is_leaf()) { + // lazy check for hits full up condition + if (_cull_hits_full(r_params)) { + return false; + } + + TLeaf &leaf = _node_get_leaf(tnode); + + // test children individually + for (int n = 0; n < leaf.num_items; n++) { + const BVHABB_CLASS &aabb = leaf.get_aabb(n); + + if (aabb.intersects_segment(r_params.segment)) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // register hit + _cull_hit(child_id, r_params); + } + } + } else { + // test children individually + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + const BVHABB_CLASS &child_abb = _nodes[child_id].aabb; + + if (child_abb.intersects_segment(r_params.segment)) { + // add to the stack + CullSegParams *child = ii.request(); + child->node_id = child_id; + } + } + } + + } // while more nodes to pop + + // true indicates results are not full + return true; +} + +bool _cull_point_iterative(uint32_t p_node_id, CullParams &r_params) { + // our function parameters to keep on a stack + struct CullPointParams { + uint32_t node_id; + }; + + // most of the iterative functionality is contained in this helper class + BVH_IterativeInfo<CullPointParams> ii; + + // alloca must allocate the stack from this function, it cannot be allocated in the + // helper class + ii.stack = (CullPointParams 
*)alloca(ii.get_alloca_stacksize()); + + // seed the stack + ii.get_first()->node_id = p_node_id; + + CullPointParams cpp; + + // while there are still more nodes on the stack + while (ii.pop(cpp)) { + TNode &tnode = _nodes[cpp.node_id]; + // no hit with this node? + if (!tnode.aabb.intersects_point(r_params.point)) { + continue; + } + + if (tnode.is_leaf()) { + // lazy check for hits full up condition + if (_cull_hits_full(r_params)) { + return false; + } + + TLeaf &leaf = _node_get_leaf(tnode); + + // test children individually + for (int n = 0; n < leaf.num_items; n++) { + if (leaf.get_aabb(n).intersects_point(r_params.point)) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // register hit + _cull_hit(child_id, r_params); + } + } + } else { + // test children individually + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + + // add to the stack + CullPointParams *child = ii.request(); + child->node_id = child_id; + } + } + + } // while more nodes to pop + + // true indicates results are not full + return true; +} + +bool _cull_aabb_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully_within = false) { + // our function parameters to keep on a stack + struct CullAABBParams { + uint32_t node_id; + bool fully_within; + }; + + // most of the iterative functionality is contained in this helper class + BVH_IterativeInfo<CullAABBParams> ii; + + // alloca must allocate the stack from this function, it cannot be allocated in the + // helper class + ii.stack = (CullAABBParams *)alloca(ii.get_alloca_stacksize()); + + // seed the stack + ii.get_first()->node_id = p_node_id; + ii.get_first()->fully_within = p_fully_within; + + CullAABBParams cap; + + // while there are still more nodes on the stack + while (ii.pop(cap)) { + TNode &tnode = _nodes[cap.node_id]; + + if (tnode.is_leaf()) { + // lazy check for hits full up condition + if (_cull_hits_full(r_params)) { + return false; + } + + TLeaf &leaf = _node_get_leaf(tnode); + + // if fully within we can just add all items + // as long as they pass mask checks + if (cap.fully_within) { + for (int n = 0; n < leaf.num_items; n++) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // register hit + _cull_hit(child_id, r_params); + } + } else { + for (int n = 0; n < leaf.num_items; n++) { + const BVHABB_CLASS &aabb = leaf.get_aabb(n); + + if (aabb.intersects(r_params.abb)) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // register hit + _cull_hit(child_id, r_params); + } + } + } // not fully within + } else { + if (!cap.fully_within) { + // test children individually + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + const BVHABB_CLASS &child_abb = _nodes[child_id].aabb; + + if (child_abb.intersects(r_params.abb)) { + // is the node totally within the aabb? 
+ bool fully_within = r_params.abb.is_other_within(child_abb); + + // add to the stack + CullAABBParams *child = ii.request(); + + // should always return valid child + child->node_id = child_id; + child->fully_within = fully_within; + } + } + } else { + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + + // add to the stack + CullAABBParams *child = ii.request(); + + // should always return valid child + child->node_id = child_id; + child->fully_within = true; + } + } + } + + } // while more nodes to pop + + // true indicates results are not full + return true; +} + +// returns full up with results +bool _cull_convex_iterative(uint32_t p_node_id, CullParams &r_params, bool p_fully_within = false) { + // our function parameters to keep on a stack + struct CullConvexParams { + uint32_t node_id; + bool fully_within; + }; + + // most of the iterative functionality is contained in this helper class + BVH_IterativeInfo<CullConvexParams> ii; + + // alloca must allocate the stack from this function, it cannot be allocated in the + // helper class + ii.stack = (CullConvexParams *)alloca(ii.get_alloca_stacksize()); + + // seed the stack + ii.get_first()->node_id = p_node_id; + ii.get_first()->fully_within = p_fully_within; + + // preallocate these as a once off to be reused + uint32_t max_planes = r_params.hull.num_planes; + uint32_t *plane_ids = (uint32_t *)alloca(sizeof(uint32_t) * max_planes); + + CullConvexParams ccp; + + // while there are still more nodes on the stack + while (ii.pop(ccp)) { + const TNode &tnode = _nodes[ccp.node_id]; + + if (!ccp.fully_within) { + typename BVHABB_CLASS::IntersectResult res = tnode.aabb.intersects_convex(r_params.hull); + + switch (res) { + default: { + continue; // miss, just move on to the next node in the stack + } break; + case BVHABB_CLASS::IR_PARTIAL: { + } break; + case BVHABB_CLASS::IR_FULL: { + ccp.fully_within = true; + } break; + } + + } // if not fully within already + + if (tnode.is_leaf()) { + // lazy check for hits full up condition + if (_cull_hits_full(r_params)) { + return false; + } + + const TLeaf &leaf = _node_get_leaf(tnode); + + // if fully within, simply add all items to the result + // (taking into account masks) + if (ccp.fully_within) { + for (int n = 0; n < leaf.num_items; n++) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // register hit + _cull_hit(child_id, r_params); + } + + } else { + // we can either use a naive check of all the planes against the AABB, + // or an optimized check, which finds in advance which of the planes can possibly + // cut the AABB, and only tests those. This can be much faster. 
+#define BVH_CONVEX_CULL_OPTIMIZED +#ifdef BVH_CONVEX_CULL_OPTIMIZED + // first find which planes cut the aabb + uint32_t num_planes = tnode.aabb.find_cutting_planes(r_params.hull, plane_ids); + BVH_ASSERT(num_planes <= max_planes); + +//#define BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK +#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK + // rigorous check + uint32_t results[MAX_ITEMS]; + uint32_t num_results = 0; +#endif + + // test children individually + for (int n = 0; n < leaf.num_items; n++) { + //const Item &item = leaf.get_item(n); + const BVHABB_CLASS &aabb = leaf.get_aabb(n); + + if (aabb.intersects_convex_optimized(r_params.hull, plane_ids, num_planes)) { + uint32_t child_id = leaf.get_item_ref_id(n); + +#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK + results[num_results++] = child_id; +#endif + + // register hit + _cull_hit(child_id, r_params); + } + } + +#ifdef BVH_CONVEX_CULL_OPTIMIZED_RIGOR_CHECK + uint32_t test_count = 0; + + for (int n = 0; n < leaf.num_items; n++) { + const BVHABB_CLASS &aabb = leaf.get_aabb(n); + + if (aabb.intersects_convex_partial(r_params.hull)) { + uint32_t child_id = leaf.get_item_ref_id(n); + + CRASH_COND(child_id != results[test_count++]); + CRASH_COND(test_count > num_results); + } + } +#endif + +#else + // not BVH_CONVEX_CULL_OPTIMIZED + // test children individually + for (int n = 0; n < leaf.num_items; n++) { + const BVHABB_CLASS &aabb = leaf.get_aabb(n); + + if (aabb.intersects_convex_partial(r_params.hull)) { + uint32_t child_id = leaf.get_item_ref_id(n); + + // full up with results? exit early, no point in further testing + if (!_cull_hit(child_id, r_params)) + return false; + } + } +#endif // BVH_CONVEX_CULL_OPTIMIZED + } // if not fully within + } else { + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + + // add to the stack + CullConvexParams *child = ii.request(); + + // should always return valid child + child->node_id = child_id; + child->fully_within = ccp.fully_within; + } + } + + } // while more nodes to pop + + // true indicates results are not full + return true; +} diff --git a/core/math/bvh_debug.inc b/core/math/bvh_debug.inc new file mode 100644 index 0000000000..a97304334c --- /dev/null +++ b/core/math/bvh_debug.inc @@ -0,0 +1,68 @@ +public: +#ifdef BVH_VERBOSE +void _debug_recursive_print_tree(int p_tree_id) const { + if (_root_node_id[p_tree_id] != BVHCommon::INVALID) + _debug_recursive_print_tree_node(_root_node_id[p_tree_id]); +} + +String _debug_aabb_to_string(const BVHABB_CLASS &aabb) const { + String sz = "("; + sz += itos(aabb.min.x); + sz += " ~ "; + sz += itos(-aabb.neg_max.x); + sz += ") ("; + + sz += itos(aabb.min.y); + sz += " ~ "; + sz += itos(-aabb.neg_max.y); + sz += ") ("; + + sz += itos(aabb.min.z); + sz += " ~ "; + sz += itos(-aabb.neg_max.z); + sz += ") "; + + Vector3 size = aabb.calculate_size(); + float vol = size.x * size.y * size.z; + sz += "vol " + itos(vol); + + return sz; +} + +void _debug_recursive_print_tree_node(uint32_t p_node_id, int depth = 0) const { + const TNode &tnode = _nodes[p_node_id]; + + String sz = ""; + for (int n = 0; n < depth; n++) { + sz += "\t"; + } + sz += itos(p_node_id); + + if (tnode.is_leaf()) { + sz += " L"; + sz += itos(tnode.height) + " "; + const TLeaf &leaf = _node_get_leaf(tnode); + + sz += "["; + for (int n = 0; n < leaf.num_items; n++) { + if (n) + sz += ", "; + sz += "r"; + sz += itos(leaf.get_item_ref_id(n)); + } + sz += "] "; + } else { + sz += " N"; + sz += itos(tnode.height) + " "; + } + + sz += 
_debug_aabb_to_string(tnode.aabb); + print_line(sz); + + if (!tnode.is_leaf()) { + for (int n = 0; n < tnode.num_children; n++) { + _debug_recursive_print_tree_node(tnode.children[n], depth + 1); + } + } +} +#endif diff --git a/core/math/bvh_integrity.inc b/core/math/bvh_integrity.inc new file mode 100644 index 0000000000..02e9d30097 --- /dev/null +++ b/core/math/bvh_integrity.inc @@ -0,0 +1,42 @@ +void _integrity_check_all() { +#ifdef BVH_INTEGRITY_CHECKS + for (int n = 0; n < NUM_TREES; n++) { + uint32_t root = _root_node_id[n]; + if (root != BVHCommon::INVALID) { + _integrity_check_down(root); + } + } +#endif +} + +void _integrity_check_up(uint32_t p_node_id) { + TNode &node = _nodes[p_node_id]; + + BVHABB_CLASS abb = node.aabb; + node_update_aabb(node); + + BVHABB_CLASS abb2 = node.aabb; + abb2.expand(-_node_expansion); + + CRASH_COND(!abb.is_other_within(abb2)); +} + +void _integrity_check_down(uint32_t p_node_id) { + const TNode &node = _nodes[p_node_id]; + + if (node.is_leaf()) { + _integrity_check_up(p_node_id); + } else { + CRASH_COND(node.num_children != 2); + + for (int n = 0; n < node.num_children; n++) { + uint32_t child_id = node.children[n]; + + // check the children parent pointers are correct + TNode &child = _nodes[child_id]; + CRASH_COND(child.parent_id != p_node_id); + + _integrity_check_down(child_id); + } + } +} diff --git a/core/math/bvh_logic.inc b/core/math/bvh_logic.inc new file mode 100644 index 0000000000..afab08f151 --- /dev/null +++ b/core/math/bvh_logic.inc @@ -0,0 +1,230 @@ + +// for slow incremental optimization, we will periodically remove each +// item from the tree and reinsert, to give it a chance to find a better position +void _logic_item_remove_and_reinsert(uint32_t p_ref_id) { + // get the reference + ItemRef &ref = _refs[p_ref_id]; + + // no need to optimize inactive items + if (!ref.is_active()) { + return; + } + + // special case of debug draw + if (ref.item_id == BVHCommon::INVALID) { + return; + } + + BVH_ASSERT(ref.tnode_id != BVHCommon::INVALID); + + // some overlay elaborate way to find out which tree the node is in! + BVHHandle temp_handle; + temp_handle.set_id(p_ref_id); + uint32_t tree_id = _handle_get_tree_id(temp_handle); + + // remove and reinsert + BVHABB_CLASS abb; + node_remove_item(p_ref_id, tree_id, &abb); + + // we must choose where to add to tree + ref.tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb); + _node_add_item(ref.tnode_id, p_ref_id, abb); + + refit_upward_and_balance(ref.tnode_id, tree_id); +} + +// from randy gaul balance function +BVHABB_CLASS _logic_abb_merge(const BVHABB_CLASS &a, const BVHABB_CLASS &b) { + BVHABB_CLASS c = a; + c.merge(b); + return c; +} + +//-------------------------------------------------------------------------------------------------- +/** +@file q3DynamicAABBTree.h +@author Randy Gaul +@date 10/10/2014 + Copyright (c) 2014 Randy Gaul http://www.randygaul.net + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. 
+ 2. Altered source versions must be plainly marked as such, and must not + be misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ +//-------------------------------------------------------------------------------------------------- + +// This function is based on the 'Balance' function from Randy Gaul's qu3e +// https://github.com/RandyGaul/qu3e +// It is MODIFIED from qu3e version. +// This is the only function used (and _logic_abb_merge helper function). +int32_t _logic_balance(int32_t iA, uint32_t p_tree_id) { + // return iA; // uncomment this to bypass balance + + TNode *A = &_nodes[iA]; + + if (A->is_leaf() || A->height == 1) { + return iA; + } + + /* A + / \ + B C + / \ / \ + D E F G + */ + + CRASH_COND(A->num_children != 2); + int32_t iB = A->children[0]; + int32_t iC = A->children[1]; + TNode *B = &_nodes[iB]; + TNode *C = &_nodes[iC]; + + int32_t balance = C->height - B->height; + + // C is higher, promote C + if (balance > 1) { + int32_t iF = C->children[0]; + int32_t iG = C->children[1]; + TNode *F = &_nodes[iF]; + TNode *G = &_nodes[iG]; + + // grandParent point to C + if (A->parent_id != BVHCommon::INVALID) { + if (_nodes[A->parent_id].children[0] == iA) { + _nodes[A->parent_id].children[0] = iC; + + } else { + _nodes[A->parent_id].children[1] = iC; + } + } else { + // check this .. seems dodgy + change_root_node(iC, p_tree_id); + } + + // Swap A and C + C->children[0] = iA; + C->parent_id = A->parent_id; + A->parent_id = iC; + + // Finish rotation + if (F->height > G->height) { + C->children[1] = iF; + A->children[1] = iG; + G->parent_id = iA; + A->aabb = _logic_abb_merge(B->aabb, G->aabb); + C->aabb = _logic_abb_merge(A->aabb, F->aabb); + + A->height = 1 + MAX(B->height, G->height); + C->height = 1 + MAX(A->height, F->height); + } + + else { + C->children[1] = iG; + A->children[1] = iF; + F->parent_id = iA; + A->aabb = _logic_abb_merge(B->aabb, F->aabb); + C->aabb = _logic_abb_merge(A->aabb, G->aabb); + + A->height = 1 + MAX(B->height, F->height); + C->height = 1 + MAX(A->height, G->height); + } + + return iC; + } + + // B is higher, promote B + else if (balance < -1) { + int32_t iD = B->children[0]; + int32_t iE = B->children[1]; + TNode *D = &_nodes[iD]; + TNode *E = &_nodes[iE]; + + // grandParent point to B + if (A->parent_id != BVHCommon::INVALID) { + if (_nodes[A->parent_id].children[0] == iA) { + _nodes[A->parent_id].children[0] = iB; + } else { + _nodes[A->parent_id].children[1] = iB; + } + } + + else { + // check this .. 
seems dodgy + change_root_node(iB, p_tree_id); + } + + // Swap A and B + B->children[1] = iA; + B->parent_id = A->parent_id; + A->parent_id = iB; + + // Finish rotation + if (D->height > E->height) { + B->children[0] = iD; + A->children[0] = iE; + E->parent_id = iA; + A->aabb = _logic_abb_merge(C->aabb, E->aabb); + B->aabb = _logic_abb_merge(A->aabb, D->aabb); + + A->height = 1 + MAX(C->height, E->height); + B->height = 1 + MAX(A->height, D->height); + } + + else { + B->children[0] = iE; + A->children[0] = iD; + D->parent_id = iA; + A->aabb = _logic_abb_merge(C->aabb, D->aabb); + B->aabb = _logic_abb_merge(A->aabb, E->aabb); + + A->height = 1 + MAX(C->height, D->height); + B->height = 1 + MAX(A->height, E->height); + } + + return iB; + } + + return iA; +} + +// either choose an existing node to add item to, or create a new node and return this +uint32_t _logic_choose_item_add_node(uint32_t p_node_id, const BVHABB_CLASS &p_aabb) { + while (true) { + BVH_ASSERT(p_node_id != BVHCommon::INVALID); + TNode &tnode = _nodes[p_node_id]; + + if (tnode.is_leaf()) { + // if a leaf, and non full, use this to add to + if (!node_is_leaf_full(tnode)) { + return p_node_id; + } + + // else split the leaf, and use one of the children to add to + return split_leaf(p_node_id, p_aabb); + } + + // this should not happen??? + // is still happening, need to debug and find circumstances. Is not that serious + // but would be nice to prevent. I think it only happens with the root node. + if (tnode.num_children == 1) { + WARN_PRINT_ONCE("BVH::recursive_choose_item_add_node, node with 1 child, recovering"); + p_node_id = tnode.children[0]; + } else { + BVH_ASSERT(tnode.num_children == 2); + TNode &childA = _nodes[tnode.children[0]]; + TNode &childB = _nodes[tnode.children[1]]; + int which = p_aabb.select_by_proximity(childA.aabb, childB.aabb); + + p_node_id = tnode.children[which]; + } + } +} diff --git a/core/math/bvh_misc.inc b/core/math/bvh_misc.inc new file mode 100644 index 0000000000..71aa0e4fe0 --- /dev/null +++ b/core/math/bvh_misc.inc @@ -0,0 +1,55 @@ + +int _handle_get_tree_id(BVHHandle p_handle) const { + if (USE_PAIRS) { + int tree = 0; + if (_extra[p_handle.id()].pairable) { + tree = 1; + } + return tree; + } + return 0; +} + +public: +void _handle_sort(BVHHandle &p_ha, BVHHandle &p_hb) const { + if (p_ha.id() > p_hb.id()) { + BVHHandle temp = p_hb; + p_hb = p_ha; + p_ha = temp; + } +} + +private: +void create_root_node(int p_tree) { + // if there is no root node, create one + if (_root_node_id[p_tree] == BVHCommon::INVALID) { + uint32_t root_node_id; + TNode *node = _nodes.request(root_node_id); + node->clear(); + _root_node_id[p_tree] = root_node_id; + + // make the root node a leaf + uint32_t leaf_id; + TLeaf *leaf = _leaves.request(leaf_id); + leaf->clear(); + node->neg_leaf_id = -(int)leaf_id; + } +} + +bool node_is_leaf_full(TNode &tnode) const { + const TLeaf &leaf = _node_get_leaf(tnode); + return leaf.is_full(); +} + +public: +TLeaf &_node_get_leaf(TNode &tnode) { + BVH_ASSERT(tnode.is_leaf()); + return _leaves[tnode.get_leaf_id()]; +} + +const TLeaf &_node_get_leaf(const TNode &tnode) const { + BVH_ASSERT(tnode.is_leaf()); + return _leaves[tnode.get_leaf_id()]; +} + +private: diff --git a/core/math/bvh_pair.inc b/core/math/bvh_pair.inc new file mode 100644 index 0000000000..839db59a3a --- /dev/null +++ b/core/math/bvh_pair.inc @@ -0,0 +1,62 @@ +public: +// note .. maybe this can be attached to another node structure? +// depends which works best for cache. 
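
Looking back at _logic_choose_item_add_node() above: the descent relies on select_by_proximity(), which is defined in the ABB class rather than in this diff. One plausible reading, assuming a simple centre-distance metric (the engine's actual measure may differ), is sketched here:

#include <cmath>

struct Vec3 { float x, y, z; };
struct AabbF { Vec3 min, max; };

static Vec3 centre(const AabbF &b) {
    return { (b.min.x + b.max.x) * 0.5f, (b.min.y + b.max.y) * 0.5f, (b.min.z + b.max.z) * 0.5f };
}

// Returns 0 to descend into child_a, 1 for child_b: whichever child centre is
// nearest to the incoming AABB's centre (Manhattan distance is enough for a heuristic).
static int select_by_proximity_sketch(const AabbF &incoming, const AabbF &child_a, const AabbF &child_b) {
    const Vec3 c = centre(incoming);
    const Vec3 ca = centre(child_a);
    const Vec3 cb = centre(child_b);
    const float da = std::fabs(c.x - ca.x) + std::fabs(c.y - ca.y) + std::fabs(c.z - ca.z);
    const float db = std::fabs(c.x - cb.x) + std::fabs(c.y - cb.y) + std::fabs(c.z - cb.z);
    return (db < da) ? 1 : 0;
}
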
+struct ItemPairs { + struct Link { + void set(BVHHandle h, void *ud) { + handle = h; + userdata = ud; + } + BVHHandle handle; + void *userdata; + }; + + void clear() { + num_pairs = 0; + extended_pairs.reset(); + expanded_aabb = Bounds(); + } + + Bounds expanded_aabb; + + // maybe we can just use the number in the vector TODO + int32_t num_pairs; + LocalVector<Link> extended_pairs; + + void add_pair_to(BVHHandle h, void *p_userdata) { + Link temp; + temp.set(h, p_userdata); + + extended_pairs.push_back(temp); + num_pairs++; + } + + uint32_t find_pair_to(BVHHandle h) const { + for (int n = 0; n < num_pairs; n++) { + if (extended_pairs[n].handle == h) { + return n; + } + } + return -1; + } + + bool contains_pair_to(BVHHandle h) const { + return find_pair_to(h) != BVHCommon::INVALID; + } + + // return success + void *remove_pair_to(BVHHandle h) { + void *userdata = nullptr; + + for (int n = 0; n < num_pairs; n++) { + if (extended_pairs[n].handle == h) { + userdata = extended_pairs[n].userdata; + extended_pairs.remove_unordered(n); + num_pairs--; + break; + } + } + + return userdata; + } +}; diff --git a/core/math/bvh_public.inc b/core/math/bvh_public.inc new file mode 100644 index 0000000000..2c1e406712 --- /dev/null +++ b/core/math/bvh_public.inc @@ -0,0 +1,423 @@ +public: +BVHHandle item_add(T *p_userdata, bool p_active, const Bounds &p_aabb, int32_t p_subindex, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask, bool p_invisible = false) { +#ifdef BVH_VERBOSE_TREE + VERBOSE_PRINT("\nitem_add BEFORE"); + _debug_recursive_print_tree(0); + VERBOSE_PRINT("\n"); +#endif + + BVHABB_CLASS abb; + abb.from(p_aabb); + + // handle to be filled with the new item ref + BVHHandle handle; + + // ref id easier to pass around than handle + uint32_t ref_id; + + // this should never fail + ItemRef *ref = _refs.request(ref_id); + + // the extra data should be parallel list to the references + uint32_t extra_id; + ItemExtra *extra = _extra.request(extra_id); + BVH_ASSERT(extra_id == ref_id); + + // pairs info + if (USE_PAIRS) { + uint32_t pairs_id; + ItemPairs *pairs = _pairs.request(pairs_id); + pairs->clear(); + BVH_ASSERT(pairs_id == ref_id); + } + + extra->subindex = p_subindex; + extra->userdata = p_userdata; + extra->last_updated_tick = 0; + + // add an active reference to the list for slow incremental optimize + // this list must be kept in sync with the references as they are added or removed. 
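
The "kept in sync" requirement stated here is the usual swap-and-pop pattern with a back index stored per item, the same trick item_remove() uses further down. A tiny standalone sketch (hypothetical names, std::vector standing in for the engine's LocalVector):

#include <cstdint>
#include <vector>

struct ActiveList {
    std::vector<uint32_t> refs;         // active ref ids, order is not significant
    std::vector<uint32_t> index_of_ref; // back index: where each ref currently sits in 'refs'

    void add(uint32_t ref_id) {
        if (ref_id >= index_of_ref.size()) {
            index_of_ref.resize(ref_id + 1);
        }
        index_of_ref[ref_id] = (uint32_t)refs.size();
        refs.push_back(ref_id);
    }

    void remove(uint32_t ref_id) {
        const uint32_t hole = index_of_ref[ref_id];
        const uint32_t moved = refs.back();
        refs[hole] = moved;         // move the last entry into the hole (O(1), unordered)
        refs.pop_back();
        index_of_ref[moved] = hole; // keep the moved entry's back index in sync
    }
};
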
+ extra->active_ref_id = _active_refs.size(); + _active_refs.push_back(ref_id); + + if (USE_PAIRS) { + extra->pairable_mask = p_pairable_mask; + extra->pairable_type = p_pairable_type; + extra->pairable = p_pairable; + } else { + // just for safety, in case this gets queried etc + extra->pairable = 0; + p_pairable = false; + } + + // assign to handle to return + handle.set_id(ref_id); + + uint32_t tree_id = 0; + if (p_pairable) { + tree_id = 1; + } + + create_root_node(tree_id); + + // we must choose where to add to tree + if (p_active) { + ref->tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb); + + bool refit = _node_add_item(ref->tnode_id, ref_id, abb); + + if (refit) { + // only need to refit from the parent + const TNode &add_node = _nodes[ref->tnode_id]; + if (add_node.parent_id != BVHCommon::INVALID) { + refit_upward_and_balance(add_node.parent_id, tree_id); + } + } + } else { + ref->set_inactive(); + } + +#ifdef BVH_VERBOSE + // memory use + int mem = _refs.estimate_memory_use(); + mem += _nodes.estimate_memory_use(); + + String sz = _debug_aabb_to_string(abb); + VERBOSE_PRINT("\titem_add [" + itos(ref_id) + "] " + itos(_refs.size()) + " refs,\t" + itos(_nodes.size()) + " nodes " + sz); + VERBOSE_PRINT("mem use : " + itos(mem) + ", num nodes : " + itos(_nodes.size())); + +#endif + + return handle; +} + +void _debug_print_refs() { +#ifdef BVH_VERBOSE_TREE + print_line("refs....."); + for (int n = 0; n < _refs.size(); n++) { + const ItemRef &ref = _refs[n]; + print_line("tnode_id " + itos(ref.tnode_id) + ", item_id " + itos(ref.item_id)); + } + +#endif +} + +// returns false if noop +bool item_move(BVHHandle p_handle, const Bounds &p_aabb) { + uint32_t ref_id = p_handle.id(); + + // get the reference + ItemRef &ref = _refs[ref_id]; + if (!ref.is_active()) { + return false; + } + + BVHABB_CLASS abb; + abb.from(p_aabb); + + BVH_ASSERT(ref.tnode_id != BVHCommon::INVALID); + TNode &tnode = _nodes[ref.tnode_id]; + + // does it fit within the current aabb? + if (tnode.aabb.is_other_within(abb)) { + // do nothing .. fast path .. not moved enough to need refit + + // however we WILL update the exact aabb in the leaf, as this will be needed + // for accurate collision detection + TLeaf &leaf = _node_get_leaf(tnode); + + BVHABB_CLASS &leaf_abb = leaf.get_aabb(ref.item_id); + + // no change? 
+ if (leaf_abb == abb) { + return false; + } + + leaf_abb = abb; + _integrity_check_all(); + + return true; + } + + uint32_t tree_id = _handle_get_tree_id(p_handle); + + // remove and reinsert + node_remove_item(ref_id, tree_id); + + // we must choose where to add to tree + ref.tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb); + + // add to the tree + bool needs_refit = _node_add_item(ref.tnode_id, ref_id, abb); + + // only need to refit from the PARENT + if (needs_refit) { + // only need to refit from the parent + const TNode &add_node = _nodes[ref.tnode_id]; + if (add_node.parent_id != BVHCommon::INVALID) { + // not sure we need to rebalance all the time, this can be done less often + refit_upward(add_node.parent_id); + } + //refit_upward_and_balance(add_node.parent_id); + } + + return true; +} + +void item_remove(BVHHandle p_handle) { + uint32_t ref_id = p_handle.id(); + + uint32_t tree_id = _handle_get_tree_id(p_handle); + + VERBOSE_PRINT("item_remove [" + itos(ref_id) + "] "); + + //////////////////////////////////////// + // remove the active reference from the list for slow incremental optimize + // this list must be kept in sync with the references as they are added or removed. + uint32_t active_ref_id = _extra[ref_id].active_ref_id; + uint32_t ref_id_moved_back = _active_refs[_active_refs.size() - 1]; + + // swap back and decrement for fast unordered remove + _active_refs[active_ref_id] = ref_id_moved_back; + _active_refs.resize(_active_refs.size() - 1); + + // keep the moved active reference up to date + _extra[ref_id_moved_back].active_ref_id = active_ref_id; + //////////////////////////////////////// + + // remove the item from the node (only if active) + if (_refs[ref_id].is_active()) { + node_remove_item(ref_id, tree_id); + } + + // remove the item reference + _refs.free(ref_id); + _extra.free(ref_id); + if (USE_PAIRS) { + _pairs.free(ref_id); + } + + // don't think refit_all is necessary? 
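
A note on the fast path in item_move() above: node bounds are stored slightly expanded (by _node_expansion), so a small movement usually keeps the new AABB inside the owning node and only the exact AABB in the leaf is rewritten; no refit or re-insertion happens. The containment test behind is_other_within() amounts to something like the following sketch (simplified types, not the engine's BVH_ABB code):

struct Vec3 { float x, y, z; };
struct AabbF { Vec3 min, max; };

// True when 'inner' lies completely inside 'outer'.
static bool contains(const AabbF &outer, const AabbF &inner) {
    return inner.min.x >= outer.min.x && inner.min.y >= outer.min.y && inner.min.z >= outer.min.z &&
            inner.max.x <= outer.max.x && inner.max.y <= outer.max.y && inner.max.z <= outer.max.z;
}

// Node bounds are grown by a margin so that small moves stay "contained".
static AabbF expanded(const AabbF &b, float p_margin) {
    return { { b.min.x - p_margin, b.min.y - p_margin, b.min.z - p_margin },
            { b.max.x + p_margin, b.max.y + p_margin, b.max.z + p_margin } };
}
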
+ //refit_all(_tree_id); + +#ifdef BVH_VERBOSE_TREE + _debug_recursive_print_tree(tree_id); +#endif +} + +// returns success +bool item_activate(BVHHandle p_handle, const Bounds &p_aabb) { + uint32_t ref_id = p_handle.id(); + ItemRef &ref = _refs[ref_id]; + if (ref.is_active()) { + // noop + return false; + } + + // add to tree + BVHABB_CLASS abb; + abb.from(p_aabb); + + uint32_t tree_id = _handle_get_tree_id(p_handle); + + // we must choose where to add to tree + ref.tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb); + _node_add_item(ref.tnode_id, ref_id, abb); + + refit_upward_and_balance(ref.tnode_id, tree_id); + + return true; +} + +// returns success +bool item_deactivate(BVHHandle p_handle) { + uint32_t ref_id = p_handle.id(); + ItemRef &ref = _refs[ref_id]; + if (!ref.is_active()) { + // noop + return false; + } + + uint32_t tree_id = _handle_get_tree_id(p_handle); + + // remove from tree + BVHABB_CLASS abb; + node_remove_item(ref_id, tree_id, &abb); + + // mark as inactive + ref.set_inactive(); + return true; +} + +bool item_get_active(BVHHandle p_handle) const { + uint32_t ref_id = p_handle.id(); + const ItemRef &ref = _refs[ref_id]; + return ref.is_active(); +} + +// during collision testing, we want to set the mask and whether pairable for the item testing from +void item_fill_cullparams(BVHHandle p_handle, CullParams &r_params) const { + uint32_t ref_id = p_handle.id(); + const ItemExtra &extra = _extra[ref_id]; + + // testing from a non pairable item, we only want to test pairable items + r_params.test_pairable_only = extra.pairable == 0; + + // we take into account the mask of the item testing from + r_params.mask = extra.pairable_mask; + r_params.pairable_type = extra.pairable_type; +} + +bool item_is_pairable(const BVHHandle &p_handle) { + uint32_t ref_id = p_handle.id(); + const ItemExtra &extra = _extra[ref_id]; + return extra.pairable != 0; +} + +void item_get_ABB(const BVHHandle &p_handle, BVHABB_CLASS &r_abb) { + // change tree? + uint32_t ref_id = p_handle.id(); + const ItemRef &ref = _refs[ref_id]; + + TNode &tnode = _nodes[ref.tnode_id]; + TLeaf &leaf = _node_get_leaf(tnode); + + r_abb = leaf.get_aabb(ref.item_id); +} + +bool item_set_pairable(const BVHHandle &p_handle, bool p_pairable, uint32_t p_pairable_type, uint32_t p_pairable_mask) { + // change tree? 
+ uint32_t ref_id = p_handle.id(); + + ItemExtra &ex = _extra[ref_id]; + ItemRef &ref = _refs[ref_id]; + + bool active = ref.is_active(); + bool pairable_changed = (ex.pairable != 0) != p_pairable; + bool state_changed = pairable_changed || (ex.pairable_type != p_pairable_type) || (ex.pairable_mask != p_pairable_mask); + + ex.pairable_type = p_pairable_type; + ex.pairable_mask = p_pairable_mask; + + if (active && pairable_changed) { + // record abb + TNode &tnode = _nodes[ref.tnode_id]; + TLeaf &leaf = _node_get_leaf(tnode); + BVHABB_CLASS abb = leaf.get_aabb(ref.item_id); + + // make sure current tree is correct prior to changing + uint32_t tree_id = _handle_get_tree_id(p_handle); + + // remove from old tree + node_remove_item(ref_id, tree_id); + + // we must set the pairable AFTER getting the current tree + // because the pairable status determines which tree + ex.pairable = p_pairable; + + // add to new tree + tree_id = _handle_get_tree_id(p_handle); + create_root_node(tree_id); + + // we must choose where to add to tree + ref.tnode_id = _logic_choose_item_add_node(_root_node_id[tree_id], abb); + bool needs_refit = _node_add_item(ref.tnode_id, ref_id, abb); + + // only need to refit from the PARENT + if (needs_refit) { + // only need to refit from the parent + const TNode &add_node = _nodes[ref.tnode_id]; + if (add_node.parent_id != BVHCommon::INVALID) { + refit_upward_and_balance(add_node.parent_id, tree_id); + } + } + } else { + // always keep this up to date + ex.pairable = p_pairable; + } + + return state_changed; +} + +void incremental_optimize() { + // first update all aabbs as one off step.. + // this is cheaper than doing it on each move as each leaf may get touched multiple times + // in a frame. + for (int n = 0; n < NUM_TREES; n++) { + if (_root_node_id[n] != BVHCommon::INVALID) { + refit_branch(_root_node_id[n]); + } + } + + // now do small section reinserting to get things moving + // gradually, and keep items in the right leaf + if (_current_active_ref >= _active_refs.size()) { + _current_active_ref = 0; + } + + // special case + if (!_active_refs.size()) { + return; + } + + uint32_t ref_id = _active_refs[_current_active_ref++]; + + _logic_item_remove_and_reinsert(ref_id); + +#ifdef BVH_VERBOSE + /* + // memory use + int mem_refs = _refs.estimate_memory_use(); + int mem_nodes = _nodes.estimate_memory_use(); + int mem_leaves = _leaves.estimate_memory_use(); + + String sz; + sz += "mem_refs : " + itos(mem_refs) + " "; + sz += "mem_nodes : " + itos(mem_nodes) + " "; + sz += "mem_leaves : " + itos(mem_leaves) + " "; + sz += ", num nodes : " + itos(_nodes.size()); + print_line(sz); + */ +#endif +} + +void update() { + incremental_optimize(); + + // keep the expansion values up to date with the world bound +//#define BVH_ALLOW_AUTO_EXPANSION +#ifdef BVH_ALLOW_AUTO_EXPANSION + if (_auto_node_expansion || _auto_pairing_expansion) { + BVHABB_CLASS world_bound; + world_bound.set_to_max_opposite_extents(); + + bool bound_valid = false; + + for (int n = 0; n < NUM_TREES; n++) { + uint32_t node_id = _root_node_id[n]; + if (node_id != BVHCommon::INVALID) { + world_bound.merge(_nodes[node_id].aabb); + bound_valid = true; + } + } + + // if there are no nodes, do nothing, but if there are... + if (bound_valid) { + Bounds bb; + world_bound.to(bb); + real_t size = bb.get_longest_axis_size(); + + // automatic AI decision for best parameters. + // These can be overridden in project settings. 
+ + // these magic numbers are determined by experiment + if (_auto_node_expansion) { + _node_expansion = size * 0.025; + } + if (_auto_pairing_expansion) { + _pairing_expansion = size * 0.009; + } + } + } +#endif +} diff --git a/core/math/bvh_refit.inc b/core/math/bvh_refit.inc new file mode 100644 index 0000000000..717a3438c7 --- /dev/null +++ b/core/math/bvh_refit.inc @@ -0,0 +1,141 @@ +void _debug_node_verify_bound(uint32_t p_node_id) { + TNode &node = _nodes[p_node_id]; + BVHABB_CLASS abb_before = node.aabb; + + node_update_aabb(node); + + BVHABB_CLASS abb_after = node.aabb; + CRASH_COND(abb_before != abb_after); +} + +void node_update_aabb(TNode &tnode) { + tnode.aabb.set_to_max_opposite_extents(); + tnode.height = 0; + + if (!tnode.is_leaf()) { + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_node_id = tnode.children[n]; + + // merge with child aabb + const TNode &tchild = _nodes[child_node_id]; + tnode.aabb.merge(tchild.aabb); + + // do heights at the same time + if (tchild.height > tnode.height) { + tnode.height = tchild.height; + } + } + + // the height of a non leaf is always 1 bigger than the biggest child + tnode.height++; + +#ifdef BVH_CHECKS + if (!tnode.num_children) { + // the 'blank' aabb will screw up parent aabbs + WARN_PRINT("BVH_Tree::TNode no children, AABB is undefined"); + } +#endif + } else { + // leaf + const TLeaf &leaf = _node_get_leaf(tnode); + + for (int n = 0; n < leaf.num_items; n++) { + tnode.aabb.merge(leaf.get_aabb(n)); + } + + // now the leaf items are unexpanded, we expand only in the node AABB + tnode.aabb.expand(_node_expansion); +#ifdef BVH_CHECKS + if (!leaf.num_items) { + // the 'blank' aabb will screw up parent aabbs + WARN_PRINT("BVH_Tree::TLeaf no items, AABB is undefined"); + } +#endif + } +} + +void refit_all(int p_tree_id) { + refit_downward(_root_node_id[p_tree_id]); +} + +void refit_upward(uint32_t p_node_id) { + while (p_node_id != BVHCommon::INVALID) { + TNode &tnode = _nodes[p_node_id]; + node_update_aabb(tnode); + p_node_id = tnode.parent_id; + } +} + +void refit_upward_and_balance(uint32_t p_node_id, uint32_t p_tree_id) { + while (p_node_id != BVHCommon::INVALID) { + uint32_t before = p_node_id; + p_node_id = _logic_balance(p_node_id, p_tree_id); + + if (before != p_node_id) { + VERBOSE_PRINT("REBALANCED!"); + } + + TNode &tnode = _nodes[p_node_id]; + + // update overall aabb from the children + node_update_aabb(tnode); + + p_node_id = tnode.parent_id; + } +} + +void refit_downward(uint32_t p_node_id) { + TNode &tnode = _nodes[p_node_id]; + + // do children first + if (!tnode.is_leaf()) { + for (int n = 0; n < tnode.num_children; n++) { + refit_downward(tnode.children[n]); + } + } + + node_update_aabb(tnode); +} + +// go down to the leaves, then refit upward +void refit_branch(uint32_t p_node_id) { + // our function parameters to keep on a stack + struct RefitParams { + uint32_t node_id; + }; + + // most of the iterative functionality is contained in this helper class + BVH_IterativeInfo<RefitParams> ii; + + // alloca must allocate the stack from this function, it cannot be allocated in the + // helper class + ii.stack = (RefitParams *)alloca(ii.get_alloca_stacksize()); + + // seed the stack + ii.get_first()->node_id = p_node_id; + + RefitParams rp; + + // while there are still more nodes on the stack + while (ii.pop(rp)) { + TNode &tnode = _nodes[rp.node_id]; + + // do children first + if (!tnode.is_leaf()) { + for (int n = 0; n < tnode.num_children; n++) { + uint32_t child_id = tnode.children[n]; + + // add to the 
stack + RefitParams *child = ii.request(); + child->node_id = child_id; + } + } else { + // leaf .. only refit upward if dirty + TLeaf &leaf = _node_get_leaf(tnode); + if (leaf.is_dirty()) { + leaf.set_dirty(false); + refit_upward(p_node_id); + } + } + } // while more nodes to pop +} diff --git a/core/math/bvh_split.inc b/core/math/bvh_split.inc new file mode 100644 index 0000000000..3fcc4c7b10 --- /dev/null +++ b/core/math/bvh_split.inc @@ -0,0 +1,294 @@ +void _split_inform_references(uint32_t p_node_id) { + TNode &node = _nodes[p_node_id]; + TLeaf &leaf = _node_get_leaf(node); + + for (int n = 0; n < leaf.num_items; n++) { + uint32_t ref_id = leaf.get_item_ref_id(n); + + ItemRef &ref = _refs[ref_id]; + ref.tnode_id = p_node_id; + ref.item_id = n; + } +} + +void _split_leaf_sort_groups_simple(int &num_a, int &num_b, uint16_t *group_a, uint16_t *group_b, const BVHABB_CLASS *temp_bounds, const BVHABB_CLASS full_bound) { + // special case for low leaf sizes .. should static compile out + if (MAX_ITEMS < 4) { + uint32_t ind = group_a[0]; + + // add to b + group_b[num_b++] = ind; + + // remove from a + group_a[0] = group_a[num_a - 1]; + num_a--; + return; + } + + Point centre = full_bound.calculate_centre(); + Point size = full_bound.calculate_size(); + + int order[3]; + + order[0] = size.min_axis(); + order[2] = size.max_axis(); + order[1] = 3 - (order[0] + order[2]); + + // simplest case, split on the longest axis + int split_axis = order[0]; + for (int a = 0; a < num_a; a++) { + uint32_t ind = group_a[a]; + + if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) { + // add to b + group_b[num_b++] = ind; + + // remove from a + group_a[a] = group_a[num_a - 1]; + num_a--; + + // do this one again, as it has been replaced + a--; + } + } + + // detect when split on longest axis failed + int min_threshold = MAX_ITEMS / 4; + int min_group_size[3]; + min_group_size[0] = MIN(num_a, num_b); + if (min_group_size[0] < min_threshold) { + // slow but sure .. first move everything back into a + for (int b = 0; b < num_b; b++) { + group_a[num_a++] = group_b[b]; + } + num_b = 0; + + // now calculate the best split + for (int axis = 1; axis < 3; axis++) { + split_axis = order[axis]; + int count = 0; + + for (int a = 0; a < num_a; a++) { + uint32_t ind = group_a[a]; + + if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) { + count++; + } + } + + min_group_size[axis] = MIN(count, num_a - count); + } // for axis + + // best axis + int best_axis = 0; + int best_min = min_group_size[0]; + for (int axis = 1; axis < 3; axis++) { + if (min_group_size[axis] > best_min) { + best_min = min_group_size[axis]; + best_axis = axis; + } + } + + // now finally do the split + if (best_min > 0) { + split_axis = order[best_axis]; + + for (int a = 0; a < num_a; a++) { + uint32_t ind = group_a[a]; + + if (temp_bounds[ind].min.coord[split_axis] > centre.coord[split_axis]) { + // add to b + group_b[num_b++] = ind; + + // remove from a + group_a[a] = group_a[num_a - 1]; + num_a--; + + // do this one again, as it has been replaced + a--; + } + } + } // if there was a split! + } // if the longest axis wasn't a good split + + // special case, none crossed threshold + if (!num_b) { + uint32_t ind = group_a[0]; + + // add to b + group_b[num_b++] = ind; + + // remove from a + group_a[0] = group_a[num_a - 1]; + num_a--; + } + // opposite problem! 
:) + if (!num_a) { + uint32_t ind = group_b[0]; + + // add to a + group_a[num_a++] = ind; + + // remove from b + group_b[0] = group_b[num_b - 1]; + num_b--; + } +} + +void _split_leaf_sort_groups(int &num_a, int &num_b, uint16_t *group_a, uint16_t *group_b, const BVHABB_CLASS *temp_bounds) { + BVHABB_CLASS groupb_aabb; + groupb_aabb.set_to_max_opposite_extents(); + for (int n = 0; n < num_b; n++) { + int which = group_b[n]; + groupb_aabb.merge(temp_bounds[which]); + } + BVHABB_CLASS groupb_aabb_new; + + BVHABB_CLASS rest_aabb; + + float best_size = FLT_MAX; + int best_candidate = -1; + + // find most likely from a to move into b + for (int check = 0; check < num_a; check++) { + rest_aabb.set_to_max_opposite_extents(); + groupb_aabb_new = groupb_aabb; + + // find aabb of all the rest + for (int rest = 0; rest < num_a; rest++) { + if (rest == check) { + continue; + } + + int which = group_a[rest]; + rest_aabb.merge(temp_bounds[which]); + } + + groupb_aabb_new.merge(temp_bounds[group_a[check]]); + + // now compare the sizes + float size = groupb_aabb_new.get_area() + rest_aabb.get_area(); + if (size < best_size) { + best_size = size; + best_candidate = check; + } + } + + // we should now have the best, move it from group a to group b + group_b[num_b++] = group_a[best_candidate]; + + // remove best candidate from group a + num_a--; + group_a[best_candidate] = group_a[num_a]; +} + +uint32_t split_leaf(uint32_t p_node_id, const BVHABB_CLASS &p_added_item_aabb) { + return split_leaf_complex(p_node_id, p_added_item_aabb); +} + +// aabb is the new inserted node +uint32_t split_leaf_complex(uint32_t p_node_id, const BVHABB_CLASS &p_added_item_aabb) { + VERBOSE_PRINT("split_leaf"); + + // note the tnode before and AFTER splitting may be a different address + // in memory because the vector could get relocated. So we need to reget + // the tnode after the split + BVH_ASSERT(_nodes[p_node_id].is_leaf()); + + // first create child leaf nodes + uint32_t *child_ids = (uint32_t *)alloca(sizeof(uint32_t) * MAX_CHILDREN); + + for (int n = 0; n < MAX_CHILDREN; n++) { + // create node children + TNode *child_node = _nodes.request(child_ids[n]); + + child_node->clear(); + + // back link to parent + child_node->parent_id = p_node_id; + + // make each child a leaf node + node_make_leaf(child_ids[n]); + } + + // don't get any leaves or nodes till AFTER the split + TNode &tnode = _nodes[p_node_id]; + uint32_t orig_leaf_id = tnode.get_leaf_id(); + const TLeaf &orig_leaf = _node_get_leaf(tnode); + + // store the final child ids + for (int n = 0; n < MAX_CHILDREN; n++) { + tnode.children[n] = child_ids[n]; + } + + // mark as no longer a leaf node + tnode.num_children = MAX_CHILDREN; + + // 2 groups, A and B, and assign children to each to split equally + int max_children = orig_leaf.num_items + 1; // plus 1 for the wildcard .. the item being added + //CRASH_COND(max_children > MAX_CHILDREN); + + uint16_t *group_a = (uint16_t *)alloca(sizeof(uint16_t) * max_children); + uint16_t *group_b = (uint16_t *)alloca(sizeof(uint16_t) * max_children); + + // we are copying the ABBs. This is ugly, but we need one extra for the inserted item... 
+ BVHABB_CLASS *temp_bounds = (BVHABB_CLASS *)alloca(sizeof(BVHABB_CLASS) * max_children); + + int num_a = max_children; + int num_b = 0; + + // setup - start with all in group a + for (int n = 0; n < orig_leaf.num_items; n++) { + group_a[n] = n; + temp_bounds[n] = orig_leaf.get_aabb(n); + } + // wildcard + int wildcard = orig_leaf.num_items; + + group_a[wildcard] = wildcard; + temp_bounds[wildcard] = p_added_item_aabb; + + // we can choose here either an equal split, or just 1 in the new leaf + _split_leaf_sort_groups_simple(num_a, num_b, group_a, group_b, temp_bounds, tnode.aabb); + + uint32_t wildcard_node = BVHCommon::INVALID; + + // now there should be equal numbers in both groups + for (int n = 0; n < num_a; n++) { + int which = group_a[n]; + + if (which != wildcard) { + const BVHABB_CLASS &source_item_aabb = orig_leaf.get_aabb(which); + uint32_t source_item_ref_id = orig_leaf.get_item_ref_id(which); + //const Item &source_item = orig_leaf.get_item(which); + _node_add_item(tnode.children[0], source_item_ref_id, source_item_aabb); + } else { + wildcard_node = tnode.children[0]; + } + } + for (int n = 0; n < num_b; n++) { + int which = group_b[n]; + + if (which != wildcard) { + const BVHABB_CLASS &source_item_aabb = orig_leaf.get_aabb(which); + uint32_t source_item_ref_id = orig_leaf.get_item_ref_id(which); + //const Item &source_item = orig_leaf.get_item(which); + _node_add_item(tnode.children[1], source_item_ref_id, source_item_aabb); + } else { + wildcard_node = tnode.children[1]; + } + } + + // now remove all items from the parent and replace with the child nodes + _leaves.free(orig_leaf_id); + + // we should keep the references up to date! + for (int n = 0; n < MAX_CHILDREN; n++) { + _split_inform_references(tnode.children[n]); + } + + refit_upward(p_node_id); + + BVH_ASSERT(wildcard_node != BVHCommon::INVALID); + return wildcard_node; +} diff --git a/core/math/bvh_structs.inc b/core/math/bvh_structs.inc new file mode 100644 index 0000000000..1d1e0e6468 --- /dev/null +++ b/core/math/bvh_structs.inc @@ -0,0 +1,180 @@ + +public: +struct ItemRef { + uint32_t tnode_id; // -1 is invalid + uint32_t item_id; // in the leaf + + bool is_active() const { return tnode_id != BVHCommon::INACTIVE; } + void set_inactive() { + tnode_id = BVHCommon::INACTIVE; + item_id = BVHCommon::INACTIVE; + } +}; + +// extra info kept in separate parallel list to the references, +// as this is less used as keeps cache better +struct ItemExtra { + uint32_t last_updated_tick; + uint32_t pairable; + uint32_t pairable_mask; + uint32_t pairable_type; + + int32_t subindex; + + // the active reference is a separate list of which references + // are active so that we can slowly iterate through it over many frames for + // slow optimize. 
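
Looking back at the greedy regrouping in _split_leaf_sort_groups() above: it compares summed get_area() values, and get_area() itself is not part of this diff. A typical cost for this kind of comparison is the box surface area, as in the classic surface area heuristic; a sketch under that assumption:

struct Vec3 { float x, y, z; };
struct AabbF { Vec3 min, max; };

// Surface area of an AABB; smaller summed areas mean tighter, cheaper-to-traverse groups.
static float surface_area(const AabbF &b) {
    const float dx = b.max.x - b.min.x;
    const float dy = b.max.y - b.min.y;
    const float dz = b.max.z - b.min.z;
    return 2.0f * (dx * dy + dy * dz + dz * dx);
}
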
+ uint32_t active_ref_id; + + T *userdata; +}; + +// this is an item OR a child node depending on whether a leaf node +struct Item { + BVHABB_CLASS aabb; + uint32_t item_ref_id; +}; + +// tree leaf +struct TLeaf { + uint16_t num_items; + +private: + uint16_t dirty; + // separate data orientated lists for faster SIMD traversal + uint32_t item_ref_ids[MAX_ITEMS]; + BVHABB_CLASS aabbs[MAX_ITEMS]; + +public: + // accessors + BVHABB_CLASS &get_aabb(uint32_t p_id) { return aabbs[p_id]; } + const BVHABB_CLASS &get_aabb(uint32_t p_id) const { return aabbs[p_id]; } + + uint32_t &get_item_ref_id(uint32_t p_id) { return item_ref_ids[p_id]; } + const uint32_t &get_item_ref_id(uint32_t p_id) const { return item_ref_ids[p_id]; } + + bool is_dirty() const { return dirty; } + void set_dirty(bool p) { dirty = p; } + + void clear() { + num_items = 0; + set_dirty(true); + } + bool is_full() const { return num_items >= MAX_ITEMS; } + + void remove_item_unordered(uint32_t p_id) { + BVH_ASSERT(p_id < num_items); + num_items--; + aabbs[p_id] = aabbs[num_items]; + item_ref_ids[p_id] = item_ref_ids[num_items]; + } + + uint32_t request_item() { + if (num_items < MAX_ITEMS) { + uint32_t id = num_items; + num_items++; + return id; + } + return -1; + } +}; + +// tree node +struct TNode { + BVHABB_CLASS aabb; + // either number of children if positive + // or leaf id if negative (leaf id 0 is disallowed) + union { + int32_t num_children; + int32_t neg_leaf_id; + }; + uint32_t parent_id; // or -1 + uint16_t children[MAX_CHILDREN]; + + // height in the tree, where leaves are 0, and all above are 1+ + // (or the highest where there is a tie off) + int32_t height; + + bool is_leaf() const { return num_children < 0; } + void set_leaf_id(int id) { neg_leaf_id = -id; } + int get_leaf_id() const { return -neg_leaf_id; } + + void clear() { + num_children = 0; + parent_id = BVHCommon::INVALID; + height = 0; // or -1 for testing + + // for safety set to improbable value + aabb.set_to_max_opposite_extents(); + + // other members are not blanked for speed .. they may be uninitialized + } + + bool is_full_of_children() const { return num_children >= MAX_CHILDREN; } + + void remove_child_internal(uint32_t child_num) { + children[child_num] = children[num_children - 1]; + num_children--; + } + + int find_child(uint32_t p_child_node_id) { + BVH_ASSERT(!is_leaf()); + + for (int n = 0; n < num_children; n++) { + if (children[n] == p_child_node_id) { + return n; + } + } + + // not found + return -1; + } +}; + +// instead of using linked list we maintain +// item references (for quick lookup) +PooledList<ItemRef, true> _refs; +PooledList<ItemExtra, true> _extra; +PooledList<ItemPairs> _pairs; + +// these 2 are not in sync .. nodes != leaves! +PooledList<TNode, true> _nodes; +PooledList<TLeaf, true> _leaves; + +// we can maintain an un-ordered list of which references are active, +// in order to do a slow incremental optimize of the tree over each frame. +// This will work best if dynamic objects and static objects are in a different tree. +LocalVector<uint32_t, uint32_t, true> _active_refs; +uint32_t _current_active_ref = 0; + +// instead of translating directly to the userdata output, +// we keep an intermediate list of hits as reference IDs, which can be used +// for pairing collision detection +LocalVector<uint32_t, uint32_t, true> _cull_hits; + +// we now have multiple root nodes, allowing us to store +// more than 1 tree. 
This can be more efficient, while sharing the same +// common lists +enum { NUM_TREES = 2, +}; + +// Tree 0 - Non pairable +// Tree 1 - Pairable +// This is more efficient because in physics we only need check non pairable against the pairable tree. +uint32_t _root_node_id[NUM_TREES]; + +// these values may need tweaking according to the project +// the bound of the world, and the average velocities of the objects + +// node expansion is important in the rendering tree +// larger values give less re-insertion as items move... +// but on the other hand over estimates the bounding box of nodes. +// we can either use auto mode, where the expansion is based on the root node size, or specify manually +real_t _node_expansion = 0.5; +bool _auto_node_expansion = true; + +// pairing expansion important for physics pairing +// larger values gives more 'sticky' pairing, and is less likely to exhibit tunneling +// we can either use auto mode, where the expansion is based on the root node size, or specify manually +real_t _pairing_expansion = 0.1; +bool _auto_pairing_expansion = true; diff --git a/core/math/bvh_tree.h b/core/math/bvh_tree.h new file mode 100644 index 0000000000..3169d31ec7 --- /dev/null +++ b/core/math/bvh_tree.h @@ -0,0 +1,421 @@ +/*************************************************************************/ +/* bvh_tree.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef BVH_TREE_H +#define BVH_TREE_H + +// BVH Tree +// This is an implementation of a dynamic BVH with templated leaf size. +// This differs from most dynamic BVH in that it can handle more than 1 object +// in leaf nodes. This can make it far more efficient in certain circumstances. +// It also means that the splitting logic etc have to be completely different +// to a simpler tree. +// Note that MAX_CHILDREN should be fixed at 2 for now. 
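
To make the template parameters just described concrete, a hypothetical instantiation and a few calls might look like the sketch below. The payload type, leaf capacity and flags are illustrative only; the actual parameter list and the item_add/item_move/item_remove signatures are the ones shown in the included .inc files and further down in this header.

#include "core/math/bvh_tree.h"

struct RenderElement {}; // hypothetical payload type

void bvh_tree_usage_sketch() {
    // MAX_CHILDREN is 2 as noted above; the leaf capacity (32) and USE_PAIRS (false)
    // are illustrative. Bounds and Point take their AABB / Vector3 defaults.
    BVH_Tree<RenderElement *, 2, 32, false> tree;

    RenderElement element;
    BVHHandle handle = tree.item_add(&element, /* p_active */ true,
            AABB(Vector3(0, 0, 0), Vector3(1, 1, 1)), /* p_subindex */ 0,
            /* p_pairable */ false, /* p_pairable_type */ 0, /* p_pairable_mask */ 0);

    // A later move only touches the tree if the item leaves its expanded node bound.
    tree.item_move(handle, AABB(Vector3(0.1, 0, 0), Vector3(1, 1, 1)));

    tree.item_remove(handle);
}
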
+ +#include "core/math/aabb.h" +#include "core/math/bvh_abb.h" +#include "core/math/geometry_3d.h" +#include "core/math/vector3.h" +#include "core/string/print_string.h" +#include "core/templates/local_vector.h" +#include "core/templates/pooled_list.h" +#include <limits.h> + +#define BVHABB_CLASS BVH_ABB<Bounds, Point> + +// never do these checks in release +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) +//#define BVH_VERBOSE +//#define BVH_VERBOSE_TREE + +//#define BVH_VERBOSE_FRAME +//#define BVH_CHECKS +//#define BVH_INTEGRITY_CHECKS +#endif + +// debug only assert +#ifdef BVH_CHECKS +#define BVH_ASSERT(a) CRASH_COND((a) == false) +#else +#define BVH_ASSERT(a) +#endif + +#ifdef BVH_VERBOSE +#define VERBOSE_PRINT print_line +#else +#define VERBOSE_PRINT(a) +#endif + +// really just a namespace +struct BVHCommon { + // these could possibly also be the same constant, + // although this may be useful for debugging. + // or use zero for invalid and +1 based indices. + static const uint32_t INVALID = (0xffffffff); + static const uint32_t INACTIVE = (0xfffffffe); +}; + +// really a handle, can be anything +// note that zero is a valid reference for the BVH .. this may involve using +// a plus one based ID for clients that expect 0 to be invalid. +struct BVHHandle { + // conversion operator + operator uint32_t() const { return _data; } + void set(uint32_t p_value) { _data = p_value; } + + uint32_t _data; + + void set_invalid() { _data = BVHCommon::INVALID; } + bool is_invalid() const { return _data == BVHCommon::INVALID; } + uint32_t id() const { return _data; } + void set_id(uint32_t p_id) { _data = p_id; } + + bool operator==(const BVHHandle &p_h) const { return _data == p_h._data; } + bool operator!=(const BVHHandle &p_h) const { return (*this == p_h) == false; } +}; + +// helper class to make iterative versions of recursive functions +template <class T> +class BVH_IterativeInfo { +public: + enum { + ALLOCA_STACK_SIZE = 128 + }; + + int32_t depth = 1; + int32_t threshold = ALLOCA_STACK_SIZE - 2; + T *stack; + //only used in rare occasions when you run out of alloca memory + // because tree is too unbalanced. 
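
One more note on BVHHandle above: since id 0 is a valid reference inside the tree, a caller that wants 0 to mean "no handle" can store the id plus one, as the comment suggests. A hypothetical client-side wrapper:

#include <cstdint>

// Hypothetical wrapper, not part of this diff: keeps 0 free as a "null" value by
// storing the BVH id offset by one.
struct ClientHandle {
    uint32_t plus_one = 0; // 0 means unset

    void set_from_bvh(uint32_t p_bvh_id) { plus_one = p_bvh_id + 1; }
    bool is_set() const { return plus_one != 0; }
    uint32_t bvh_id() const { return plus_one - 1; } // only meaningful when is_set()
};
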
+ LocalVector<T> aux_stack; + int32_t get_alloca_stacksize() const { return ALLOCA_STACK_SIZE * sizeof(T); } + + T *get_first() const { + return &stack[0]; + } + + // pop the last member of the stack, or return false + bool pop(T &r_value) { + if (!depth) { + return false; + } + + depth--; + r_value = stack[depth]; + return true; + } + + // request new addition to stack + T *request() { + if (depth > threshold) { + if (aux_stack.is_empty()) { + aux_stack.resize(ALLOCA_STACK_SIZE * 2); + memcpy(aux_stack.ptr(), stack, get_alloca_stacksize()); + } else { + aux_stack.resize(aux_stack.size() * 2); + } + stack = aux_stack.ptr(); + threshold = aux_stack.size() - 2; + } + return &stack[depth++]; + } +}; + +template <class T, int MAX_CHILDREN, int MAX_ITEMS, bool USE_PAIRS = false, class Bounds = AABB, class Point = Vector3> +class BVH_Tree { + friend class BVH; + +#include "bvh_pair.inc" +#include "bvh_structs.inc" + +public: + BVH_Tree() { + for (int n = 0; n < NUM_TREES; n++) { + _root_node_id[n] = BVHCommon::INVALID; + } + + // disallow zero leaf ids + // (as these ids are stored as negative numbers in the node) + uint32_t dummy_leaf_id; + _leaves.request(dummy_leaf_id); + } + +private: + bool node_add_child(uint32_t p_node_id, uint32_t p_child_node_id) { + TNode &tnode = _nodes[p_node_id]; + if (tnode.is_full_of_children()) { + return false; + } + + tnode.children[tnode.num_children] = p_child_node_id; + tnode.num_children += 1; + + // back link in the child to the parent + TNode &tnode_child = _nodes[p_child_node_id]; + tnode_child.parent_id = p_node_id; + + return true; + } + + void node_replace_child(uint32_t p_parent_id, uint32_t p_old_child_id, uint32_t p_new_child_id) { + TNode &parent = _nodes[p_parent_id]; + BVH_ASSERT(!parent.is_leaf()); + + int child_num = parent.find_child(p_old_child_id); + BVH_ASSERT(child_num != BVHCommon::INVALID); + parent.children[child_num] = p_new_child_id; + + TNode &new_child = _nodes[p_new_child_id]; + new_child.parent_id = p_parent_id; + } + + void node_remove_child(uint32_t p_parent_id, uint32_t p_child_id, uint32_t p_tree_id, bool p_prevent_sibling = false) { + TNode &parent = _nodes[p_parent_id]; + BVH_ASSERT(!parent.is_leaf()); + + int child_num = parent.find_child(p_child_id); + BVH_ASSERT(child_num != BVHCommon::INVALID); + + parent.remove_child_internal(child_num); + + // no need to keep back references for children at the moment + + uint32_t sibling_id; // always a node id, as tnode is never a leaf + bool sibling_present = false; + + // if there are more children, or this is the root node, don't try and delete + if (parent.num_children > 1) { + return; + } + + // if there is 1 sibling, it can be moved to be a child of the + if (parent.num_children == 1) { + // else there is now a redundant node with one child, which can be removed + sibling_id = parent.children[0]; + sibling_present = true; + } + + // now there may be no children in this node .. 
in which case it can be deleted + // remove node if empty + // remove link from parent + uint32_t grandparent_id = parent.parent_id; + + // special case for root node + if (grandparent_id == BVHCommon::INVALID) { + if (sibling_present) { + // change the root node + change_root_node(sibling_id, p_tree_id); + + // delete the old root node as no longer needed + _nodes.free(p_parent_id); + } + + return; + } + + if (sibling_present) { + node_replace_child(grandparent_id, p_parent_id, sibling_id); + } else { + node_remove_child(grandparent_id, p_parent_id, p_tree_id, true); + } + + // put the node on the free list to recycle + _nodes.free(p_parent_id); + } + + void change_root_node(uint32_t p_new_root_id, uint32_t p_tree_id) { + _root_node_id[p_tree_id] = p_new_root_id; + TNode &root = _nodes[p_new_root_id]; + + // mark no parent + root.parent_id = BVHCommon::INVALID; + } + + void node_make_leaf(uint32_t p_node_id) { + uint32_t child_leaf_id; + TLeaf *child_leaf = _leaves.request(child_leaf_id); + child_leaf->clear(); + + // zero is reserved at startup, to prevent this id being used + // (as they are stored as negative values in the node, and zero is already taken) + BVH_ASSERT(child_leaf_id != 0); + + TNode &node = _nodes[p_node_id]; + node.neg_leaf_id = -(int)child_leaf_id; + } + + void node_remove_item(uint32_t p_ref_id, uint32_t p_tree_id, BVHABB_CLASS *r_old_aabb = nullptr) { + // get the reference + ItemRef &ref = _refs[p_ref_id]; + uint32_t owner_node_id = ref.tnode_id; + + // debug draw special + // This may not be needed + if (owner_node_id == BVHCommon::INVALID) { + return; + } + + TNode &tnode = _nodes[owner_node_id]; + CRASH_COND(!tnode.is_leaf()); + + TLeaf &leaf = _node_get_leaf(tnode); + + // if the aabb is not determining the corner size, then there is no need to refit! + // (optimization, as merging AABBs takes a lot of time) + const BVHABB_CLASS &old_aabb = leaf.get_aabb(ref.item_id); + + // shrink a little to prevent using corner aabbs + // in order to miss the corners first we shrink by node_expansion + // (which is added to the overall bound of the leaf), then we also + // shrink by an epsilon, in order to miss out the very corner aabbs + // which are important in determining the bound. Any other aabb + // within this can be removed and not affect the overall bound. + BVHABB_CLASS node_bound = tnode.aabb; + node_bound.expand(-_node_expansion - 0.001f); + bool refit = true; + + if (node_bound.is_other_within(old_aabb)) { + refit = false; + } + + // record the old aabb if required (for incremental remove_and_reinsert) + if (r_old_aabb) { + *r_old_aabb = old_aabb; + } + + leaf.remove_item_unordered(ref.item_id); + + if (leaf.num_items) { + // the swapped item has to have its reference changed to, to point to the new item id + uint32_t swapped_ref_id = leaf.get_item_ref_id(ref.item_id); + + ItemRef &swapped_ref = _refs[swapped_ref_id]; + + swapped_ref.item_id = ref.item_id; + + // only have to refit if it is an edge item + // This is a VERY EXPENSIVE STEP + // we defer the refit updates until the update function is called once per frame + if (refit) { + leaf.set_dirty(true); + } + } else { + // remove node if empty + // remove link from parent + if (tnode.parent_id != BVHCommon::INVALID) { + // DANGER .. this can potentially end up with root node with 1 child ... 
+ // we don't want this and must check for it + + uint32_t parent_id = tnode.parent_id; + + node_remove_child(parent_id, owner_node_id, p_tree_id); + refit_upward(parent_id); + + // put the node on the free list to recycle + _nodes.free(owner_node_id); + } + + // else if no parent, it is the root node. Do not delete + } + + ref.tnode_id = BVHCommon::INVALID; + ref.item_id = BVHCommon::INVALID; // unset + } + + // returns true if needs refit of PARENT tree only, the node itself AABB is calculated + // within this routine + bool _node_add_item(uint32_t p_node_id, uint32_t p_ref_id, const BVHABB_CLASS &p_aabb) { + ItemRef &ref = _refs[p_ref_id]; + ref.tnode_id = p_node_id; + + TNode &node = _nodes[p_node_id]; + BVH_ASSERT(node.is_leaf()); + TLeaf &leaf = _node_get_leaf(node); + + // optimization - we only need to do a refit + // if the added item is changing the AABB of the node. + // in most cases it won't. + bool needs_refit = true; + + // expand bound now + BVHABB_CLASS expanded = p_aabb; + expanded.expand(_node_expansion); + + // the bound will only be valid if there is an item in there already + if (leaf.num_items) { + if (node.aabb.is_other_within(expanded)) { + // no change to node AABBs + needs_refit = false; + } else { + node.aabb.merge(expanded); + } + } else { + // bound of the node = the new aabb + node.aabb = expanded; + } + + ref.item_id = leaf.request_item(); + BVH_ASSERT(ref.item_id != BVHCommon::INVALID); + + // set the aabb of the new item + leaf.get_aabb(ref.item_id) = p_aabb; + + // back reference on the item back to the item reference + leaf.get_item_ref_id(ref.item_id) = p_ref_id; + + return needs_refit; + } + + uint32_t _node_create_another_child(uint32_t p_node_id, const BVHABB_CLASS &p_aabb) { + uint32_t child_node_id; + TNode *child_node = _nodes.request(child_node_id); + child_node->clear(); + + // may not be necessary + child_node->aabb = p_aabb; + + node_add_child(p_node_id, child_node_id); + + return child_node_id; + } + +#include "bvh_cull.inc" +#include "bvh_debug.inc" +#include "bvh_integrity.inc" +#include "bvh_logic.inc" +#include "bvh_misc.inc" +#include "bvh_public.inc" +#include "bvh_refit.inc" +#include "bvh_split.inc" +}; + +#undef VERBOSE_PRINT + +#endif // BVH_TREE_H diff --git a/core/math/camera_matrix.cpp b/core/math/camera_matrix.cpp index 1066cf5e30..66c18f7b3c 100644 --- a/core/math/camera_matrix.cpp +++ b/core/math/camera_matrix.cpp @@ -315,8 +315,8 @@ Vector2 CameraMatrix::get_far_plane_half_extents() const { return Vector2(res.x, res.y); } -bool CameraMatrix::get_endpoints(const Transform &p_transform, Vector3 *p_8points) const { - Vector<Plane> planes = get_projection_planes(Transform()); +bool CameraMatrix::get_endpoints(const Transform3D &p_transform, Vector3 *p_8points) const { + Vector<Plane> planes = get_projection_planes(Transform3D()); const Planes intersections[8][3] = { { PLANE_FAR, PLANE_LEFT, PLANE_TOP }, { PLANE_FAR, PLANE_LEFT, PLANE_BOTTOM }, @@ -338,7 +338,7 @@ bool CameraMatrix::get_endpoints(const Transform &p_transform, Vector3 *p_8point return true; } -Vector<Plane> CameraMatrix::get_projection_planes(const Transform &p_transform) const { +Vector<Plane> CameraMatrix::get_projection_planes(const Transform3D &p_transform) const { /** Fast Plane Extraction from combined modelview/projection matrices. 
* References: * https://web.archive.org/web/20011221205252/http://www.markmorley.com/opengl/frustumculling.html @@ -707,8 +707,8 @@ void CameraMatrix::scale_translate_to_fit(const AABB &p_aabb) { matrix[3][3] = 1; } -CameraMatrix::operator Transform() const { - Transform tr; +CameraMatrix::operator Transform3D() const { + Transform3D tr; const real_t *m = &matrix[0][0]; tr.basis.elements[0][0] = m[0]; @@ -730,8 +730,8 @@ CameraMatrix::operator Transform() const { return tr; } -CameraMatrix::CameraMatrix(const Transform &p_transform) { - const Transform &tr = p_transform; +CameraMatrix::CameraMatrix(const Transform3D &p_transform) { + const Transform3D &tr = p_transform; real_t *m = &matrix[0][0]; m[0] = tr.basis.elements[0][0]; diff --git a/core/math/camera_matrix.h b/core/math/camera_matrix.h index 3f327d3bc4..786d46055a 100644 --- a/core/math/camera_matrix.h +++ b/core/math/camera_matrix.h @@ -32,7 +32,7 @@ #define CAMERA_MATRIX_H #include "core/math/rect2.h" -#include "core/math/transform.h" +#include "core/math/transform_3d.h" struct CameraMatrix { enum Planes { @@ -71,9 +71,9 @@ struct CameraMatrix { real_t get_fov() const; bool is_orthogonal() const; - Vector<Plane> get_projection_planes(const Transform &p_transform) const; + Vector<Plane> get_projection_planes(const Transform3D &p_transform) const; - bool get_endpoints(const Transform &p_transform, Vector3 *p_8points) const; + bool get_endpoints(const Transform3D &p_transform, Vector3 *p_8points) const; Vector2 get_viewport_half_extents() const; Vector2 get_far_plane_half_extents() const; @@ -90,7 +90,7 @@ struct CameraMatrix { void scale_translate_to_fit(const AABB &p_aabb); void make_scale(const Vector3 &p_scale); int get_pixels_per_meter(int p_for_pixel_width) const; - operator Transform() const; + operator Transform3D() const; void flip_y(); @@ -112,7 +112,7 @@ struct CameraMatrix { float get_lod_multiplier() const; CameraMatrix(); - CameraMatrix(const Transform &p_transform); + CameraMatrix(const Transform3D &p_transform); ~CameraMatrix(); }; diff --git a/core/math/color.cpp b/core/math/color.cpp index 64abd6dd08..dc86cacf8f 100644 --- a/core/math/color.cpp +++ b/core/math/color.cpp @@ -211,6 +211,14 @@ bool Color::is_equal_approx(const Color &p_color) const { return Math::is_equal_approx(r, p_color.r) && Math::is_equal_approx(g, p_color.g) && Math::is_equal_approx(b, p_color.b) && Math::is_equal_approx(a, p_color.a); } +Color Color::clamp(const Color &p_min, const Color &p_max) const { + return Color( + CLAMP(r, p_min.r, p_max.r), + CLAMP(g, p_min.g, p_max.g), + CLAMP(b, p_min.b, p_max.b), + CLAMP(a, p_min.a, p_max.a)); +} + void Color::invert() { r = 1.0 - r; g = 1.0 - g; @@ -360,7 +368,7 @@ Color Color::named(const String &p_name) { ERR_FAIL_V_MSG(Color(), "Invalid color name: " + p_name + "."); return Color(); } - return get_named_color(idx); + return named_colors[idx].color; } Color Color::named(const String &p_name, const Color &p_default) { @@ -368,7 +376,7 @@ Color Color::named(const String &p_name, const Color &p_default) { if (idx == -1) { return p_default; } - return get_named_color(idx); + return named_colors[idx].color; } int Color::find_named_color(const String &p_name) { @@ -401,10 +409,12 @@ int Color::get_named_color_count() { } String Color::get_named_color_name(int p_idx) { + ERR_FAIL_INDEX_V(p_idx, get_named_color_count(), ""); return named_colors[p_idx].name; } Color Color::get_named_color(int p_idx) { + ERR_FAIL_INDEX_V(p_idx, get_named_color_count(), Color()); return named_colors[p_idx].color; } @@ -458,7 
+468,7 @@ Color Color::from_hsv(float p_h, float p_s, float p_v, float p_a) const { } Color::operator String() const { - return rtos(r) + ", " + rtos(g) + ", " + rtos(b) + ", " + rtos(a); + return "(" + String::num(r, 4) + ", " + String::num(g, 4) + ", " + String::num(b, 4) + ", " + String::num(a, 4) + ")"; } Color Color::operator+(const Color &p_color) const { diff --git a/core/math/color.h b/core/math/color.h index e404d80c8a..a95dbf4f60 100644 --- a/core/math/color.h +++ b/core/math/color.h @@ -89,6 +89,7 @@ struct Color { bool is_equal_approx(const Color &p_color) const; + Color clamp(const Color &p_min = Color(0, 0, 0, 0), const Color &p_max = Color(1, 1, 1, 1)) const; void invert(); Color inverted() const; diff --git a/core/math/convex_hull.cpp b/core/math/convex_hull.cpp new file mode 100644 index 0000000000..682a7ea39e --- /dev/null +++ b/core/math/convex_hull.cpp @@ -0,0 +1,2290 @@ +/*************************************************************************/ +/* convex_hull.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +/* + * Based on Godot's patched VHACD-version of Bullet's btConvexHullComputer. + * See /thirdparty/vhacd/btConvexHullComputer.cpp at 64403ddcab9f1dca2408f0a412a22d899708bbb1 + * In turn, based on /src/LinearMath/btConvexHullComputer.cpp in <https://github.com/bulletphysics/bullet3> + * at 73b217fb07e7e3ce126caeb28ab3c9ddd0718467 + * + * Changes: + * - int32_t is consistently used instead of int in some cases + * - integrated patch db0d6c92927f5a1358b887f2645c11f3014f0e8a from Bullet (CWE-190 integer overflow in btConvexHullComputer) + * - adapted to Godot's code style + * - replaced Bullet's types (e.g. vectors) with Godot's + * - replaced custom Pool implementation with PagedAllocator + */ + +/* +Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "convex_hull.h" + +#include "core/error/error_macros.h" +#include "core/math/aabb.h" +#include "core/math/math_defs.h" +#include "core/os/memory.h" +#include "core/templates/paged_allocator.h" + +#include <string.h> + +//#define DEBUG_CONVEX_HULL +//#define SHOW_ITERATIONS + +// -- GODOT start -- +// Assembly optimizations are not used at the moment. +//#define USE_X86_64_ASM +// -- GODOT end -- + +#ifdef DEBUG_ENABLED +#define CHULL_ASSERT(m_cond) \ + do { \ + if (unlikely(!(m_cond))) { \ + ERR_PRINT("Assertion \"" _STR(m_cond) "\" failed."); \ + } \ + } while (0) +#else +#define CHULL_ASSERT(m_cond) \ + do { \ + } while (0) +#endif + +#if defined(DEBUG_CONVEX_HULL) || defined(SHOW_ITERATIONS) +#include <stdio.h> +#endif + +// Convex hull implementation based on Preparata and Hong +// Ole Kniemeyer, MAXON Computer GmbH +class ConvexHullInternal { +public: + class Point64 { + public: + int64_t x; + int64_t y; + int64_t z; + + Point64(int64_t p_x, int64_t p_y, int64_t p_z) { + x = p_x; + y = p_y; + z = p_z; + } + + bool is_zero() { + return (x == 0) && (y == 0) && (z == 0); + } + + int64_t dot(const Point64 &b) const { + return x * b.x + y * b.y + z * b.z; + } + }; + + class Point32 { + public: + int32_t x = 0; + int32_t y = 0; + int32_t z = 0; + int32_t index = -1; + + Point32() { + } + + Point32(int32_t p_x, int32_t p_y, int32_t p_z) { + x = p_x; + y = p_y; + z = p_z; + } + + bool operator==(const Point32 &b) const { + return (x == b.x) && (y == b.y) && (z == b.z); + } + + bool operator!=(const Point32 &b) const { + return (x != b.x) || (y != b.y) || (z != b.z); + } + + bool is_zero() { + return (x == 0) && (y == 0) && (z == 0); + } + + Point64 cross(const Point32 &b) const { + return Point64((int64_t)y * b.z - (int64_t)z * b.y, (int64_t)z * b.x - (int64_t)x * b.z, (int64_t)x * b.y - (int64_t)y * b.x); + } + + Point64 cross(const Point64 &b) const { + return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); + } + + int64_t dot(const Point32 &b) const { + return (int64_t)x * b.x + (int64_t)y * b.y + (int64_t)z * b.z; + } + + int64_t dot(const Point64 &b) const { + return x * b.x + y * b.y + z * b.z; + } + + Point32 operator+(const Point32 &b) const { + return Point32(x + b.x, y + b.y, z + b.z); + } + + Point32 operator-(const Point32 &b) const { + return Point32(x - b.x, y - b.y, z - b.z); + } + }; + + class Int128 { + public: + uint64_t low = 0; + uint64_t high = 0; + + Int128() { + } + + Int128(uint64_t p_low, uint64_t p_high) { + low = p_low; + high = p_high; + } + + Int128(uint64_t p_low) { + low = p_low; + high = 0; + } + + Int128(int64_t p_value) { + low = p_value; + if (p_value >= 0) { + high = 0; + } else { + high = (uint64_t)-1LL; + } + } + + static Int128 mul(int64_t a, int64_t b); + + static Int128 mul(uint64_t a, uint64_t b); + + Int128 operator-() const { + return Int128((uint64_t) - (int64_t)low, ~high + (low == 0)); + 
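+ // Two's-complement negation: the high word becomes ~high plus a carry of 1 only when low == 0; operator+ below detects its carry the same way, via unsigned wrap-around (lo < low).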
} + + Int128 operator+(const Int128 &b) const { +#ifdef USE_X86_64_ASM + Int128 result; + __asm__("addq %[bl], %[rl]\n\t" + "adcq %[bh], %[rh]\n\t" + : [rl] "=r"(result.low), [rh] "=r"(result.high) + : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) + : "cc"); + return result; +#else + uint64_t lo = low + b.low; + return Int128(lo, high + b.high + (lo < low)); +#endif + } + + Int128 operator-(const Int128 &b) const { +#ifdef USE_X86_64_ASM + Int128 result; + __asm__("subq %[bl], %[rl]\n\t" + "sbbq %[bh], %[rh]\n\t" + : [rl] "=r"(result.low), [rh] "=r"(result.high) + : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) + : "cc"); + return result; +#else + return *this + -b; +#endif + } + + Int128 &operator+=(const Int128 &b) { +#ifdef USE_X86_64_ASM + __asm__("addq %[bl], %[rl]\n\t" + "adcq %[bh], %[rh]\n\t" + : [rl] "=r"(low), [rh] "=r"(high) + : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) + : "cc"); +#else + uint64_t lo = low + b.low; + if (lo < low) { + ++high; + } + low = lo; + high += b.high; +#endif + return *this; + } + + Int128 &operator++() { + if (++low == 0) { + ++high; + } + return *this; + } + + Int128 operator*(int64_t b) const; + + real_t to_scalar() const { + return ((int64_t)high >= 0) ? real_t(high) * (real_t(0x100000000LL) * real_t(0x100000000LL)) + real_t(low) : -(-*this).to_scalar(); + } + + int32_t get_sign() const { + return ((int64_t)high < 0) ? -1 : (high || low) ? 1 : + 0; + } + + bool operator<(const Int128 &b) const { + return (high < b.high) || ((high == b.high) && (low < b.low)); + } + + int32_t ucmp(const Int128 &b) const { + if (high < b.high) { + return -1; + } + if (high > b.high) { + return 1; + } + if (low < b.low) { + return -1; + } + if (low > b.low) { + return 1; + } + return 0; + } + }; + + class Rational64 { + private: + uint64_t numerator; + uint64_t denominator; + int32_t sign; + + public: + Rational64(int64_t p_numerator, int64_t p_denominator) { + if (p_numerator > 0) { + sign = 1; + numerator = (uint64_t)p_numerator; + } else if (p_numerator < 0) { + sign = -1; + numerator = (uint64_t)-p_numerator; + } else { + sign = 0; + numerator = 0; + } + if (p_denominator > 0) { + denominator = (uint64_t)p_denominator; + } else if (p_denominator < 0) { + sign = -sign; + denominator = (uint64_t)-p_denominator; + } else { + denominator = 0; + } + } + + bool is_negative_infinity() const { + return (sign < 0) && (denominator == 0); + } + + bool is_nan() const { + return (sign == 0) && (denominator == 0); + } + + int32_t compare(const Rational64 &b) const; + + real_t to_scalar() const { + return sign * ((denominator == 0) ? 
FLT_MAX : (real_t)numerator / denominator); + } + }; + + class Rational128 { + private: + Int128 numerator; + Int128 denominator; + int32_t sign; + bool is_int_64; + + public: + Rational128(int64_t p_value) { + if (p_value > 0) { + sign = 1; + this->numerator = p_value; + } else if (p_value < 0) { + sign = -1; + this->numerator = -p_value; + } else { + sign = 0; + this->numerator = (uint64_t)0; + } + this->denominator = (uint64_t)1; + is_int_64 = true; + } + + Rational128(const Int128 &p_numerator, const Int128 &p_denominator) { + sign = p_numerator.get_sign(); + if (sign >= 0) { + this->numerator = p_numerator; + } else { + this->numerator = -p_numerator; + } + int32_t dsign = p_denominator.get_sign(); + if (dsign >= 0) { + this->denominator = p_denominator; + } else { + sign = -sign; + this->denominator = -p_denominator; + } + is_int_64 = false; + } + + int32_t compare(const Rational128 &b) const; + + int32_t compare(int64_t b) const; + + real_t to_scalar() const { + return sign * ((denominator.get_sign() == 0) ? FLT_MAX : numerator.to_scalar() / denominator.to_scalar()); + } + }; + + class PointR128 { + public: + Int128 x; + Int128 y; + Int128 z; + Int128 denominator; + + PointR128() { + } + + PointR128(Int128 p_x, Int128 p_y, Int128 p_z, Int128 p_denominator) { + x = p_x; + y = p_y; + z = p_z; + denominator = p_denominator; + } + + real_t xvalue() const { + return x.to_scalar() / denominator.to_scalar(); + } + + real_t yvalue() const { + return y.to_scalar() / denominator.to_scalar(); + } + + real_t zvalue() const { + return z.to_scalar() / denominator.to_scalar(); + } + }; + + class Edge; + class Face; + + class Vertex { + public: + Vertex *next = nullptr; + Vertex *prev = nullptr; + Edge *edges = nullptr; + Face *first_nearby_face = nullptr; + Face *last_nearby_face = nullptr; + PointR128 point128; + Point32 point; + int32_t copy = -1; + + Vertex() { + } + +#ifdef DEBUG_CONVEX_HULL + void print() { + printf("V%d (%d, %d, %d)", point.index, point.x, point.y, point.z); + } + + void print_graph(); +#endif + + Point32 operator-(const Vertex &b) const { + return point - b.point; + } + + Rational128 dot(const Point64 &b) const { + return (point.index >= 0) ? Rational128(point.dot(b)) : Rational128(point128.x * b.x + point128.y * b.y + point128.z * b.z, point128.denominator); + } + + real_t xvalue() const { + return (point.index >= 0) ? real_t(point.x) : point128.xvalue(); + } + + real_t yvalue() const { + return (point.index >= 0) ? real_t(point.y) : point128.yvalue(); + } + + real_t zvalue() const { + return (point.index >= 0) ? 
real_t(point.z) : point128.zvalue(); + } + + void receive_nearby_faces(Vertex *p_src) { + if (last_nearby_face) { + last_nearby_face->next_with_same_nearby_vertex = p_src->first_nearby_face; + } else { + first_nearby_face = p_src->first_nearby_face; + } + if (p_src->last_nearby_face) { + last_nearby_face = p_src->last_nearby_face; + } + for (Face *f = p_src->first_nearby_face; f; f = f->next_with_same_nearby_vertex) { + CHULL_ASSERT(f->nearby_vertex == p_src); + f->nearby_vertex = this; + } + p_src->first_nearby_face = nullptr; + p_src->last_nearby_face = nullptr; + } + }; + + class Edge { + public: + Edge *next = nullptr; + Edge *prev = nullptr; + Edge *reverse = nullptr; + Vertex *target = nullptr; + Face *face = nullptr; + int32_t copy = -1; + + void link(Edge *n) { + CHULL_ASSERT(reverse->target == n->reverse->target); + next = n; + n->prev = this; + } + +#ifdef DEBUG_CONVEX_HULL + void print() { + printf("E%p : %d -> %d, n=%p p=%p (0 %d\t%d\t%d) -> (%d %d %d)", this, reverse->target->point.index, target->point.index, next, prev, + reverse->target->point.x, reverse->target->point.y, reverse->target->point.z, target->point.x, target->point.y, target->point.z); + } +#endif + }; + + class Face { + public: + Face *next = nullptr; + Vertex *nearby_vertex = nullptr; + Face *next_with_same_nearby_vertex = nullptr; + Point32 origin; + Point32 dir0; + Point32 dir1; + + Face() { + } + + void init(Vertex *p_a, Vertex *p_b, Vertex *p_c) { + nearby_vertex = p_a; + origin = p_a->point; + dir0 = *p_b - *p_a; + dir1 = *p_c - *p_a; + if (p_a->last_nearby_face) { + p_a->last_nearby_face->next_with_same_nearby_vertex = this; + } else { + p_a->first_nearby_face = this; + } + p_a->last_nearby_face = this; + } + + Point64 get_normal() { + return dir0.cross(dir1); + } + }; + + template <typename UWord, typename UHWord> + class DMul { + private: + static uint32_t high(uint64_t p_value) { + return (uint32_t)(p_value >> 32); + } + + static uint32_t low(uint64_t p_value) { + return (uint32_t)p_value; + } + + static uint64_t mul(uint32_t a, uint32_t b) { + return (uint64_t)a * (uint64_t)b; + } + + static void shl_half(uint64_t &p_value) { + p_value <<= 32; + } + + static uint64_t high(Int128 p_value) { + return p_value.high; + } + + static uint64_t low(Int128 p_value) { + return p_value.low; + } + + static Int128 mul(uint64_t a, uint64_t b) { + return Int128::mul(a, b); + } + + static void shl_half(Int128 &p_value) { + p_value.high = p_value.low; + p_value.low = 0; + } + + public: + static void mul(UWord p_a, UWord p_b, UWord &r_low, UWord &r_high) { + UWord p00 = mul(low(p_a), low(p_b)); + UWord p01 = mul(low(p_a), high(p_b)); + UWord p10 = mul(high(p_a), low(p_b)); + UWord p11 = mul(high(p_a), high(p_b)); + UWord p0110 = UWord(low(p01)) + UWord(low(p10)); + p11 += high(p01); + p11 += high(p10); + p11 += high(p0110); + shl_half(p0110); + p00 += p0110; + if (p00 < p0110) { + ++p11; + } + r_low = p00; + r_high = p11; + } + }; + +private: + class IntermediateHull { + public: + Vertex *min_xy = nullptr; + Vertex *max_xy = nullptr; + Vertex *min_yx = nullptr; + Vertex *max_yx = nullptr; + + IntermediateHull() { + } + + void print(); + }; + + enum Orientation { NONE, + CLOCKWISE, + COUNTER_CLOCKWISE }; + + Vector3 scaling; + Vector3 center; + PagedAllocator<Vertex> vertex_pool; + PagedAllocator<Edge> edge_pool; + PagedAllocator<Face> face_pool; + LocalVector<Vertex *> original_vertices; + int32_t merge_stamp = 0; + int32_t min_axis = 0; + int32_t med_axis = 0; + int32_t max_axis = 0; + int32_t used_edge_pairs = 0; + 
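+ // Bookkeeping: used_edge_pairs counts edge pairs currently allocated; the peak value below is reported only when DEBUG_CONVEX_HULL is defined.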
int32_t max_used_edge_pairs = 0; + + static Orientation get_orientation(const Edge *p_prev, const Edge *p_next, const Point32 &p_s, const Point32 &p_t); + Edge *find_max_angle(bool p_ccw, const Vertex *p_start, const Point32 &p_s, const Point64 &p_rxs, const Point64 &p_ssxrxs, Rational64 &p_min_cot); + void find_edge_for_coplanar_faces(Vertex *p_c0, Vertex *p_c1, Edge *&p_e0, Edge *&p_e1, Vertex *p_stop0, Vertex *p_stop1); + + Edge *new_edge_pair(Vertex *p_from, Vertex *p_to); + + void remove_edge_pair(Edge *p_edge) { + Edge *n = p_edge->next; + Edge *r = p_edge->reverse; + + CHULL_ASSERT(p_edge->target && r->target); + + if (n != p_edge) { + n->prev = p_edge->prev; + p_edge->prev->next = n; + r->target->edges = n; + } else { + r->target->edges = nullptr; + } + + n = r->next; + + if (n != r) { + n->prev = r->prev; + r->prev->next = n; + p_edge->target->edges = n; + } else { + p_edge->target->edges = nullptr; + } + + edge_pool.free(p_edge); + edge_pool.free(r); + used_edge_pairs--; + } + + void compute_internal(int32_t p_start, int32_t p_end, IntermediateHull &r_result); + + bool merge_projection(IntermediateHull &p_h0, IntermediateHull &p_h1, Vertex *&r_c0, Vertex *&r_c1); + + void merge(IntermediateHull &p_h0, IntermediateHull &p_h1); + + Vector3 to_gd_vector(const Point32 &p_v); + + Vector3 get_gd_normal(Face *p_face); + + bool shift_face(Face *p_face, real_t p_amount, LocalVector<Vertex *> p_stack); + +public: + ~ConvexHullInternal() { + vertex_pool.reset(true); + edge_pool.reset(true); + face_pool.reset(true); + } + + Vertex *vertex_list; + + void compute(const Vector3 *p_coords, int32_t p_count); + + Vector3 get_coordinates(const Vertex *p_v); + + real_t shrink(real_t amount, real_t p_clamp_amount); +}; + +ConvexHullInternal::Int128 ConvexHullInternal::Int128::operator*(int64_t b) const { + bool negative = (int64_t)high < 0; + Int128 a = negative ? -*this : *this; + if (b < 0) { + negative = !negative; + b = -b; + } + Int128 result = mul(a.low, (uint64_t)b); + result.high += a.high * (uint64_t)b; + return negative ? -result : result; +} + +ConvexHullInternal::Int128 ConvexHullInternal::Int128::mul(int64_t a, int64_t b) { + Int128 result; + +#ifdef USE_X86_64_ASM + __asm__("imulq %[b]" + : "=a"(result.low), "=d"(result.high) + : "0"(a), [b] "r"(b) + : "cc"); + return result; + +#else + bool negative = a < 0; + if (negative) { + a = -a; + } + if (b < 0) { + negative = !negative; + b = -b; + } + DMul<uint64_t, uint32_t>::mul((uint64_t)a, (uint64_t)b, result.low, result.high); + return negative ? -result : result; +#endif +} + +ConvexHullInternal::Int128 ConvexHullInternal::Int128::mul(uint64_t a, uint64_t b) { + Int128 result; + +#ifdef USE_X86_64_ASM + __asm__("mulq %[b]" + : "=a"(result.low), "=d"(result.high) + : "0"(a), [b] "r"(b) + : "cc"); + +#else + DMul<uint64_t, uint32_t>::mul(a, b, result.low, result.high); +#endif + + return result; +} + +int32_t ConvexHullInternal::Rational64::compare(const Rational64 &b) const { + if (sign != b.sign) { + return sign - b.sign; + } else if (sign == 0) { + return 0; + } + + // return (numerator * b.denominator > b.numerator * denominator) ? sign : (numerator * b.denominator < b.numerator * denominator) ? 
-sign : 0; + +#ifdef USE_X86_64_ASM + + int32_t result; + int64_t tmp; + int64_t dummy; + __asm__("mulq %[bn]\n\t" + "movq %%rax, %[tmp]\n\t" + "movq %%rdx, %%rbx\n\t" + "movq %[tn], %%rax\n\t" + "mulq %[bd]\n\t" + "subq %[tmp], %%rax\n\t" + "sbbq %%rbx, %%rdx\n\t" // rdx:rax contains 128-bit-difference "numerator*b.denominator - b.numerator*denominator" + "setnsb %%bh\n\t" // bh=1 if difference is non-negative, bh=0 otherwise + "orq %%rdx, %%rax\n\t" + "setnzb %%bl\n\t" // bl=1 if difference if non-zero, bl=0 if it is zero + "decb %%bh\n\t" // now bx=0x0000 if difference is zero, 0xff01 if it is negative, 0x0001 if it is positive (i.e., same sign as difference) + "shll $16, %%ebx\n\t" // ebx has same sign as difference + : "=&b"(result), [tmp] "=&r"(tmp), "=a"(dummy) + : "a"(denominator), [bn] "g"(b.numerator), [tn] "g"(numerator), [bd] "g"(b.denominator) + : "%rdx", "cc"); + return result ? result ^ sign // if sign is +1, only bit 0 of result is inverted, which does not change the sign of result (and cannot result in zero) + // if sign is -1, all bits of result are inverted, which changes the sign of result (and again cannot result in zero) + : + 0; + +#else + + return sign * Int128::mul(numerator, b.denominator).ucmp(Int128::mul(denominator, b.numerator)); + +#endif +} + +int32_t ConvexHullInternal::Rational128::compare(const Rational128 &b) const { + if (sign != b.sign) { + return sign - b.sign; + } else if (sign == 0) { + return 0; + } + if (is_int_64) { + return -b.compare(sign * (int64_t)numerator.low); + } + + Int128 nbd_low, nbd_high, dbn_low, dbn_high; + DMul<Int128, uint64_t>::mul(numerator, b.denominator, nbd_low, nbd_high); + DMul<Int128, uint64_t>::mul(denominator, b.numerator, dbn_low, dbn_high); + + int32_t cmp = nbd_high.ucmp(dbn_high); + if (cmp) { + return cmp * sign; + } + return nbd_low.ucmp(dbn_low) * sign; +} + +int32_t ConvexHullInternal::Rational128::compare(int64_t b) const { + if (is_int_64) { + int64_t a = sign * (int64_t)numerator.low; + return (a > b) ? 1 : (a < b) ? 
-1 : + 0; + } + if (b > 0) { + if (sign <= 0) { + return -1; + } + } else if (b < 0) { + if (sign >= 0) { + return 1; + } + b = -b; + } else { + return sign; + } + + return numerator.ucmp(denominator * b) * sign; +} + +ConvexHullInternal::Edge *ConvexHullInternal::new_edge_pair(Vertex *p_from, Vertex *p_to) { + CHULL_ASSERT(p_from && p_to); + Edge *e = edge_pool.alloc(); + Edge *r = edge_pool.alloc(); + e->reverse = r; + r->reverse = e; + e->copy = merge_stamp; + r->copy = merge_stamp; + e->target = p_to; + r->target = p_from; + e->face = nullptr; + r->face = nullptr; + used_edge_pairs++; + if (used_edge_pairs > max_used_edge_pairs) { + max_used_edge_pairs = used_edge_pairs; + } + return e; +} + +bool ConvexHullInternal::merge_projection(IntermediateHull &r_h0, IntermediateHull &r_h1, Vertex *&r_c0, Vertex *&r_c1) { + Vertex *v0 = r_h0.max_yx; + Vertex *v1 = r_h1.min_yx; + if ((v0->point.x == v1->point.x) && (v0->point.y == v1->point.y)) { + CHULL_ASSERT(v0->point.z < v1->point.z); + Vertex *v1p = v1->prev; + if (v1p == v1) { + r_c0 = v0; + if (v1->edges) { + CHULL_ASSERT(v1->edges->next == v1->edges); + v1 = v1->edges->target; + CHULL_ASSERT(v1->edges->next == v1->edges); + } + r_c1 = v1; + return false; + } + Vertex *v1n = v1->next; + v1p->next = v1n; + v1n->prev = v1p; + if (v1 == r_h1.min_xy) { + if ((v1n->point.x < v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y < v1p->point.y))) { + r_h1.min_xy = v1n; + } else { + r_h1.min_xy = v1p; + } + } + if (v1 == r_h1.max_xy) { + if ((v1n->point.x > v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y > v1p->point.y))) { + r_h1.max_xy = v1n; + } else { + r_h1.max_xy = v1p; + } + } + } + + v0 = r_h0.max_xy; + v1 = r_h1.max_xy; + Vertex *v00 = nullptr; + Vertex *v10 = nullptr; + int32_t sign = 1; + + for (int32_t side = 0; side <= 1; side++) { + int32_t dx = (v1->point.x - v0->point.x) * sign; + if (dx > 0) { + while (true) { + int32_t dy = v1->point.y - v0->point.y; + + Vertex *w0 = side ? v0->next : v0->prev; + if (w0 != v0) { + int32_t dx0 = (w0->point.x - v0->point.x) * sign; + int32_t dy0 = w0->point.y - v0->point.y; + if ((dy0 <= 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx <= dy * dx0)))) { + v0 = w0; + dx = (v1->point.x - v0->point.x) * sign; + continue; + } + } + + Vertex *w1 = side ? v1->next : v1->prev; + if (w1 != v1) { + int32_t dx1 = (w1->point.x - v1->point.x) * sign; + int32_t dy1 = w1->point.y - v1->point.y; + int32_t dxn = (w1->point.x - v0->point.x) * sign; + if ((dxn > 0) && (dy1 < 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx < dy * dx1)))) { + v1 = w1; + dx = dxn; + continue; + } + } + + break; + } + } else if (dx < 0) { + while (true) { + int32_t dy = v1->point.y - v0->point.y; + + Vertex *w1 = side ? v1->prev : v1->next; + if (w1 != v1) { + int32_t dx1 = (w1->point.x - v1->point.x) * sign; + int32_t dy1 = w1->point.y - v1->point.y; + if ((dy1 >= 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx <= dy * dx1)))) { + v1 = w1; + dx = (v1->point.x - v0->point.x) * sign; + continue; + } + } + + Vertex *w0 = side ? v0->prev : v0->next; + if (w0 != v0) { + int32_t dx0 = (w0->point.x - v0->point.x) * sign; + int32_t dy0 = w0->point.y - v0->point.y; + int32_t dxn = (v1->point.x - w0->point.x) * sign; + if ((dxn < 0) && (dy0 > 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx < dy * dx0)))) { + v0 = w0; + dx = dxn; + continue; + } + } + + break; + } + } else { + int32_t x = v0->point.x; + int32_t y0 = v0->point.y; + Vertex *w0 = v0; + Vertex *t; + while (((t = side ? 
w0->next : w0->prev) != v0) && (t->point.x == x) && (t->point.y <= y0)) { + w0 = t; + y0 = t->point.y; + } + v0 = w0; + + int32_t y1 = v1->point.y; + Vertex *w1 = v1; + while (((t = side ? w1->prev : w1->next) != v1) && (t->point.x == x) && (t->point.y >= y1)) { + w1 = t; + y1 = t->point.y; + } + v1 = w1; + } + + if (side == 0) { + v00 = v0; + v10 = v1; + + v0 = r_h0.min_xy; + v1 = r_h1.min_xy; + sign = -1; + } + } + + v0->prev = v1; + v1->next = v0; + + v00->next = v10; + v10->prev = v00; + + if (r_h1.min_xy->point.x < r_h0.min_xy->point.x) { + r_h0.min_xy = r_h1.min_xy; + } + if (r_h1.max_xy->point.x >= r_h0.max_xy->point.x) { + r_h0.max_xy = r_h1.max_xy; + } + + r_h0.max_yx = r_h1.max_yx; + + r_c0 = v00; + r_c1 = v10; + + return true; +} + +void ConvexHullInternal::compute_internal(int32_t p_start, int32_t p_end, IntermediateHull &r_result) { + int32_t n = p_end - p_start; + switch (n) { + case 0: + r_result.min_xy = nullptr; + r_result.max_xy = nullptr; + r_result.min_yx = nullptr; + r_result.max_yx = nullptr; + return; + case 2: { + Vertex *v = original_vertices[p_start]; + Vertex *w = original_vertices[p_start + 1]; + if (v->point != w->point) { + int32_t dx = v->point.x - w->point.x; + int32_t dy = v->point.y - w->point.y; + + if ((dx == 0) && (dy == 0)) { + if (v->point.z > w->point.z) { + Vertex *t = w; + w = v; + v = t; + } + CHULL_ASSERT(v->point.z < w->point.z); + v->next = v; + v->prev = v; + r_result.min_xy = v; + r_result.max_xy = v; + r_result.min_yx = v; + r_result.max_yx = v; + } else { + v->next = w; + v->prev = w; + w->next = v; + w->prev = v; + + if ((dx < 0) || ((dx == 0) && (dy < 0))) { + r_result.min_xy = v; + r_result.max_xy = w; + } else { + r_result.min_xy = w; + r_result.max_xy = v; + } + + if ((dy < 0) || ((dy == 0) && (dx < 0))) { + r_result.min_yx = v; + r_result.max_yx = w; + } else { + r_result.min_yx = w; + r_result.max_yx = v; + } + } + + Edge *e = new_edge_pair(v, w); + e->link(e); + v->edges = e; + + e = e->reverse; + e->link(e); + w->edges = e; + + return; + } + [[fallthrough]]; + } + case 1: { + Vertex *v = original_vertices[p_start]; + v->edges = nullptr; + v->next = v; + v->prev = v; + + r_result.min_xy = v; + r_result.max_xy = v; + r_result.min_yx = v; + r_result.max_yx = v; + + return; + } + } + + int32_t split0 = p_start + n / 2; + Point32 p = original_vertices[split0 - 1]->point; + int32_t split1 = split0; + while ((split1 < p_end) && (original_vertices[split1]->point == p)) { + split1++; + } + compute_internal(p_start, split0, r_result); + IntermediateHull hull1; + compute_internal(split1, p_end, hull1); +#ifdef DEBUG_CONVEX_HULL + printf("\n\nMerge\n"); + r_result.print(); + hull1.print(); +#endif + merge(r_result, hull1); +#ifdef DEBUG_CONVEX_HULL + printf("\n Result\n"); + r_result.print(); +#endif +} + +#ifdef DEBUG_CONVEX_HULL +void ConvexHullInternal::IntermediateHull::print() { + printf(" Hull\n"); + for (Vertex *v = min_xy; v;) { + printf(" "); + v->print(); + if (v == max_xy) { + printf(" max_xy"); + } + if (v == min_yx) { + printf(" min_yx"); + } + if (v == max_yx) { + printf(" max_yx"); + } + if (v->next->prev != v) { + printf(" Inconsistency"); + } + printf("\n"); + v = v->next; + if (v == min_xy) { + break; + } + } + if (min_xy) { + min_xy->copy = (min_xy->copy == -1) ? 
-2 : -1; + min_xy->print_graph(); + } +} + +void ConvexHullInternal::Vertex::print_graph() { + print(); + printf("\nEdges\n"); + Edge *e = edges; + if (e) { + do { + e->print(); + printf("\n"); + e = e->next; + } while (e != edges); + do { + Vertex *v = e->target; + if (v->copy != copy) { + v->copy = copy; + v->print_graph(); + } + e = e->next; + } while (e != edges); + } +} +#endif + +ConvexHullInternal::Orientation ConvexHullInternal::get_orientation(const Edge *p_prev, const Edge *p_next, const Point32 &p_s, const Point32 &p_t) { + CHULL_ASSERT(p_prev->reverse->target == p_next->reverse->target); + if (p_prev->next == p_next) { + if (p_prev->prev == p_next) { + Point64 n = p_t.cross(p_s); + Point64 m = (*p_prev->target - *p_next->reverse->target).cross(*p_next->target - *p_next->reverse->target); + CHULL_ASSERT(!m.is_zero()); + int64_t dot = n.dot(m); + CHULL_ASSERT(dot != 0); + return (dot > 0) ? COUNTER_CLOCKWISE : CLOCKWISE; + } + return COUNTER_CLOCKWISE; + } else if (p_prev->prev == p_next) { + return CLOCKWISE; + } else { + return NONE; + } +} + +ConvexHullInternal::Edge *ConvexHullInternal::find_max_angle(bool p_ccw, const Vertex *p_start, const Point32 &p_s, const Point64 &p_rxs, const Point64 &p_sxrxs, Rational64 &p_min_cot) { + Edge *min_edge = nullptr; + +#ifdef DEBUG_CONVEX_HULL + printf("find max edge for %d\n", p_start->point.index); +#endif + Edge *e = p_start->edges; + if (e) { + do { + if (e->copy > merge_stamp) { + Point32 t = *e->target - *p_start; + Rational64 cot(t.dot(p_sxrxs), t.dot(p_rxs)); +#ifdef DEBUG_CONVEX_HULL + printf(" Angle is %f (%d) for ", Math::atan(cot.to_scalar()), (int32_t)cot.is_nan()); + e->print(); +#endif + if (cot.is_nan()) { + CHULL_ASSERT(p_ccw ? (t.dot(p_s) < 0) : (t.dot(p_s) > 0)); + } else { + int32_t cmp; + if (min_edge == nullptr) { + p_min_cot = cot; + min_edge = e; + } else if ((cmp = cot.compare(p_min_cot)) < 0) { + p_min_cot = cot; + min_edge = e; + } else if ((cmp == 0) && (p_ccw == (get_orientation(min_edge, e, p_s, t) == COUNTER_CLOCKWISE))) { + min_edge = e; + } + } +#ifdef DEBUG_CONVEX_HULL + printf("\n"); +#endif + } + e = e->next; + } while (e != p_start->edges); + } + return min_edge; +} + +void ConvexHullInternal::find_edge_for_coplanar_faces(Vertex *p_c0, Vertex *p_c1, Edge *&p_e0, Edge *&p_e1, Vertex *p_stop0, Vertex *p_stop1) { + Edge *start0 = p_e0; + Edge *start1 = p_e1; + Point32 et0 = start0 ? start0->target->point : p_c0->point; + Point32 et1 = start1 ? start1->target->point : p_c1->point; + Point32 s = p_c1->point - p_c0->point; + Point64 normal = ((start0 ? start0 : start1)->target->point - p_c0->point).cross(s); + int64_t dist = p_c0->point.dot(normal); + CHULL_ASSERT(!start1 || (start1->target->point.dot(normal) == dist)); + Point64 perp = s.cross(normal); + CHULL_ASSERT(!perp.is_zero()); + +#ifdef DEBUG_CONVEX_HULL + printf(" Advancing %d %d (%p %p, %d %d)\n", p_c0->point.index, p_c1->point.index, start0, start1, start0 ? start0->target->point.index : -1, start1 ? 
start1->target->point.index : -1); +#endif + + int64_t max_dot0 = et0.dot(perp); + if (p_e0) { + while (p_e0->target != p_stop0) { + Edge *e = p_e0->reverse->prev; + if (e->target->point.dot(normal) < dist) { + break; + } + CHULL_ASSERT(e->target->point.dot(normal) == dist); + if (e->copy == merge_stamp) { + break; + } + int64_t dot = e->target->point.dot(perp); + if (dot <= max_dot0) { + break; + } + max_dot0 = dot; + p_e0 = e; + et0 = e->target->point; + } + } + + int64_t max_dot1 = et1.dot(perp); + if (p_e1) { + while (p_e1->target != p_stop1) { + Edge *e = p_e1->reverse->next; + if (e->target->point.dot(normal) < dist) { + break; + } + CHULL_ASSERT(e->target->point.dot(normal) == dist); + if (e->copy == merge_stamp) { + break; + } + int64_t dot = e->target->point.dot(perp); + if (dot <= max_dot1) { + break; + } + max_dot1 = dot; + p_e1 = e; + et1 = e->target->point; + } + } + +#ifdef DEBUG_CONVEX_HULL + printf(" Starting at %d %d\n", et0.index, et1.index); +#endif + + int64_t dx = max_dot1 - max_dot0; + if (dx > 0) { + while (true) { + int64_t dy = (et1 - et0).dot(s); + + if (p_e0 && (p_e0->target != p_stop0)) { + Edge *f0 = p_e0->next->reverse; + if (f0->copy > merge_stamp) { + int64_t dx0 = (f0->target->point - et0).dot(perp); + int64_t dy0 = (f0->target->point - et0).dot(s); + if ((dx0 == 0) ? (dy0 < 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) >= 0))) { + et0 = f0->target->point; + dx = (et1 - et0).dot(perp); + p_e0 = (p_e0 == start0) ? nullptr : f0; + continue; + } + } + } + + if (p_e1 && (p_e1->target != p_stop1)) { + Edge *f1 = p_e1->reverse->next; + if (f1->copy > merge_stamp) { + Point32 d1 = f1->target->point - et1; + if (d1.dot(normal) == 0) { + int64_t dx1 = d1.dot(perp); + int64_t dy1 = d1.dot(s); + int64_t dxn = (f1->target->point - et0).dot(perp); + if ((dxn > 0) && ((dx1 == 0) ? (dy1 < 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) > 0)))) { + p_e1 = f1; + et1 = p_e1->target->point; + dx = dxn; + continue; + } + } else { + CHULL_ASSERT((p_e1 == start1) && (d1.dot(normal) < 0)); + } + } + } + + break; + } + } else if (dx < 0) { + while (true) { + int64_t dy = (et1 - et0).dot(s); + + if (p_e1 && (p_e1->target != p_stop1)) { + Edge *f1 = p_e1->prev->reverse; + if (f1->copy > merge_stamp) { + int64_t dx1 = (f1->target->point - et1).dot(perp); + int64_t dy1 = (f1->target->point - et1).dot(s); + if ((dx1 == 0) ? (dy1 > 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) <= 0))) { + et1 = f1->target->point; + dx = (et1 - et0).dot(perp); + p_e1 = (p_e1 == start1) ? nullptr : f1; + continue; + } + } + } + + if (p_e0 && (p_e0->target != p_stop0)) { + Edge *f0 = p_e0->reverse->prev; + if (f0->copy > merge_stamp) { + Point32 d0 = f0->target->point - et0; + if (d0.dot(normal) == 0) { + int64_t dx0 = d0.dot(perp); + int64_t dy0 = d0.dot(s); + int64_t dxn = (et1 - f0->target->point).dot(perp); + if ((dxn < 0) && ((dx0 == 0) ? 
(dy0 > 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) < 0)))) { + p_e0 = f0; + et0 = p_e0->target->point; + dx = dxn; + continue; + } + } else { + CHULL_ASSERT((p_e0 == start0) && (d0.dot(normal) < 0)); + } + } + } + + break; + } + } +#ifdef DEBUG_CONVEX_HULL + printf(" Advanced edges to %d %d\n", et0.index, et1.index); +#endif +} + +void ConvexHullInternal::merge(IntermediateHull &p_h0, IntermediateHull &p_h1) { + if (!p_h1.max_xy) { + return; + } + if (!p_h0.max_xy) { + p_h0 = p_h1; + return; + } + + merge_stamp--; + + Vertex *c0 = nullptr; + Edge *to_prev0 = nullptr; + Edge *first_new0 = nullptr; + Edge *pending_head0 = nullptr; + Edge *pending_tail0 = nullptr; + Vertex *c1 = nullptr; + Edge *to_prev1 = nullptr; + Edge *first_new1 = nullptr; + Edge *pending_head1 = nullptr; + Edge *pending_tail1 = nullptr; + Point32 prev_point; + + if (merge_projection(p_h0, p_h1, c0, c1)) { + Point32 s = *c1 - *c0; + Point64 normal = Point32(0, 0, -1).cross(s); + Point64 t = s.cross(normal); + CHULL_ASSERT(!t.is_zero()); + + Edge *e = c0->edges; + Edge *start0 = nullptr; + if (e) { + do { + int64_t dot = (*e->target - *c0).dot(normal); + CHULL_ASSERT(dot <= 0); + if ((dot == 0) && ((*e->target - *c0).dot(t) > 0)) { + if (!start0 || (get_orientation(start0, e, s, Point32(0, 0, -1)) == CLOCKWISE)) { + start0 = e; + } + } + e = e->next; + } while (e != c0->edges); + } + + e = c1->edges; + Edge *start1 = nullptr; + if (e) { + do { + int64_t dot = (*e->target - *c1).dot(normal); + CHULL_ASSERT(dot <= 0); + if ((dot == 0) && ((*e->target - *c1).dot(t) > 0)) { + if (!start1 || (get_orientation(start1, e, s, Point32(0, 0, -1)) == COUNTER_CLOCKWISE)) { + start1 = e; + } + } + e = e->next; + } while (e != c1->edges); + } + + if (start0 || start1) { + find_edge_for_coplanar_faces(c0, c1, start0, start1, nullptr, nullptr); + if (start0) { + c0 = start0->target; + } + if (start1) { + c1 = start1->target; + } + } + + prev_point = c1->point; + prev_point.z++; + } else { + prev_point = c1->point; + prev_point.x++; + } + + Vertex *first0 = c0; + Vertex *first1 = c1; + bool first_run = true; + + while (true) { + Point32 s = *c1 - *c0; + Point32 r = prev_point - c0->point; + Point64 rxs = r.cross(s); + Point64 sxrxs = s.cross(rxs); + +#ifdef DEBUG_CONVEX_HULL + printf("\n Checking %d %d\n", c0->point.index, c1->point.index); +#endif + Rational64 min_cot0(0, 0); + Edge *min0 = find_max_angle(false, c0, s, rxs, sxrxs, min_cot0); + Rational64 min_cot1(0, 0); + Edge *min1 = find_max_angle(true, c1, s, rxs, sxrxs, min_cot1); + if (!min0 && !min1) { + Edge *e = new_edge_pair(c0, c1); + e->link(e); + c0->edges = e; + + e = e->reverse; + e->link(e); + c1->edges = e; + return; + } else { + int32_t cmp = !min0 ? 1 : !min1 ? -1 : + min_cot0.compare(min_cot1); +#ifdef DEBUG_CONVEX_HULL + printf(" -> Result %d\n", cmp); +#endif + if (first_run || ((cmp >= 0) ? !min_cot1.is_negative_infinity() : !min_cot0.is_negative_infinity())) { + Edge *e = new_edge_pair(c0, c1); + if (pending_tail0) { + pending_tail0->prev = e; + } else { + pending_head0 = e; + } + e->next = pending_tail0; + pending_tail0 = e; + + e = e->reverse; + if (pending_tail1) { + pending_tail1->next = e; + } else { + pending_head1 = e; + } + e->prev = pending_tail1; + pending_tail1 = e; + } + + Edge *e0 = min0; + Edge *e1 = min1; + +#ifdef DEBUG_CONVEX_HULL + printf(" Found min edges to %d %d\n", e0 ? e0->target->point.index : -1, e1 ? 
e1->target->point.index : -1); +#endif + + if (cmp == 0) { + find_edge_for_coplanar_faces(c0, c1, e0, e1, nullptr, nullptr); + } + + if ((cmp >= 0) && e1) { + if (to_prev1) { + for (Edge *e = to_prev1->next, *n = nullptr; e != min1; e = n) { + n = e->next; + remove_edge_pair(e); + } + } + + if (pending_tail1) { + if (to_prev1) { + to_prev1->link(pending_head1); + } else { + min1->prev->link(pending_head1); + first_new1 = pending_head1; + } + pending_tail1->link(min1); + pending_head1 = nullptr; + pending_tail1 = nullptr; + } else if (!to_prev1) { + first_new1 = min1; + } + + prev_point = c1->point; + c1 = e1->target; + to_prev1 = e1->reverse; + } + + if ((cmp <= 0) && e0) { + if (to_prev0) { + for (Edge *e = to_prev0->prev, *n = nullptr; e != min0; e = n) { + n = e->prev; + remove_edge_pair(e); + } + } + + if (pending_tail0) { + if (to_prev0) { + pending_head0->link(to_prev0); + } else { + pending_head0->link(min0->next); + first_new0 = pending_head0; + } + min0->link(pending_tail0); + pending_head0 = nullptr; + pending_tail0 = nullptr; + } else if (!to_prev0) { + first_new0 = min0; + } + + prev_point = c0->point; + c0 = e0->target; + to_prev0 = e0->reverse; + } + } + + if ((c0 == first0) && (c1 == first1)) { + if (to_prev0 == nullptr) { + pending_head0->link(pending_tail0); + c0->edges = pending_tail0; + } else { + for (Edge *e = to_prev0->prev, *n = nullptr; e != first_new0; e = n) { + n = e->prev; + remove_edge_pair(e); + } + if (pending_tail0) { + pending_head0->link(to_prev0); + first_new0->link(pending_tail0); + } + } + + if (to_prev1 == nullptr) { + pending_tail1->link(pending_head1); + c1->edges = pending_tail1; + } else { + for (Edge *e = to_prev1->next, *n = nullptr; e != first_new1; e = n) { + n = e->next; + remove_edge_pair(e); + } + if (pending_tail1) { + to_prev1->link(pending_head1); + pending_tail1->link(first_new1); + } + } + + return; + } + + first_run = false; + } +} + +struct PointComparator { + _FORCE_INLINE_ bool operator()(const ConvexHullInternal::Point32 &p, const ConvexHullInternal::Point32 &q) const { + return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z)))); + } +}; + +void ConvexHullInternal::compute(const Vector3 *p_coords, int32_t p_count) { + AABB aabb; + for (int32_t i = 0; i < p_count; i++) { + Vector3 p = p_coords[i]; + if (i == 0) { + aabb.position = p; + } else { + aabb.expand_to(p); + } + } + + Vector3 s = aabb.size; + max_axis = s.max_axis(); + min_axis = s.min_axis(); + if (min_axis == max_axis) { + min_axis = (max_axis + 1) % 3; + } + med_axis = 3 - max_axis - min_axis; + + s /= real_t(10216); + if (((med_axis + 1) % 3) != max_axis) { + s *= -1; + } + scaling = s; + + if (s[0] != 0) { + s[0] = real_t(1) / s[0]; + } + if (s[1] != 0) { + s[1] = real_t(1) / s[1]; + } + if (s[2] != 0) { + s[2] = real_t(1) / s[2]; + } + + center = aabb.position; + + LocalVector<Point32> points; + points.resize(p_count); + for (int32_t i = 0; i < p_count; i++) { + Vector3 p = p_coords[i]; + p = (p - center) * s; + points[i].x = (int32_t)p[med_axis]; + points[i].y = (int32_t)p[max_axis]; + points[i].z = (int32_t)p[min_axis]; + points[i].index = i; + } + + points.sort_custom<PointComparator>(); + + vertex_pool.reset(true); + original_vertices.resize(p_count); + for (int32_t i = 0; i < p_count; i++) { + Vertex *v = vertex_pool.alloc(); + v->edges = nullptr; + v->point = points[i]; + v->copy = -1; + original_vertices[i] = v; + } + + points.clear(); + + edge_pool.reset(true); + + used_edge_pairs = 0; + max_used_edge_pairs = 0; + + merge_stamp = 
-3; + + IntermediateHull hull; + compute_internal(0, p_count, hull); + vertex_list = hull.min_xy; +#ifdef DEBUG_CONVEX_HULL + printf("max. edges %d (3v = %d)", max_used_edge_pairs, 3 * p_count); +#endif +} + +Vector3 ConvexHullInternal::to_gd_vector(const Point32 &p_v) { + Vector3 p; + p[med_axis] = real_t(p_v.x); + p[max_axis] = real_t(p_v.y); + p[min_axis] = real_t(p_v.z); + return p * scaling; +} + +Vector3 ConvexHullInternal::get_gd_normal(Face *p_face) { + return to_gd_vector(p_face->dir0).cross(to_gd_vector(p_face->dir1)).normalized(); +} + +Vector3 ConvexHullInternal::get_coordinates(const Vertex *p_v) { + Vector3 p; + p[med_axis] = p_v->xvalue(); + p[max_axis] = p_v->yvalue(); + p[min_axis] = p_v->zvalue(); + return p * scaling + center; +} + +real_t ConvexHullInternal::shrink(real_t p_amount, real_t p_clamp_amount) { + if (!vertex_list) { + return 0; + } + int32_t stamp = --merge_stamp; + LocalVector<Vertex *> stack; + vertex_list->copy = stamp; + stack.push_back(vertex_list); + LocalVector<Face *> faces; + + Point32 ref = vertex_list->point; + Int128 hull_center_x(0, 0); + Int128 hull_center_y(0, 0); + Int128 hull_center_z(0, 0); + Int128 volume(0, 0); + + while (stack.size() > 0) { + Vertex *v = stack[stack.size() - 1]; + stack.remove(stack.size() - 1); + Edge *e = v->edges; + if (e) { + do { + if (e->target->copy != stamp) { + e->target->copy = stamp; + stack.push_back(e->target); + } + if (e->copy != stamp) { + Face *face = face_pool.alloc(); + face->init(e->target, e->reverse->prev->target, v); + faces.push_back(face); + Edge *f = e; + + Vertex *a = nullptr; + Vertex *b = nullptr; + do { + if (a && b) { + int64_t vol = (v->point - ref).dot((a->point - ref).cross(b->point - ref)); + CHULL_ASSERT(vol >= 0); + Point32 c = v->point + a->point + b->point + ref; + hull_center_x += vol * c.x; + hull_center_y += vol * c.y; + hull_center_z += vol * c.z; + volume += vol; + } + + CHULL_ASSERT(f->copy != stamp); + f->copy = stamp; + f->face = face; + + a = b; + b = f->target; + + f = f->reverse->prev; + } while (f != e); + } + e = e->next; + } while (e != v->edges); + } + } + + if (volume.get_sign() <= 0) { + return 0; + } + + Vector3 hull_center; + hull_center[med_axis] = hull_center_x.to_scalar(); + hull_center[max_axis] = hull_center_y.to_scalar(); + hull_center[min_axis] = hull_center_z.to_scalar(); + hull_center /= 4 * volume.to_scalar(); + hull_center *= scaling; + + int32_t face_count = faces.size(); + + if (p_clamp_amount > 0) { + real_t min_dist = FLT_MAX; + for (int32_t i = 0; i < face_count; i++) { + Vector3 normal = get_gd_normal(faces[i]); + real_t dist = normal.dot(to_gd_vector(faces[i]->origin) - hull_center); + if (dist < min_dist) { + min_dist = dist; + } + } + + if (min_dist <= 0) { + return 0; + } + + p_amount = MIN(p_amount, min_dist * p_clamp_amount); + } + + uint32_t seed = 243703; + for (int32_t i = 0; i < face_count; i++, seed = 1664525 * seed + 1013904223) { + SWAP(faces[i], faces[seed % face_count]); + } + + for (int32_t i = 0; i < face_count; i++) { + if (!shift_face(faces[i], p_amount, stack)) { + return -p_amount; + } + } + + return p_amount; +} + +bool ConvexHullInternal::shift_face(Face *p_face, real_t p_amount, LocalVector<Vertex *> p_stack) { + Vector3 orig_shift = get_gd_normal(p_face) * -p_amount; + if (scaling[0] != 0) { + orig_shift[0] /= scaling[0]; + } + if (scaling[1] != 0) { + orig_shift[1] /= scaling[1]; + } + if (scaling[2] != 0) { + orig_shift[2] /= scaling[2]; + } + Point32 shift((int32_t)orig_shift[med_axis], (int32_t)orig_shift[max_axis], 
(int32_t)orig_shift[min_axis]); + if (shift.is_zero()) { + return true; + } + Point64 normal = p_face->get_normal(); +#ifdef DEBUG_CONVEX_HULL + printf("\nShrinking p_face (%d %d %d) (%d %d %d) (%d %d %d) by (%d %d %d)\n", + p_face->origin.x, p_face->origin.y, p_face->origin.z, p_face->dir0.x, p_face->dir0.y, p_face->dir0.z, p_face->dir1.x, p_face->dir1.y, p_face->dir1.z, shift.x, shift.y, shift.z); +#endif + int64_t orig_dot = p_face->origin.dot(normal); + Point32 shifted_origin = p_face->origin + shift; + int64_t shifted_dot = shifted_origin.dot(normal); + CHULL_ASSERT(shifted_dot <= orig_dot); + if (shifted_dot >= orig_dot) { + return false; + } + + Edge *intersection = nullptr; + + Edge *start_edge = p_face->nearby_vertex->edges; +#ifdef DEBUG_CONVEX_HULL + printf("Start edge is "); + start_edge->print(); + printf(", normal is (%lld %lld %lld), shifted dot is %lld\n", normal.x, normal.y, normal.z, shifted_dot); +#endif + Rational128 opt_dot = p_face->nearby_vertex->dot(normal); + int32_t cmp = opt_dot.compare(shifted_dot); +#ifdef SHOW_ITERATIONS + int32_t n = 0; +#endif + if (cmp >= 0) { + Edge *e = start_edge; + do { +#ifdef SHOW_ITERATIONS + n++; +#endif + Rational128 dot = e->target->dot(normal); + CHULL_ASSERT(dot.compare(orig_dot) <= 0); +#ifdef DEBUG_CONVEX_HULL + printf("Moving downwards, edge is "); + e->print(); + printf(", dot is %f (%f %lld)\n", (float)dot.to_scalar(), (float)opt_dot.to_scalar(), shifted_dot); +#endif + if (dot.compare(opt_dot) < 0) { + int32_t c = dot.compare(shifted_dot); + opt_dot = dot; + e = e->reverse; + start_edge = e; + if (c < 0) { + intersection = e; + break; + } + cmp = c; + } + e = e->prev; + } while (e != start_edge); + + if (!intersection) { + return false; + } + } else { + Edge *e = start_edge; + do { +#ifdef SHOW_ITERATIONS + n++; +#endif + Rational128 dot = e->target->dot(normal); + CHULL_ASSERT(dot.compare(orig_dot) <= 0); +#ifdef DEBUG_CONVEX_HULL + printf("Moving upwards, edge is "); + e->print(); + printf(", dot is %f (%f %lld)\n", (float)dot.to_scalar(), (float)opt_dot.to_scalar(), shifted_dot); +#endif + if (dot.compare(opt_dot) > 0) { + cmp = dot.compare(shifted_dot); + if (cmp >= 0) { + intersection = e; + break; + } + opt_dot = dot; + e = e->reverse; + start_edge = e; + } + e = e->prev; + } while (e != start_edge); + + if (!intersection) { + return true; + } + } + +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to find initial intersection\n", n); +#endif + + if (cmp == 0) { + Edge *e = intersection->reverse->next; +#ifdef SHOW_ITERATIONS + n = 0; +#endif + while (e->target->dot(normal).compare(shifted_dot) <= 0) { +#ifdef SHOW_ITERATIONS + n++; +#endif + e = e->next; + if (e == intersection->reverse) { + return true; + } +#ifdef DEBUG_CONVEX_HULL + printf("Checking for outwards edge, current edge is "); + e->print(); + printf("\n"); +#endif + } +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to check for complete containment\n", n); +#endif + } + + Edge *first_intersection = nullptr; + Edge *face_edge = nullptr; + Edge *first_face_edge = nullptr; + +#ifdef SHOW_ITERATIONS + int32_t m = 0; +#endif + while (true) { +#ifdef SHOW_ITERATIONS + m++; +#endif +#ifdef DEBUG_CONVEX_HULL + printf("Intersecting edge is "); + intersection->print(); + printf("\n"); +#endif + if (cmp == 0) { + Edge *e = intersection->reverse->next; + start_edge = e; +#ifdef SHOW_ITERATIONS + n = 0; +#endif + while (true) { +#ifdef SHOW_ITERATIONS + n++; +#endif + if (e->target->dot(normal).compare(shifted_dot) >= 0) { + break; + } + intersection = 
e->reverse; + e = e->next; + if (e == start_edge) { + return true; + } + } +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to advance intersection\n", n); +#endif + } + +#ifdef DEBUG_CONVEX_HULL + printf("Advanced intersecting edge to "); + intersection->print(); + printf(", cmp = %d\n", cmp); +#endif + + if (!first_intersection) { + first_intersection = intersection; + } else if (intersection == first_intersection) { + break; + } + + int32_t prev_cmp = cmp; + Edge *prev_intersection = intersection; + Edge *prev_face_edge = face_edge; + + Edge *e = intersection->reverse; +#ifdef SHOW_ITERATIONS + n = 0; +#endif + while (true) { +#ifdef SHOW_ITERATIONS + n++; +#endif + e = e->reverse->prev; + CHULL_ASSERT(e != intersection->reverse); + cmp = e->target->dot(normal).compare(shifted_dot); +#ifdef DEBUG_CONVEX_HULL + printf("Testing edge "); + e->print(); + printf(" -> cmp = %d\n", cmp); +#endif + if (cmp >= 0) { + intersection = e; + break; + } + } +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to find other intersection of p_face\n", n); +#endif + + if (cmp > 0) { + Vertex *removed = intersection->target; + e = intersection->reverse; + if (e->prev == e) { + removed->edges = nullptr; + } else { + removed->edges = e->prev; + e->prev->link(e->next); + e->link(e); + } +#ifdef DEBUG_CONVEX_HULL + printf("1: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); +#endif + + Point64 n0 = intersection->face->get_normal(); + Point64 n1 = intersection->reverse->face->get_normal(); + int64_t m00 = p_face->dir0.dot(n0); + int64_t m01 = p_face->dir1.dot(n0); + int64_t m10 = p_face->dir0.dot(n1); + int64_t m11 = p_face->dir1.dot(n1); + int64_t r0 = (intersection->face->origin - shifted_origin).dot(n0); + int64_t r1 = (intersection->reverse->face->origin - shifted_origin).dot(n1); + Int128 det = Int128::mul(m00, m11) - Int128::mul(m01, m10); + CHULL_ASSERT(det.get_sign() != 0); + Vertex *v = vertex_pool.alloc(); + v->point.index = -1; + v->copy = -1; + v->point128 = PointR128(Int128::mul(p_face->dir0.x * r0, m11) - Int128::mul(p_face->dir0.x * r1, m01) + Int128::mul(p_face->dir1.x * r1, m00) - Int128::mul(p_face->dir1.x * r0, m10) + det * shifted_origin.x, + Int128::mul(p_face->dir0.y * r0, m11) - Int128::mul(p_face->dir0.y * r1, m01) + Int128::mul(p_face->dir1.y * r1, m00) - Int128::mul(p_face->dir1.y * r0, m10) + det * shifted_origin.y, + Int128::mul(p_face->dir0.z * r0, m11) - Int128::mul(p_face->dir0.z * r1, m01) + Int128::mul(p_face->dir1.z * r1, m00) - Int128::mul(p_face->dir1.z * r0, m10) + det * shifted_origin.z, + det); + v->point.x = (int32_t)v->point128.xvalue(); + v->point.y = (int32_t)v->point128.yvalue(); + v->point.z = (int32_t)v->point128.zvalue(); + intersection->target = v; + v->edges = e; + + p_stack.push_back(v); + p_stack.push_back(removed); + p_stack.push_back(nullptr); + } + + if (cmp || prev_cmp || (prev_intersection->reverse->next->target != intersection->target)) { + face_edge = new_edge_pair(prev_intersection->target, intersection->target); + if (prev_cmp == 0) { + face_edge->link(prev_intersection->reverse->next); + } + if ((prev_cmp == 0) || prev_face_edge) { + prev_intersection->reverse->link(face_edge); + } + if (cmp == 0) { + intersection->reverse->prev->link(face_edge->reverse); + } + face_edge->reverse->link(intersection->reverse); + } else { + face_edge = prev_intersection->reverse->next; + } + + if (prev_face_edge) { + if (prev_cmp > 0) { + face_edge->link(prev_face_edge->reverse); + } else if (face_edge != 
prev_face_edge->reverse) { + p_stack.push_back(prev_face_edge->target); + while (face_edge->next != prev_face_edge->reverse) { + Vertex *removed = face_edge->next->target; + remove_edge_pair(face_edge->next); + p_stack.push_back(removed); +#ifdef DEBUG_CONVEX_HULL + printf("2: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); +#endif + } + p_stack.push_back(nullptr); + } + } + face_edge->face = p_face; + face_edge->reverse->face = intersection->face; + + if (!first_face_edge) { + first_face_edge = face_edge; + } + } +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to process all intersections\n", m); +#endif + + if (cmp > 0) { + first_face_edge->reverse->target = face_edge->target; + first_intersection->reverse->link(first_face_edge); + first_face_edge->link(face_edge->reverse); + } else if (first_face_edge != face_edge->reverse) { + p_stack.push_back(face_edge->target); + while (first_face_edge->next != face_edge->reverse) { + Vertex *removed = first_face_edge->next->target; + remove_edge_pair(first_face_edge->next); + p_stack.push_back(removed); +#ifdef DEBUG_CONVEX_HULL + printf("3: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); +#endif + } + p_stack.push_back(nullptr); + } + + CHULL_ASSERT(p_stack.size() > 0); + vertex_list = p_stack[0]; + +#ifdef DEBUG_CONVEX_HULL + printf("Removing part\n"); +#endif +#ifdef SHOW_ITERATIONS + n = 0; +#endif + uint32_t pos = 0; + while (pos < p_stack.size()) { + uint32_t end = p_stack.size(); + while (pos < end) { + Vertex *kept = p_stack[pos++]; +#ifdef DEBUG_CONVEX_HULL + kept->print(); +#endif + bool deeper = false; + Vertex *removed; + while ((removed = p_stack[pos++]) != nullptr) { +#ifdef SHOW_ITERATIONS + n++; +#endif + kept->receive_nearby_faces(removed); + while (removed->edges) { + if (!deeper) { + deeper = true; + p_stack.push_back(kept); + } + p_stack.push_back(removed->edges->target); + remove_edge_pair(removed->edges); + } + } + if (deeper) { + p_stack.push_back(nullptr); + } + } + } +#ifdef SHOW_ITERATIONS + printf("Needed %d iterations to remove part\n", n); +#endif + + p_stack.resize(0); + p_face->origin = shifted_origin; + + return true; +} + +static int32_t get_vertex_copy(ConvexHullInternal::Vertex *p_vertex, LocalVector<ConvexHullInternal::Vertex *> &p_vertices) { + int32_t index = p_vertex->copy; + if (index < 0) { + index = p_vertices.size(); + p_vertex->copy = index; + p_vertices.push_back(p_vertex); +#ifdef DEBUG_CONVEX_HULL + printf("Vertex %d gets index *%d\n", p_vertex->point.index, index); +#endif + } + return index; +} + +real_t ConvexHullComputer::compute(const Vector3 *p_coords, int32_t p_count, real_t p_shrink, real_t p_shrink_clamp) { + if (p_count <= 0) { + vertices.clear(); + edges.clear(); + faces.clear(); + return 0; + } + + ConvexHullInternal hull; + hull.compute(p_coords, p_count); + + real_t shift = 0; + if ((p_shrink > 0) && ((shift = hull.shrink(p_shrink, p_shrink_clamp)) < 0)) { + vertices.clear(); + edges.clear(); + faces.clear(); + return shift; + } + + vertices.resize(0); + edges.resize(0); + faces.resize(0); + + LocalVector<ConvexHullInternal::Vertex *> old_vertices; + get_vertex_copy(hull.vertex_list, old_vertices); + int32_t copied = 0; + while (copied < (int32_t)old_vertices.size()) { + ConvexHullInternal::Vertex *v = old_vertices[copied]; + vertices.push_back(hull.get_coordinates(v)); + ConvexHullInternal::Edge *first_edge = v->edges; + if (first_edge) { + int32_t first_copy = -1; + int32_t prev_copy = -1; + 
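+ // Walk the circular edge list around this vertex; each half-edge pair is emitted once, with its 'reverse' and 'next' fields stored as relative offsets into the output edges vector.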
ConvexHullInternal::Edge *e = first_edge; + do { + if (e->copy < 0) { + int32_t s = edges.size(); + edges.push_back(Edge()); + edges.push_back(Edge()); + Edge *c = &edges[s]; + Edge *r = &edges[s + 1]; + e->copy = s; + e->reverse->copy = s + 1; + c->reverse = 1; + r->reverse = -1; + c->target_vertex = get_vertex_copy(e->target, old_vertices); + r->target_vertex = copied; +#ifdef DEBUG_CONVEX_HULL + printf(" CREATE: Vertex *%d has edge to *%d\n", copied, c->get_target_vertex()); +#endif + } + if (prev_copy >= 0) { + edges[e->copy].next = prev_copy - e->copy; + } else { + first_copy = e->copy; + } + prev_copy = e->copy; + e = e->next; + } while (e != first_edge); + edges[first_copy].next = prev_copy - first_copy; + } + copied++; + } + + for (int32_t i = 0; i < copied; i++) { + ConvexHullInternal::Vertex *v = old_vertices[i]; + ConvexHullInternal::Edge *first_edge = v->edges; + if (first_edge) { + ConvexHullInternal::Edge *e = first_edge; + do { + if (e->copy >= 0) { +#ifdef DEBUG_CONVEX_HULL + printf("Vertex *%d has edge to *%d\n", i, edges[e->copy].get_target_vertex()); +#endif + faces.push_back(e->copy); + ConvexHullInternal::Edge *f = e; + do { +#ifdef DEBUG_CONVEX_HULL + printf(" Face *%d\n", edges[f->copy].get_target_vertex()); +#endif + f->copy = -1; + f = f->reverse->prev; + } while (f != e); + } + e = e->next; + } while (e != first_edge); + } + } + + return shift; +} + +Error ConvexHullComputer::convex_hull(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_mesh) { + r_mesh = Geometry3D::MeshData(); // clear + + if (p_points.size() == 0) { + return FAILED; // matches QuickHull + } + + ConvexHullComputer ch; + ch.compute(p_points.ptr(), p_points.size(), -1.0, -1.0); + + r_mesh.vertices = ch.vertices; + + r_mesh.edges.resize(ch.edges.size()); + for (uint32_t i = 0; i < ch.edges.size(); i++) { + r_mesh.edges.write[i].a = (&ch.edges[i])->get_source_vertex(); + r_mesh.edges.write[i].b = (&ch.edges[i])->get_target_vertex(); + } + + r_mesh.faces.resize(ch.faces.size()); + for (uint32_t i = 0; i < ch.faces.size(); i++) { + const Edge *e_start = &ch.edges[ch.faces[i]]; + const Edge *e = e_start; + Geometry3D::MeshData::Face &face = r_mesh.faces.write[i]; + + do { + face.indices.push_back(e->get_target_vertex()); + + e = e->get_next_edge_of_face(); + } while (e != e_start); + + // compute normal + if (face.indices.size() >= 3) { + face.plane = Plane(r_mesh.vertices[face.indices[0]], r_mesh.vertices[face.indices[2]], r_mesh.vertices[face.indices[1]]); + } else { + WARN_PRINT("Too few vertices per face."); + } + } + + return OK; +} diff --git a/core/math/convex_hull.h b/core/math/convex_hull.h new file mode 100644 index 0000000000..ba7be9c5e8 --- /dev/null +++ b/core/math/convex_hull.h @@ -0,0 +1,112 @@ +/*************************************************************************/ +/* convex_hull.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +/* +Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef CONVEX_HULL_H +#define CONVEX_HULL_H + +#include "core/math/geometry_3d.h" +#include "core/math/vector3.h" +#include "core/templates/local_vector.h" +#include "core/templates/vector.h" + +/// Convex hull implementation based on Preparata and Hong +/// See http://code.google.com/p/bullet/issues/detail?id=275 +/// Ole Kniemeyer, MAXON Computer GmbH +class ConvexHullComputer { +public: + class Edge { + private: + int32_t next = 0; + int32_t reverse = 0; + int32_t target_vertex = 0; + + friend class ConvexHullComputer; + + public: + int32_t get_source_vertex() const { + return (this + reverse)->target_vertex; + } + + int32_t get_target_vertex() const { + return target_vertex; + } + + const Edge *get_next_edge_of_vertex() const // clockwise list of all edges of a vertex + { + return this + next; + } + + const Edge *get_next_edge_of_face() const // counter-clockwise list of all edges of a face + { + return (this + reverse)->get_next_edge_of_vertex(); + } + + const Edge *get_reverse_edge() const { + return this + reverse; + } + }; + + // Vertices of the output hull + Vector<Vector3> vertices; + + // Edges of the output hull + LocalVector<Edge> edges; + + // Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons + LocalVector<int32_t> faces; + + /* + Compute convex hull of "count" vertices stored in "coords". 
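+	A non-positive "shrink" disables shrinking altogether; for example, the convex_hull() helper
+	declared below calls compute(p_points.ptr(), p_points.size(), -1.0, -1.0) to build the full, unshrunken hull.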
+ If "shrink" is positive, the convex hull is shrunken by that amount (each face is moved by "shrink" length units + towards the center along its normal). + If "shrinkClamp" is positive, "shrink" is clamped to not exceed "shrinkClamp * innerRadius", where "innerRadius" + is the minimum distance of a face to the center of the convex hull. + The returned value is the amount by which the hull has been shrunken. If it is negative, the amount was so large + that the resulting convex hull is empty. + The output convex hull can be found in the member variables "vertices", "edges", "faces". + */ + real_t compute(const Vector3 *p_coords, int32_t p_count, real_t p_shrink, real_t p_shrink_clamp); + + static Error convex_hull(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_mesh); +}; + +#endif // CONVEX_HULL_H diff --git a/core/math/delaunay_3d.h b/core/math/delaunay_3d.h index 25cc1125db..81adf4d19a 100644 --- a/core/math/delaunay_3d.h +++ b/core/math/delaunay_3d.h @@ -31,10 +31,10 @@ #ifndef DELAUNAY_3D_H #define DELAUNAY_3D_H +#include "core/io/file_access.h" #include "core/math/aabb.h" #include "core/math/camera_matrix.h" #include "core/math/vector3.h" -#include "core/os/file_access.h" #include "core/string/print_string.h" #include "core/templates/local_vector.h" #include "core/templates/oa_hash_map.h" @@ -375,8 +375,7 @@ public: OutputSimplex *ret_simplicesw = ret_simplices.ptrw(); uint32_t simplices_written = 0; - for (List<Simplex *>::Element *E = simplex_list.front(); E; E = E->next()) { - Simplex *simplex = E->get(); + for (Simplex *simplex : simplex_list) { bool invalid = false; for (int j = 0; j < 4; j++) { if (simplex->points[j] >= point_count) { diff --git a/core/math/dynamic_bvh.cpp b/core/math/dynamic_bvh.cpp index 200095d8cb..f3fb473981 100644 --- a/core/math/dynamic_bvh.cpp +++ b/core/math/dynamic_bvh.cpp @@ -181,7 +181,7 @@ DynamicBVH::Volume DynamicBVH::_bounds(Node **leaves, int p_count) { void DynamicBVH::_bottom_up(Node **leaves, int p_count) { while (p_count > 1) { - real_t minsize = Math_INF; + real_t minsize = INFINITY; int minidx[2] = { -1, -1 }; for (int i = 0; i < p_count; ++i) { for (int j = i + 1; j < p_count; ++j) { @@ -312,8 +312,11 @@ void DynamicBVH::optimize_incremental(int passes) { if (passes < 0) { passes = total_leaves; } - if (bvh_root && (passes > 0)) { + if (passes > 0) { do { + if (!bvh_root) { + break; + } Node *node = bvh_root; unsigned bit = 0; while (node->is_internal()) { diff --git a/core/math/expression.cpp b/core/math/expression.cpp index f7ac44d321..05f2c8dac9 100644 --- a/core/math/expression.cpp +++ b/core/math/expression.cpp @@ -33,7 +33,7 @@ #include "core/io/marshalls.h" #include "core/math/math_funcs.h" #include "core/object/class_db.h" -#include "core/object/reference.h" +#include "core/object/ref_counted.h" #include "core/os/os.h" #include "core/variant/variant_parser.h" @@ -397,10 +397,10 @@ Error Expression::_get_token(Token &r_token) { r_token.value = Math_TAU; } else if (id == "INF") { r_token.type = TK_CONSTANT; - r_token.value = Math_INF; + r_token.value = INFINITY; } else if (id == "NAN") { r_token.type = TK_CONSTANT; - r_token.value = Math_NAN; + r_token.value = NAN; } else if (id == "not") { r_token.type = TK_OP_NOT; } else if (id == "or") { diff --git a/core/math/expression.h b/core/math/expression.h index a6b288ed6e..aecf662d0a 100644 --- a/core/math/expression.h +++ b/core/math/expression.h @@ -31,10 +31,10 @@ #ifndef EXPRESSION_H #define EXPRESSION_H -#include "core/object/reference.h" +#include 
"core/object/ref_counted.h" -class Expression : public Reference { - GDCLASS(Expression, Reference); +class Expression : public RefCounted { + GDCLASS(Expression, RefCounted); private: struct Input { diff --git a/core/math/face3.cpp b/core/math/face3.cpp index 20c316c322..9af3f868d2 100644 --- a/core/math/face3.cpp +++ b/core/math/face3.cpp @@ -230,7 +230,7 @@ bool Face3::intersects_aabb(const AABB &p_aabb) const { real_t minA, maxA, minB, maxB; p_aabb.project_range_in_plane(Plane(axis, 0), minA, maxA); - project_range(axis, Transform(), minB, maxB); + project_range(axis, Transform3D(), minB, maxB); if (maxA < minB || maxB < minA) { return false; @@ -244,7 +244,7 @@ Face3::operator String() const { return String() + vertex[0] + ", " + vertex[1] + ", " + vertex[2]; } -void Face3::project_range(const Vector3 &p_normal, const Transform &p_transform, real_t &r_min, real_t &r_max) const { +void Face3::project_range(const Vector3 &p_normal, const Transform3D &p_transform, real_t &r_min, real_t &r_max) const { for (int i = 0; i < 3; i++) { Vector3 v = p_transform.xform(vertex[i]); real_t d = p_normal.dot(v); @@ -259,7 +259,7 @@ void Face3::project_range(const Vector3 &p_normal, const Transform &p_transform, } } -void Face3::get_support(const Vector3 &p_normal, const Transform &p_transform, Vector3 *p_vertices, int *p_count, int p_max) const { +void Face3::get_support(const Vector3 &p_normal, const Transform3D &p_transform, Vector3 *p_vertices, int *p_count, int p_max) const { #define _FACE_IS_VALID_SUPPORT_THRESHOLD 0.98 #define _EDGE_IS_VALID_SUPPORT_THRESHOLD 0.05 diff --git a/core/math/face3.h b/core/math/face3.h index 2e86b0a904..9e9026e54e 100644 --- a/core/math/face3.h +++ b/core/math/face3.h @@ -33,7 +33,7 @@ #include "core/math/aabb.h" #include "core/math/plane.h" -#include "core/math/transform.h" +#include "core/math/transform_3d.h" #include "core/math/vector3.h" class Face3 { @@ -50,8 +50,8 @@ public: /** * * @param p_plane plane used to split the face - * @param p_res array of at least 3 faces, amount used in functio return - * @param p_is_point_over array of at least 3 booleans, determining which face is over the plane, amount used in functio return + * @param p_res array of at least 3 faces, amount used in function return + * @param p_is_point_over array of at least 3 booleans, determining which face is over the plane, amount used in function return * @param _epsilon constant used for numerical error rounding, to add "thickness" to the plane (so coplanar points can happen) * @return amount of faces generated by the split, either 0 (means no split possible), 2 or 3 */ @@ -74,8 +74,8 @@ public: ClockDirection get_clock_dir() const; ///< todo, test if this is returning the proper clockwisity - void get_support(const Vector3 &p_normal, const Transform &p_transform, Vector3 *p_vertices, int *p_count, int p_max) const; - void project_range(const Vector3 &p_normal, const Transform &p_transform, real_t &r_min, real_t &r_max) const; + void get_support(const Vector3 &p_normal, const Transform3D &p_transform, Vector3 *p_vertices, int *p_count, int p_max) const; + void project_range(const Vector3 &p_normal, const Transform3D &p_transform, real_t &r_min, real_t &r_max) const; AABB get_aabb() const { AABB aabb(vertex[0], Vector3()); diff --git a/core/math/geometry_2d.h b/core/math/geometry_2d.h index 4958b5ac6a..e1a5bfe6f2 100644 --- a/core/math/geometry_2d.h +++ b/core/math/geometry_2d.h @@ -32,9 +32,8 @@ #define GEOMETRY_2D_H #include "core/math/delaunay_2d.h" -#include "core/math/rect2.h" 
#include "core/math/triangulate.h" -#include "core/object/object.h" +#include "core/math/vector3i.h" #include "core/templates/vector.h" class Geometry2D { @@ -362,6 +361,19 @@ public: return (intersections & 1); } + static bool is_segment_intersecting_polygon(const Vector2 &p_from, const Vector2 &p_to, const Vector<Vector2> &p_polygon) { + int c = p_polygon.size(); + const Vector2 *p = p_polygon.ptr(); + for (int i = 0; i < c; i++) { + const Vector2 &v1 = p[i]; + const Vector2 &v2 = p[(i + 1) % c]; + if (segment_intersects_segment(p_from, p_to, v1, v2, nullptr)) { + return true; + } + } + return false; + } + static real_t vec2_cross(const Point2 &O, const Point2 &A, const Point2 &B) { return (real_t)(A.x - O.x) * (B.y - O.y) - (real_t)(A.y - O.y) * (B.x - O.x); } diff --git a/core/math/geometry_3d.h b/core/math/geometry_3d.h index 4ef9b4dbe6..766689e222 100644 --- a/core/math/geometry_3d.h +++ b/core/math/geometry_3d.h @@ -40,7 +40,7 @@ class Geometry3D { public: static void get_closest_points_between_segments(const Vector3 &p1, const Vector3 &p2, const Vector3 &q1, const Vector3 &q2, Vector3 &c1, Vector3 &c2) { -// Do the function 'd' as defined by pb. I think is is dot product of some sort. +// Do the function 'd' as defined by pb. I think it's a dot product of some sort. #define d_of(m, n, o, p) ((m.x - n.x) * (o.x - p.x) + (m.y - n.y) * (o.y - p.y) + (m.z - n.z) * (o.z - p.z)) // Calculate the parametric position on the 2 curves, mua and mub. diff --git a/core/math/math_defs.h b/core/math/math_defs.h index df2223fb78..c3a8f910c0 100644 --- a/core/math/math_defs.h +++ b/core/math/math_defs.h @@ -43,8 +43,6 @@ #define Math_TAU 6.2831853071795864769252867666 #define Math_PI 3.1415926535897932384626433833 #define Math_E 2.7182818284590452353602874714 -#define Math_INF INFINITY -#define Math_NAN NAN #ifdef DEBUG_ENABLED #define MATH_CHECKS @@ -83,6 +81,26 @@ enum VAlign { VALIGN_BOTTOM }; +enum InlineAlign { + // Image alignment points. + INLINE_ALIGN_TOP_TO = 0b0000, + INLINE_ALIGN_CENTER_TO = 0b0001, + INLINE_ALIGN_BOTTOM_TO = 0b0010, + INLINE_ALIGN_IMAGE_MASK = 0b0011, + + // Text alignment points. + INLINE_ALIGN_TO_TOP = 0b0000, + INLINE_ALIGN_TO_CENTER = 0b0100, + INLINE_ALIGN_TO_BASELINE = 0b1000, + INLINE_ALIGN_TO_BOTTOM = 0b1100, + INLINE_ALIGN_TEXT_MASK = 0b1100, + + // Presets. 
+ INLINE_ALIGN_TOP = INLINE_ALIGN_TOP_TO | INLINE_ALIGN_TO_TOP, + INLINE_ALIGN_CENTER = INLINE_ALIGN_CENTER_TO | INLINE_ALIGN_TO_CENTER, + INLINE_ALIGN_BOTTOM = INLINE_ALIGN_BOTTOM_TO | INLINE_ALIGN_TO_BOTTOM +}; + enum Side { SIDE_LEFT, SIDE_TOP, diff --git a/core/math/math_fieldwise.cpp b/core/math/math_fieldwise.cpp index 0985a727f2..570c57e254 100644 --- a/core/math/math_fieldwise.cpp +++ b/core/math/math_fieldwise.cpp @@ -88,8 +88,8 @@ Variant fieldwise_assign(const Variant &p_target, const Variant &p_source, const return target; } - case Variant::QUAT: { - SETUP_TYPE(Quat) + case Variant::QUATERNION: { + SETUP_TYPE(Quaternion) /**/ TRY_TRANSFER_FIELD("x", x) else TRY_TRANSFER_FIELD("y", y) @@ -141,8 +141,8 @@ Variant fieldwise_assign(const Variant &p_target, const Variant &p_source, const return target; } - case Variant::TRANSFORM: { - SETUP_TYPE(Transform) + case Variant::TRANSFORM3D: { + SETUP_TYPE(Transform3D) /**/ TRY_TRANSFER_FIELD("xx", basis.elements[0][0]) else TRY_TRANSFER_FIELD("xy", basis.elements[0][1]) diff --git a/core/math/math_funcs.cpp b/core/math/math_funcs.cpp index e92bb9f4aa..bbed257f60 100644 --- a/core/math/math_funcs.cpp +++ b/core/math/math_funcs.cpp @@ -88,16 +88,6 @@ int Math::range_step_decimals(double p_step) { return step_decimals(p_step); } -double Math::dectime(double p_value, double p_amount, double p_step) { - double sgn = p_value < 0 ? -1.0 : 1.0; - double val = Math::abs(p_value); - val -= p_amount * p_step; - if (val < 0.0) { - val = 0.0; - } - return val * sgn; -} - double Math::ease(double p_x, double p_c) { if (p_x < 0) { p_x = 0; diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h index c0d7649b65..4e4f566517 100644 --- a/core/math/math_funcs.h +++ b/core/math/math_funcs.h @@ -275,8 +275,8 @@ public: static _ALWAYS_INLINE_ double db2linear(double p_db) { return Math::exp(p_db * 0.11512925464970228420089957273422); } static _ALWAYS_INLINE_ float db2linear(float p_db) { return Math::exp(p_db * 0.11512925464970228420089957273422); } - static _ALWAYS_INLINE_ double round(double p_val) { return (p_val >= 0) ? Math::floor(p_val + 0.5) : -Math::floor(-p_val + 0.5); } - static _ALWAYS_INLINE_ float round(float p_val) { return (p_val >= 0) ? Math::floor(p_val + 0.5) : -Math::floor(-p_val + 0.5); } + static _ALWAYS_INLINE_ double round(double p_val) { return ::round(p_val); } + static _ALWAYS_INLINE_ float round(float p_val) { return ::roundf(p_val); } static _ALWAYS_INLINE_ int64_t wrapi(int64_t value, int64_t min, int64_t max) { int64_t range = max - min; @@ -296,7 +296,6 @@ public: static int step_decimals(double p_step); static int range_step_decimals(double p_step); static double snapped(double p_value, double p_step); - static double dectime(double p_value, double p_amount, double p_step); static uint32_t larger_prime(uint32_t p_val); @@ -311,20 +310,20 @@ public: static float random(float from, float to); static int random(int from, int to); - static _ALWAYS_INLINE_ bool is_equal_approx(real_t a, real_t b) { + static _ALWAYS_INLINE_ bool is_equal_approx(float a, float b) { // Check for exact equality first, required to handle "infinity" values. if (a == b) { return true; } // Then check for approximate equality. 
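		// The comparison is relative: the allowed difference scales with the magnitude of `a`,
		// with CMP_EPSILON acting as an absolute floor, so values near zero may still differ by up to CMP_EPSILON.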
- real_t tolerance = CMP_EPSILON * abs(a); + float tolerance = CMP_EPSILON * abs(a); if (tolerance < CMP_EPSILON) { tolerance = CMP_EPSILON; } return abs(a - b) < tolerance; } - static _ALWAYS_INLINE_ bool is_equal_approx(real_t a, real_t b, real_t tolerance) { + static _ALWAYS_INLINE_ bool is_equal_approx(float a, float b, float tolerance) { // Check for exact equality first, required to handle "infinity" values. if (a == b) { return true; @@ -333,7 +332,33 @@ public: return abs(a - b) < tolerance; } - static _ALWAYS_INLINE_ bool is_zero_approx(real_t s) { + static _ALWAYS_INLINE_ bool is_zero_approx(float s) { + return abs(s) < CMP_EPSILON; + } + + static _ALWAYS_INLINE_ bool is_equal_approx(double a, double b) { + // Check for exact equality first, required to handle "infinity" values. + if (a == b) { + return true; + } + // Then check for approximate equality. + double tolerance = CMP_EPSILON * abs(a); + if (tolerance < CMP_EPSILON) { + tolerance = CMP_EPSILON; + } + return abs(a - b) < tolerance; + } + + static _ALWAYS_INLINE_ bool is_equal_approx(double a, double b, double tolerance) { + // Check for exact equality first, required to handle "infinity" values. + if (a == b) { + return true; + } + // Then check for approximate equality. + return abs(a - b) < tolerance; + } + + static _ALWAYS_INLINE_ bool is_zero_approx(double s) { return abs(s) < CMP_EPSILON; } @@ -358,28 +383,10 @@ public: return u.d; } - //this function should be as fast as possible and rounding mode should not matter + // This function should be as fast as possible and rounding mode should not matter. static _ALWAYS_INLINE_ int fast_ftoi(float a) { - static int b; - -#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0603) || WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP // windows 8 phone? - b = (int)((a > 0.0) ? (a + 0.5) : (a - 0.5)); - -#elif defined(_MSC_VER) && _MSC_VER < 1800 - __asm fld a __asm fistp b - /*#elif defined( __GNUC__ ) && ( defined( __i386__ ) || defined( __x86_64__ ) ) - // use AT&T inline assembly style, document that - // we use memory as output (=m) and input (m) - __asm__ __volatile__ ( - "flds %1 \n\t" - "fistpl %0 \n\t" - : "=m" (b) - : "m" (a));*/ - -#else - b = lrintf(a); //assuming everything but msvc 2012 or earlier has lrint -#endif - return b; + // Assuming every supported compiler has `lrint()`. + return lrintf(a); } static _ALWAYS_INLINE_ uint32_t halfbits_to_floatbits(uint16_t h) { diff --git a/core/math/plane.cpp b/core/math/plane.cpp index f1d3bbbd54..3c78b55b90 100644 --- a/core/math/plane.cpp +++ b/core/math/plane.cpp @@ -175,5 +175,5 @@ bool Plane::is_equal_approx(const Plane &p_plane) const { } Plane::operator String() const { - return normal.operator String() + ", " + rtos(d); + return "[N: " + normal.operator String() + ", D: " + String::num_real(d, false) + "]"; } diff --git a/core/math/quat.cpp b/core/math/quaternion.cpp index 6f13e04027..3f1d2c58e5 100644 --- a/core/math/quat.cpp +++ b/core/math/quaternion.cpp @@ -1,5 +1,5 @@ /*************************************************************************/ -/* quat.cpp */ +/* quaternion.cpp */ /*************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,16 +28,21 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /*************************************************************************/ -#include "quat.h" +#include "quaternion.h" #include "core/math/basis.h" #include "core/string/print_string.h" +real_t Quaternion::angle_to(const Quaternion &p_to) const { + real_t d = dot(p_to); + return Math::acos(CLAMP(d * d * 2 - 1, -1, 1)); +} + // get_euler_xyz returns a vector containing the Euler angles in the format // (ax,ay,az), where ax is the angle of rotation around x axis, // and similar for other axes. // This implementation uses XYZ convention (Z is the first rotation). -Vector3 Quat::get_euler_xyz() const { +Vector3 Quaternion::get_euler_xyz() const { Basis m(*this); return m.get_euler_xyz(); } @@ -46,7 +51,7 @@ Vector3 Quat::get_euler_xyz() const { // (ax,ay,az), where ax is the angle of rotation around x axis, // and similar for other axes. // This implementation uses YXZ convention (Z is the first rotation). -Vector3 Quat::get_euler_yxz() const { +Vector3 Quaternion::get_euler_yxz() const { #ifdef MATH_CHECKS ERR_FAIL_COND_V_MSG(!is_normalized(), Vector3(0, 0, 0), "The quaternion must be normalized."); #endif @@ -54,7 +59,7 @@ Vector3 Quat::get_euler_yxz() const { return m.get_euler_yxz(); } -void Quat::operator*=(const Quat &p_q) { +void Quaternion::operator*=(const Quaternion &p_q) { real_t xx = w * p_q.x + x * p_q.w + y * p_q.z - z * p_q.y; real_t yy = w * p_q.y + y * p_q.w + z * p_q.x - x * p_q.z; real_t zz = w * p_q.z + z * p_q.w + x * p_q.y - y * p_q.x; @@ -64,45 +69,45 @@ void Quat::operator*=(const Quat &p_q) { z = zz; } -Quat Quat::operator*(const Quat &p_q) const { - Quat r = *this; +Quaternion Quaternion::operator*(const Quaternion &p_q) const { + Quaternion r = *this; r *= p_q; return r; } -bool Quat::is_equal_approx(const Quat &p_quat) const { - return Math::is_equal_approx(x, p_quat.x) && Math::is_equal_approx(y, p_quat.y) && Math::is_equal_approx(z, p_quat.z) && Math::is_equal_approx(w, p_quat.w); +bool Quaternion::is_equal_approx(const Quaternion &p_quaternion) const { + return Math::is_equal_approx(x, p_quaternion.x) && Math::is_equal_approx(y, p_quaternion.y) && Math::is_equal_approx(z, p_quaternion.z) && Math::is_equal_approx(w, p_quaternion.w); } -real_t Quat::length() const { +real_t Quaternion::length() const { return Math::sqrt(length_squared()); } -void Quat::normalize() { +void Quaternion::normalize() { *this /= length(); } -Quat Quat::normalized() const { +Quaternion Quaternion::normalized() const { return *this / length(); } -bool Quat::is_normalized() const { - return Math::is_equal_approx(length_squared(), 1.0, UNIT_EPSILON); //use less epsilon +bool Quaternion::is_normalized() const { + return Math::is_equal_approx(length_squared(), 1, (real_t)UNIT_EPSILON); //use less epsilon } -Quat Quat::inverse() const { +Quaternion Quaternion::inverse() const { #ifdef MATH_CHECKS - ERR_FAIL_COND_V_MSG(!is_normalized(), Quat(), "The quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!is_normalized(), Quaternion(), "The quaternion must be normalized."); #endif - return Quat(-x, -y, -z, w); + return Quaternion(-x, -y, -z, w); } -Quat Quat::slerp(const Quat &p_to, const real_t &p_weight) const { +Quaternion Quaternion::slerp(const Quaternion &p_to, const real_t &p_weight) const { #ifdef MATH_CHECKS - ERR_FAIL_COND_V_MSG(!is_normalized(), Quat(), "The start quaternion must be normalized."); - ERR_FAIL_COND_V_MSG(!p_to.is_normalized(), Quat(), "The end quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!is_normalized(), Quaternion(), "The start quaternion must be 
normalized."); + ERR_FAIL_COND_V_MSG(!p_to.is_normalized(), Quaternion(), "The end quaternion must be normalized."); #endif - Quat to1; + Quaternion to1; real_t omega, cosom, sinom, scale0, scale1; // calc cosine @@ -137,19 +142,19 @@ Quat Quat::slerp(const Quat &p_to, const real_t &p_weight) const { scale1 = p_weight; } // calculate final values - return Quat( + return Quaternion( scale0 * x + scale1 * to1.x, scale0 * y + scale1 * to1.y, scale0 * z + scale1 * to1.z, scale0 * w + scale1 * to1.w); } -Quat Quat::slerpni(const Quat &p_to, const real_t &p_weight) const { +Quaternion Quaternion::slerpni(const Quaternion &p_to, const real_t &p_weight) const { #ifdef MATH_CHECKS - ERR_FAIL_COND_V_MSG(!is_normalized(), Quat(), "The start quaternion must be normalized."); - ERR_FAIL_COND_V_MSG(!p_to.is_normalized(), Quat(), "The end quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!is_normalized(), Quaternion(), "The start quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!p_to.is_normalized(), Quaternion(), "The end quaternion must be normalized."); #endif - const Quat &from = *this; + const Quaternion &from = *this; real_t dot = from.dot(p_to); @@ -162,29 +167,29 @@ Quat Quat::slerpni(const Quat &p_to, const real_t &p_weight) const { newFactor = Math::sin(p_weight * theta) * sinT, invFactor = Math::sin((1.0 - p_weight) * theta) * sinT; - return Quat(invFactor * from.x + newFactor * p_to.x, + return Quaternion(invFactor * from.x + newFactor * p_to.x, invFactor * from.y + newFactor * p_to.y, invFactor * from.z + newFactor * p_to.z, invFactor * from.w + newFactor * p_to.w); } -Quat Quat::cubic_slerp(const Quat &p_b, const Quat &p_pre_a, const Quat &p_post_b, const real_t &p_weight) const { +Quaternion Quaternion::cubic_slerp(const Quaternion &p_b, const Quaternion &p_pre_a, const Quaternion &p_post_b, const real_t &p_weight) const { #ifdef MATH_CHECKS - ERR_FAIL_COND_V_MSG(!is_normalized(), Quat(), "The start quaternion must be normalized."); - ERR_FAIL_COND_V_MSG(!p_b.is_normalized(), Quat(), "The end quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!is_normalized(), Quaternion(), "The start quaternion must be normalized."); + ERR_FAIL_COND_V_MSG(!p_b.is_normalized(), Quaternion(), "The end quaternion must be normalized."); #endif //the only way to do slerp :| real_t t2 = (1.0 - p_weight) * p_weight * 2; - Quat sp = this->slerp(p_b, p_weight); - Quat sq = p_pre_a.slerpni(p_post_b, p_weight); + Quaternion sp = this->slerp(p_b, p_weight); + Quaternion sq = p_pre_a.slerpni(p_post_b, p_weight); return sp.slerpni(sq, t2); } -Quat::operator String() const { - return String::num(x) + ", " + String::num(y) + ", " + String::num(z) + ", " + String::num(w); +Quaternion::operator String() const { + return "(" + String::num_real(x, false) + ", " + String::num_real(y, false) + ", " + String::num_real(z, false) + ", " + String::num_real(w, false) + ")"; } -Quat::Quat(const Vector3 &p_axis, real_t p_angle) { +Quaternion::Quaternion(const Vector3 &p_axis, real_t p_angle) { #ifdef MATH_CHECKS ERR_FAIL_COND_MSG(!p_axis.is_normalized(), "The axis Vector3 must be normalized."); #endif @@ -209,7 +214,7 @@ Quat::Quat(const Vector3 &p_axis, real_t p_angle) { // (ax, ay, az), where ax is the angle of rotation around x axis, // and similar for other axes. // This implementation uses YXZ convention (Z is the first rotation). 
-Quat::Quat(const Vector3 &p_euler) { +Quaternion::Quaternion(const Vector3 &p_euler) { real_t half_a1 = p_euler.y * 0.5; real_t half_a2 = p_euler.x * 0.5; real_t half_a3 = p_euler.z * 0.5; diff --git a/core/math/quat.h b/core/math/quaternion.h index 9db914fe52..35324323b3 100644 --- a/core/math/quat.h +++ b/core/math/quaternion.h @@ -1,5 +1,5 @@ /*************************************************************************/ -/* quat.h */ +/* quaternion.h */ /*************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,17 +28,15 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ -// Circular dependency between Vector3 and Basis :/ -#include "core/math/vector3.h" - -#ifndef QUAT_H -#define QUAT_H +#ifndef QUATERNION_H +#define QUATERNION_H #include "core/math/math_defs.h" #include "core/math/math_funcs.h" +#include "core/math/vector3.h" #include "core/string/ustring.h" -class Quat { +class Quaternion { public: union { struct { @@ -57,21 +55,22 @@ public: return components[idx]; } _FORCE_INLINE_ real_t length_squared() const; - bool is_equal_approx(const Quat &p_quat) const; + bool is_equal_approx(const Quaternion &p_quaternion) const; real_t length() const; void normalize(); - Quat normalized() const; + Quaternion normalized() const; bool is_normalized() const; - Quat inverse() const; - _FORCE_INLINE_ real_t dot(const Quat &p_q) const; + Quaternion inverse() const; + _FORCE_INLINE_ real_t dot(const Quaternion &p_q) const; + real_t angle_to(const Quaternion &p_to) const; Vector3 get_euler_xyz() const; Vector3 get_euler_yxz() const; Vector3 get_euler() const { return get_euler_yxz(); }; - Quat slerp(const Quat &p_to, const real_t &p_weight) const; - Quat slerpni(const Quat &p_to, const real_t &p_weight) const; - Quat cubic_slerp(const Quat &p_b, const Quat &p_pre_a, const Quat &p_post_b, const real_t &p_weight) const; + Quaternion slerp(const Quaternion &p_to, const real_t &p_weight) const; + Quaternion slerpni(const Quaternion &p_to, const real_t &p_weight) const; + Quaternion cubic_slerp(const Quaternion &p_b, const Quaternion &p_pre_a, const Quaternion &p_post_b, const real_t &p_weight) const; _FORCE_INLINE_ void get_axis_angle(Vector3 &r_axis, real_t &r_angle) const { r_angle = 2 * Math::acos(w); @@ -81,11 +80,11 @@ public: r_axis.z = z * r; } - void operator*=(const Quat &p_q); - Quat operator*(const Quat &p_q) const; + void operator*=(const Quaternion &p_q); + Quaternion operator*(const Quaternion &p_q) const; - Quat operator*(const Vector3 &v) const { - return Quat(w * v.x + y * v.z - z * v.y, + Quaternion operator*(const Vector3 &v) const { + return Quaternion(w * v.x + y * v.z - z * v.y, w * v.y + z * v.x - x * v.z, w * v.z + x * v.y - y * v.x, -x * v.x - y * v.y - z * v.z); @@ -104,42 +103,42 @@ public: return inverse().xform(v); } - _FORCE_INLINE_ void operator+=(const Quat &p_q); - _FORCE_INLINE_ void operator-=(const Quat &p_q); + _FORCE_INLINE_ void operator+=(const Quaternion &p_q); + _FORCE_INLINE_ void operator-=(const Quaternion &p_q); _FORCE_INLINE_ void operator*=(const real_t &s); _FORCE_INLINE_ void operator/=(const real_t &s); - _FORCE_INLINE_ Quat operator+(const Quat &q2) const; - _FORCE_INLINE_ Quat operator-(const Quat &q2) const; - _FORCE_INLINE_ Quat operator-() const; - _FORCE_INLINE_ Quat operator*(const real_t &s) const; - _FORCE_INLINE_ Quat operator/(const real_t &s) const; + _FORCE_INLINE_ Quaternion 
operator+(const Quaternion &q2) const; + _FORCE_INLINE_ Quaternion operator-(const Quaternion &q2) const; + _FORCE_INLINE_ Quaternion operator-() const; + _FORCE_INLINE_ Quaternion operator*(const real_t &s) const; + _FORCE_INLINE_ Quaternion operator/(const real_t &s) const; - _FORCE_INLINE_ bool operator==(const Quat &p_quat) const; - _FORCE_INLINE_ bool operator!=(const Quat &p_quat) const; + _FORCE_INLINE_ bool operator==(const Quaternion &p_quaternion) const; + _FORCE_INLINE_ bool operator!=(const Quaternion &p_quaternion) const; operator String() const; - _FORCE_INLINE_ Quat() {} + _FORCE_INLINE_ Quaternion() {} - _FORCE_INLINE_ Quat(real_t p_x, real_t p_y, real_t p_z, real_t p_w) : + _FORCE_INLINE_ Quaternion(real_t p_x, real_t p_y, real_t p_z, real_t p_w) : x(p_x), y(p_y), z(p_z), w(p_w) { } - Quat(const Vector3 &p_axis, real_t p_angle); + Quaternion(const Vector3 &p_axis, real_t p_angle); - Quat(const Vector3 &p_euler); + Quaternion(const Vector3 &p_euler); - Quat(const Quat &p_q) : + Quaternion(const Quaternion &p_q) : x(p_q.x), y(p_q.y), z(p_q.z), w(p_q.w) { } - Quat &operator=(const Quat &p_q) { + Quaternion &operator=(const Quaternion &p_q) { x = p_q.x; y = p_q.y; z = p_q.z; @@ -147,7 +146,7 @@ public: return *this; } - Quat(const Vector3 &v0, const Vector3 &v1) // shortest arc + Quaternion(const Vector3 &v0, const Vector3 &v1) // shortest arc { Vector3 c = v0.cross(v1); real_t d = v0.dot(v1); @@ -169,72 +168,72 @@ public: } }; -real_t Quat::dot(const Quat &p_q) const { +real_t Quaternion::dot(const Quaternion &p_q) const { return x * p_q.x + y * p_q.y + z * p_q.z + w * p_q.w; } -real_t Quat::length_squared() const { +real_t Quaternion::length_squared() const { return dot(*this); } -void Quat::operator+=(const Quat &p_q) { +void Quaternion::operator+=(const Quaternion &p_q) { x += p_q.x; y += p_q.y; z += p_q.z; w += p_q.w; } -void Quat::operator-=(const Quat &p_q) { +void Quaternion::operator-=(const Quaternion &p_q) { x -= p_q.x; y -= p_q.y; z -= p_q.z; w -= p_q.w; } -void Quat::operator*=(const real_t &s) { +void Quaternion::operator*=(const real_t &s) { x *= s; y *= s; z *= s; w *= s; } -void Quat::operator/=(const real_t &s) { +void Quaternion::operator/=(const real_t &s) { *this *= 1.0 / s; } -Quat Quat::operator+(const Quat &q2) const { - const Quat &q1 = *this; - return Quat(q1.x + q2.x, q1.y + q2.y, q1.z + q2.z, q1.w + q2.w); +Quaternion Quaternion::operator+(const Quaternion &q2) const { + const Quaternion &q1 = *this; + return Quaternion(q1.x + q2.x, q1.y + q2.y, q1.z + q2.z, q1.w + q2.w); } -Quat Quat::operator-(const Quat &q2) const { - const Quat &q1 = *this; - return Quat(q1.x - q2.x, q1.y - q2.y, q1.z - q2.z, q1.w - q2.w); +Quaternion Quaternion::operator-(const Quaternion &q2) const { + const Quaternion &q1 = *this; + return Quaternion(q1.x - q2.x, q1.y - q2.y, q1.z - q2.z, q1.w - q2.w); } -Quat Quat::operator-() const { - const Quat &q2 = *this; - return Quat(-q2.x, -q2.y, -q2.z, -q2.w); +Quaternion Quaternion::operator-() const { + const Quaternion &q2 = *this; + return Quaternion(-q2.x, -q2.y, -q2.z, -q2.w); } -Quat Quat::operator*(const real_t &s) const { - return Quat(x * s, y * s, z * s, w * s); +Quaternion Quaternion::operator*(const real_t &s) const { + return Quaternion(x * s, y * s, z * s, w * s); } -Quat Quat::operator/(const real_t &s) const { +Quaternion Quaternion::operator/(const real_t &s) const { return *this * (1.0 / s); } -bool Quat::operator==(const Quat &p_quat) const { - return x == p_quat.x && y == p_quat.y && z == p_quat.z && w == 
p_quat.w; +bool Quaternion::operator==(const Quaternion &p_quaternion) const { + return x == p_quaternion.x && y == p_quaternion.y && z == p_quaternion.z && w == p_quaternion.w; } -bool Quat::operator!=(const Quat &p_quat) const { - return x != p_quat.x || y != p_quat.y || z != p_quat.z || w != p_quat.w; +bool Quaternion::operator!=(const Quaternion &p_quaternion) const { + return x != p_quaternion.x || y != p_quaternion.y || z != p_quaternion.z || w != p_quaternion.w; } -_FORCE_INLINE_ Quat operator*(const real_t &p_real, const Quat &p_quat) { - return p_quat * p_real; +_FORCE_INLINE_ Quaternion operator*(const real_t &p_real, const Quaternion &p_quaternion) { + return p_quaternion * p_real; } -#endif // QUAT_H +#endif // QUATERNION_H diff --git a/core/math/quick_hull.cpp b/core/math/quick_hull.cpp index fe18cc3d41..0960fe19a6 100644 --- a/core/math/quick_hull.cpp +++ b/core/math/quick_hull.cpp @@ -112,7 +112,7 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ } } - //fourth vertex is the one most further away from the plane + //fourth vertex is the one most further away from the plane { real_t maxd = 0; @@ -192,9 +192,9 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ continue; } - for (List<Face>::Element *E = faces.front(); E; E = E->next()) { - if (E->get().plane.distance_to(p_points[i]) > over_tolerance) { - E->get().points_over.push_back(i); + for (Face &E : faces) { + if (E.plane.distance_to(p_points[i]) > over_tolerance) { + E.points_over.push_back(i); break; } } @@ -292,8 +292,8 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ //distribute points into new faces - for (List<List<Face>::Element *>::Element *F = lit_faces.front(); F; F = F->next()) { - Face &lf = F->get()->get(); + for (List<Face>::Element *&F : lit_faces) { + Face &lf = F->get(); for (int i = 0; i < lf.points_over.size(); i++) { if (lf.points_over[i] == f.points_over[next]) { //do not add current one @@ -301,8 +301,8 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ } Vector3 p = p_points[lf.points_over[i]]; - for (List<List<Face>::Element *>::Element *E = new_faces.front(); E; E = E->next()) { - Face &f2 = E->get()->get(); + for (List<Face>::Element *&E : new_faces) { + Face &f2 = E->get(); if (f2.plane.distance_to(p) > over_tolerance) { f2.points_over.push_back(lf.points_over[i]); break; @@ -320,10 +320,10 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ //put faces that contain no points on the front - for (List<List<Face>::Element *>::Element *E = new_faces.front(); E; E = E->next()) { - Face &f2 = E->get()->get(); + for (List<Face>::Element *&E : new_faces) { + Face &f2 = E->get(); if (f2.points_over.size() == 0) { - faces.move_to_front(E->get()); + faces.move_to_front(E); } } @@ -336,19 +336,19 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ Map<Edge, RetFaceConnect> ret_edges; List<Geometry3D::MeshData::Face> ret_faces; - for (List<Face>::Element *E = faces.front(); E; E = E->next()) { + for (const Face &E : faces) { Geometry3D::MeshData::Face f; - f.plane = E->get().plane; + f.plane = E.plane; for (int i = 0; i < 3; i++) { - f.indices.push_back(E->get().vertices[i]); + f.indices.push_back(E.vertices[i]); } List<Geometry3D::MeshData::Face>::Element *F = ret_faces.push_back(f); for (int i = 0; i < 3; i++) { - uint32_t a = E->get().vertices[i]; - uint32_t b = E->get().vertices[(i + 1) % 3]; + uint32_t a = 
E.vertices[i]; + uint32_t b = E.vertices[(i + 1) % 3]; Edge e(a, b); Map<Edge, RetFaceConnect>::Element *G = ret_edges.find(e); @@ -439,8 +439,8 @@ Error QuickHull::build(const Vector<Vector3> &p_points, Geometry3D::MeshData &r_ r_mesh.faces.resize(ret_faces.size()); int idx = 0; - for (List<Geometry3D::MeshData::Face>::Element *E = ret_faces.front(); E; E = E->next()) { - r_mesh.faces.write[idx++] = E->get(); + for (const Geometry3D::MeshData::Face &E : ret_faces) { + r_mesh.faces.write[idx++] = E; } r_mesh.edges.resize(ret_edges.size()); idx = 0; diff --git a/core/math/random_number_generator.h b/core/math/random_number_generator.h index a396c2b7d7..06cd3999f3 100644 --- a/core/math/random_number_generator.h +++ b/core/math/random_number_generator.h @@ -32,10 +32,10 @@ #define RANDOM_NUMBER_GENERATOR_H #include "core/math/random_pcg.h" -#include "core/object/reference.h" +#include "core/object/ref_counted.h" -class RandomNumberGenerator : public Reference { - GDCLASS(RandomNumberGenerator, Reference); +class RandomNumberGenerator : public RefCounted { + GDCLASS(RandomNumberGenerator, RefCounted); protected: RandomPCG randbase; diff --git a/core/math/rect2.cpp b/core/math/rect2.cpp index 60c44999f7..f64bf560c8 100644 --- a/core/math/rect2.cpp +++ b/core/math/rect2.cpp @@ -263,3 +263,11 @@ next4: return true; } + +Rect2::operator String() const { + return "[P: " + position.operator String() + ", S: " + size + "]"; +} + +Rect2i::operator String() const { + return "[P: " + position.operator String() + ", S: " + size + "]"; +} diff --git a/core/math/rect2.h b/core/math/rect2.h index 512499bdb2..ab0b489b4a 100644 --- a/core/math/rect2.h +++ b/core/math/rect2.h @@ -182,13 +182,17 @@ struct Rect2 { inline Rect2 grow(real_t p_amount) const { Rect2 g = *this; - g.position.x -= p_amount; - g.position.y -= p_amount; - g.size.width += p_amount * 2; - g.size.height += p_amount * 2; + g.grow_by(p_amount); return g; } + inline void grow_by(real_t p_amount) { + position.x -= p_amount; + position.y -= p_amount; + size.width += p_amount * 2; + size.height += p_amount * 2; + } + inline Rect2 grow_side(Side p_side, real_t p_amount) const { Rect2 g = *this; g = g.grow_individual((SIDE_LEFT == p_side) ? 
p_amount : 0, @@ -316,7 +320,7 @@ struct Rect2 { return position + size; } - operator String() const { return String(position) + ", " + String(size); } + operator String() const; Rect2() {} Rect2(real_t p_x, real_t p_y, real_t p_width, real_t p_height) : @@ -494,7 +498,7 @@ struct Rect2i { return position + size; } - operator String() const { return String(position) + ", " + String(size); } + operator String() const; operator Rect2() const { return Rect2(position, size); } diff --git a/core/math/transform_2d.cpp b/core/math/transform_2d.cpp index 4a521b96ae..16934d67df 100644 --- a/core/math/transform_2d.cpp +++ b/core/math/transform_2d.cpp @@ -158,6 +158,13 @@ bool Transform2D::is_equal_approx(const Transform2D &p_transform) const { return elements[0].is_equal_approx(p_transform.elements[0]) && elements[1].is_equal_approx(p_transform.elements[1]) && elements[2].is_equal_approx(p_transform.elements[2]); } +Transform2D Transform2D::looking_at(const Vector2 &p_target) const { + Transform2D return_trans = Transform2D(get_rotation(), get_origin()); + Vector2 target_position = affine_inverse().xform(p_target); + return_trans.set_rotation(return_trans.get_rotation() + (target_position * get_scale()).angle()); + return return_trans; +} + bool Transform2D::operator==(const Transform2D &p_transform) const { for (int i = 0; i < 3; i++) { if (elements[i] != p_transform.elements[i]) { @@ -269,6 +276,20 @@ Transform2D Transform2D::interpolate_with(const Transform2D &p_transform, real_t return res; } +void Transform2D::operator*=(const real_t p_val) { + elements[0] *= p_val; + elements[1] *= p_val; + elements[2] *= p_val; +} + +Transform2D Transform2D::operator*(const real_t p_val) const { + Transform2D ret(*this); + ret *= p_val; + return ret; +} + Transform2D::operator String() const { - return String(String() + elements[0] + ", " + elements[1] + ", " + elements[2]); + return "[X: " + elements[0].operator String() + + ", Y: " + elements[1].operator String() + + ", O: " + elements[2].operator String() + "]"; } diff --git a/core/math/transform_2d.h b/core/math/transform_2d.h index 327d0f244f..34cfd0c1a9 100644 --- a/core/math/transform_2d.h +++ b/core/math/transform_2d.h @@ -100,11 +100,15 @@ struct Transform2D { Transform2D orthonormalized() const; bool is_equal_approx(const Transform2D &p_transform) const; + Transform2D looking_at(const Vector2 &p_target) const; + bool operator==(const Transform2D &p_transform) const; bool operator!=(const Transform2D &p_transform) const; void operator*=(const Transform2D &p_transform); Transform2D operator*(const Transform2D &p_transform) const; + void operator*=(const real_t p_val); + Transform2D operator*(const real_t p_val) const; Transform2D interpolate_with(const Transform2D &p_transform, real_t p_c) const; diff --git a/core/math/transform.cpp b/core/math/transform_3d.cpp index d4d7ff6d28..4f4943c8ef 100644 --- a/core/math/transform.cpp +++ b/core/math/transform_3d.cpp @@ -1,5 +1,5 @@ /*************************************************************************/ -/* transform.cpp */ +/* transform_3d.cpp */ /*************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -28,185 +28,171 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /*************************************************************************/ -#include "transform.h" +#include "transform_3d.h" #include "core/math/math_funcs.h" #include "core/string/print_string.h" -void Transform::affine_invert() { +void Transform3D::affine_invert() { basis.invert(); origin = basis.xform(-origin); } -Transform Transform::affine_inverse() const { - Transform ret = *this; +Transform3D Transform3D::affine_inverse() const { + Transform3D ret = *this; ret.affine_invert(); return ret; } -void Transform::invert() { +void Transform3D::invert() { basis.transpose(); origin = basis.xform(-origin); } -Transform Transform::inverse() const { +Transform3D Transform3D::inverse() const { // FIXME: this function assumes the basis is a rotation matrix, with no scaling. - // Transform::affine_inverse can handle matrices with scaling, so GDScript should eventually use that. - Transform ret = *this; + // Transform3D::affine_inverse can handle matrices with scaling, so GDScript should eventually use that. + Transform3D ret = *this; ret.invert(); return ret; } -void Transform::rotate(const Vector3 &p_axis, real_t p_phi) { +void Transform3D::rotate(const Vector3 &p_axis, real_t p_phi) { *this = rotated(p_axis, p_phi); } -Transform Transform::rotated(const Vector3 &p_axis, real_t p_phi) const { - return Transform(Basis(p_axis, p_phi), Vector3()) * (*this); +Transform3D Transform3D::rotated(const Vector3 &p_axis, real_t p_phi) const { + return Transform3D(Basis(p_axis, p_phi), Vector3()) * (*this); } -void Transform::rotate_basis(const Vector3 &p_axis, real_t p_phi) { +void Transform3D::rotate_basis(const Vector3 &p_axis, real_t p_phi) { basis.rotate(p_axis, p_phi); } -Transform Transform::looking_at(const Vector3 &p_target, const Vector3 &p_up) const { - Transform t = *this; - t.set_look_at(origin, p_target, p_up); +Transform3D Transform3D::looking_at(const Vector3 &p_target, const Vector3 &p_up) const { + Transform3D t = *this; + t.basis = Basis::looking_at(p_target - origin, p_up); return t; } -void Transform::set_look_at(const Vector3 &p_eye, const Vector3 &p_target, const Vector3 &p_up) { -#ifdef MATH_CHECKS - ERR_FAIL_COND(p_eye == p_target); - ERR_FAIL_COND(p_up.length() == 0); -#endif - // Reference: MESA source code - Vector3 v_x, v_y, v_z; - - /* Make rotation matrix */ - - /* Z vector */ - v_z = p_eye - p_target; - - v_z.normalize(); - - v_y = p_up; - - v_x = v_y.cross(v_z); -#ifdef MATH_CHECKS - ERR_FAIL_COND(v_x.length() == 0); -#endif - - /* Recompute Y = Z cross X */ - v_y = v_z.cross(v_x); - - v_x.normalize(); - v_y.normalize(); - - basis.set(v_x, v_y, v_z); - +void Transform3D::set_look_at(const Vector3 &p_eye, const Vector3 &p_target, const Vector3 &p_up) { + basis = Basis::looking_at(p_target - p_eye, p_up); origin = p_eye; } -Transform Transform::interpolate_with(const Transform &p_transform, real_t p_c) const { +Transform3D Transform3D::interpolate_with(const Transform3D &p_transform, real_t p_c) const { /* not sure if very "efficient" but good enough? 
*/ Vector3 src_scale = basis.get_scale(); - Quat src_rot = basis.get_rotation_quat(); + Quaternion src_rot = basis.get_rotation_quaternion(); Vector3 src_loc = origin; Vector3 dst_scale = p_transform.basis.get_scale(); - Quat dst_rot = p_transform.basis.get_rotation_quat(); + Quaternion dst_rot = p_transform.basis.get_rotation_quaternion(); Vector3 dst_loc = p_transform.origin; - Transform interp; - interp.basis.set_quat_scale(src_rot.slerp(dst_rot, p_c).normalized(), src_scale.lerp(dst_scale, p_c)); + Transform3D interp; + interp.basis.set_quaternion_scale(src_rot.slerp(dst_rot, p_c).normalized(), src_scale.lerp(dst_scale, p_c)); interp.origin = src_loc.lerp(dst_loc, p_c); return interp; } -void Transform::scale(const Vector3 &p_scale) { +void Transform3D::scale(const Vector3 &p_scale) { basis.scale(p_scale); origin *= p_scale; } -Transform Transform::scaled(const Vector3 &p_scale) const { - Transform t = *this; +Transform3D Transform3D::scaled(const Vector3 &p_scale) const { + Transform3D t = *this; t.scale(p_scale); return t; } -void Transform::scale_basis(const Vector3 &p_scale) { +void Transform3D::scale_basis(const Vector3 &p_scale) { basis.scale(p_scale); } -void Transform::translate(real_t p_tx, real_t p_ty, real_t p_tz) { +void Transform3D::translate(real_t p_tx, real_t p_ty, real_t p_tz) { translate(Vector3(p_tx, p_ty, p_tz)); } -void Transform::translate(const Vector3 &p_translation) { +void Transform3D::translate(const Vector3 &p_translation) { for (int i = 0; i < 3; i++) { origin[i] += basis[i].dot(p_translation); } } -Transform Transform::translated(const Vector3 &p_translation) const { - Transform t = *this; +Transform3D Transform3D::translated(const Vector3 &p_translation) const { + Transform3D t = *this; t.translate(p_translation); return t; } -void Transform::orthonormalize() { +void Transform3D::orthonormalize() { basis.orthonormalize(); } -Transform Transform::orthonormalized() const { - Transform _copy = *this; +Transform3D Transform3D::orthonormalized() const { + Transform3D _copy = *this; _copy.orthonormalize(); return _copy; } -bool Transform::is_equal_approx(const Transform &p_transform) const { +bool Transform3D::is_equal_approx(const Transform3D &p_transform) const { return basis.is_equal_approx(p_transform.basis) && origin.is_equal_approx(p_transform.origin); } -bool Transform::operator==(const Transform &p_transform) const { +bool Transform3D::operator==(const Transform3D &p_transform) const { return (basis == p_transform.basis && origin == p_transform.origin); } -bool Transform::operator!=(const Transform &p_transform) const { +bool Transform3D::operator!=(const Transform3D &p_transform) const { return (basis != p_transform.basis || origin != p_transform.origin); } -void Transform::operator*=(const Transform &p_transform) { +void Transform3D::operator*=(const Transform3D &p_transform) { origin = xform(p_transform.origin); basis *= p_transform.basis; } -Transform Transform::operator*(const Transform &p_transform) const { - Transform t = *this; +Transform3D Transform3D::operator*(const Transform3D &p_transform) const { + Transform3D t = *this; t *= p_transform; return t; } -Transform::operator String() const { - return basis.operator String() + " - " + origin.operator String(); +void Transform3D::operator*=(const real_t p_val) { + origin *= p_val; + basis *= p_val; +} + +Transform3D Transform3D::operator*(const real_t p_val) const { + Transform3D ret(*this); + ret *= p_val; + return ret; +} + +Transform3D::operator String() const { + return "[X: " + 
basis.get_axis(0).operator String() + + ", Y: " + basis.get_axis(1).operator String() + + ", Z: " + basis.get_axis(2).operator String() + + ", O: " + origin.operator String() + "]"; } -Transform::Transform(const Basis &p_basis, const Vector3 &p_origin) : +Transform3D::Transform3D(const Basis &p_basis, const Vector3 &p_origin) : basis(p_basis), origin(p_origin) { } -Transform::Transform(const Vector3 &p_x, const Vector3 &p_y, const Vector3 &p_z, const Vector3 &p_origin) : +Transform3D::Transform3D(const Vector3 &p_x, const Vector3 &p_y, const Vector3 &p_z, const Vector3 &p_origin) : origin(p_origin) { basis.set_axis(0, p_x); basis.set_axis(1, p_y); basis.set_axis(2, p_z); } -Transform::Transform(real_t xx, real_t xy, real_t xz, real_t yx, real_t yy, real_t yz, real_t zx, real_t zy, real_t zz, real_t ox, real_t oy, real_t oz) { +Transform3D::Transform3D(real_t xx, real_t xy, real_t xz, real_t yx, real_t yy, real_t yz, real_t zx, real_t zy, real_t zz, real_t ox, real_t oy, real_t oz) { basis = Basis(xx, xy, xz, yx, yy, yz, zx, zy, zz); origin = Vector3(ox, oy, oz); } diff --git a/core/math/transform.h b/core/math/transform_3d.h index 1c05dbe554..cadfdc13d1 100644 --- a/core/math/transform.h +++ b/core/math/transform_3d.h @@ -1,5 +1,5 @@ /*************************************************************************/ -/* transform.h */ +/* transform_3d.h */ /*************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -35,31 +35,31 @@ #include "core/math/basis.h" #include "core/math/plane.h" -class Transform { +class Transform3D { public: Basis basis; Vector3 origin; void invert(); - Transform inverse() const; + Transform3D inverse() const; void affine_invert(); - Transform affine_inverse() const; + Transform3D affine_inverse() const; - Transform rotated(const Vector3 &p_axis, real_t p_phi) const; + Transform3D rotated(const Vector3 &p_axis, real_t p_phi) const; void rotate(const Vector3 &p_axis, real_t p_phi); void rotate_basis(const Vector3 &p_axis, real_t p_phi); void set_look_at(const Vector3 &p_eye, const Vector3 &p_target, const Vector3 &p_up = Vector3(0, 1, 0)); - Transform looking_at(const Vector3 &p_target, const Vector3 &p_up = Vector3(0, 1, 0)) const; + Transform3D looking_at(const Vector3 &p_target, const Vector3 &p_up = Vector3(0, 1, 0)) const; void scale(const Vector3 &p_scale); - Transform scaled(const Vector3 &p_scale) const; + Transform3D scaled(const Vector3 &p_scale) const; void scale_basis(const Vector3 &p_scale); void translate(real_t p_tx, real_t p_ty, real_t p_tz); void translate(const Vector3 &p_translation); - Transform translated(const Vector3 &p_translation) const; + Transform3D translated(const Vector3 &p_translation) const; const Basis &get_basis() const { return basis; } void set_basis(const Basis &p_basis) { basis = p_basis; } @@ -68,32 +68,42 @@ public: void set_origin(const Vector3 &p_origin) { origin = p_origin; } void orthonormalize(); - Transform orthonormalized() const; - bool is_equal_approx(const Transform &p_transform) const; + Transform3D orthonormalized() const; + bool is_equal_approx(const Transform3D &p_transform) const; - bool operator==(const Transform &p_transform) const; - bool operator!=(const Transform &p_transform) const; + bool operator==(const Transform3D &p_transform) const; + bool operator!=(const Transform3D &p_transform) const; _FORCE_INLINE_ Vector3 xform(const Vector3 &p_vector) const; + _FORCE_INLINE_ AABB xform(const AABB &p_aabb) const; + _FORCE_INLINE_ Vector<Vector3> 
xform(const Vector<Vector3> &p_array) const; + + // NOTE: These are UNSAFE with non-uniform scaling, and will produce incorrect results. + // They use the transpose. + // For safe inverse transforms, xform by the affine_inverse. _FORCE_INLINE_ Vector3 xform_inv(const Vector3 &p_vector) const; + _FORCE_INLINE_ AABB xform_inv(const AABB &p_aabb) const; + _FORCE_INLINE_ Vector<Vector3> xform_inv(const Vector<Vector3> &p_array) const; + // Safe with non-uniform scaling (uses affine_inverse). _FORCE_INLINE_ Plane xform(const Plane &p_plane) const; _FORCE_INLINE_ Plane xform_inv(const Plane &p_plane) const; - _FORCE_INLINE_ AABB xform(const AABB &p_aabb) const; - _FORCE_INLINE_ AABB xform_inv(const AABB &p_aabb) const; + // These fast versions use precomputed affine inverse, and should be used in bottleneck areas where + // multiple planes are to be transformed. + _FORCE_INLINE_ Plane xform_fast(const Plane &p_plane, const Basis &p_basis_inverse_transpose) const; + static _FORCE_INLINE_ Plane xform_inv_fast(const Plane &p_plane, const Transform3D &p_inverse, const Basis &p_basis_transpose); - _FORCE_INLINE_ Vector<Vector3> xform(const Vector<Vector3> &p_array) const; - _FORCE_INLINE_ Vector<Vector3> xform_inv(const Vector<Vector3> &p_array) const; + void operator*=(const Transform3D &p_transform); + Transform3D operator*(const Transform3D &p_transform) const; + void operator*=(const real_t p_val); + Transform3D operator*(const real_t p_val) const; - void operator*=(const Transform &p_transform); - Transform operator*(const Transform &p_transform) const; + Transform3D interpolate_with(const Transform3D &p_transform, real_t p_c) const; - Transform interpolate_with(const Transform &p_transform, real_t p_c) const; - - _FORCE_INLINE_ Transform inverse_xform(const Transform &t) const { + _FORCE_INLINE_ Transform3D inverse_xform(const Transform3D &t) const { Vector3 v = t.origin - origin; - return Transform(basis.transpose_xform(t.basis), + return Transform3D(basis.transpose_xform(t.basis), basis.xform(v)); } @@ -106,20 +116,20 @@ public: operator String() const; - Transform() {} - Transform(const Basis &p_basis, const Vector3 &p_origin = Vector3()); - Transform(const Vector3 &p_x, const Vector3 &p_y, const Vector3 &p_z, const Vector3 &p_origin); - Transform(real_t xx, real_t xy, real_t xz, real_t yx, real_t yy, real_t yz, real_t zx, real_t zy, real_t zz, real_t ox, real_t oy, real_t oz); + Transform3D() {} + Transform3D(const Basis &p_basis, const Vector3 &p_origin = Vector3()); + Transform3D(const Vector3 &p_x, const Vector3 &p_y, const Vector3 &p_z, const Vector3 &p_origin); + Transform3D(real_t xx, real_t xy, real_t xz, real_t yx, real_t yy, real_t yz, real_t zx, real_t zy, real_t zz, real_t ox, real_t oy, real_t oz); }; -_FORCE_INLINE_ Vector3 Transform::xform(const Vector3 &p_vector) const { +_FORCE_INLINE_ Vector3 Transform3D::xform(const Vector3 &p_vector) const { return Vector3( basis[0].dot(p_vector) + origin.x, basis[1].dot(p_vector) + origin.y, basis[2].dot(p_vector) + origin.z); } -_FORCE_INLINE_ Vector3 Transform::xform_inv(const Vector3 &p_vector) const { +_FORCE_INLINE_ Vector3 Transform3D::xform_inv(const Vector3 &p_vector) const { Vector3 v = p_vector - origin; return Vector3( @@ -128,33 +138,23 @@ _FORCE_INLINE_ Vector3 Transform::xform_inv(const Vector3 &p_vector) const { (basis.elements[0][2] * v.x) + (basis.elements[1][2] * v.y) + (basis.elements[2][2] * v.z)); } -_FORCE_INLINE_ Plane Transform::xform(const Plane &p_plane) const { - Vector3 point = p_plane.normal * p_plane.d; - 
Vector3 point_dir = point + p_plane.normal; - point = xform(point); - point_dir = xform(point_dir); - - Vector3 normal = point_dir - point; - normal.normalize(); - real_t d = normal.dot(point); - - return Plane(normal, d); +// Neither the plane regular xform or xform_inv are particularly efficient, +// as they do a basis inverse. For xforming a large number +// of planes it is better to pre-calculate the inverse transpose basis once +// and reuse it for each plane, by using the 'fast' version of the functions. +_FORCE_INLINE_ Plane Transform3D::xform(const Plane &p_plane) const { + Basis b = basis.inverse(); + b.transpose(); + return xform_fast(p_plane, b); } -_FORCE_INLINE_ Plane Transform::xform_inv(const Plane &p_plane) const { - Vector3 point = p_plane.normal * p_plane.d; - Vector3 point_dir = point + p_plane.normal; - point = xform_inv(point); - point_dir = xform_inv(point_dir); - - Vector3 normal = point_dir - point; - normal.normalize(); - real_t d = normal.dot(point); - - return Plane(normal, d); +_FORCE_INLINE_ Plane Transform3D::xform_inv(const Plane &p_plane) const { + Transform3D inv = affine_inverse(); + Basis basis_transpose = basis.transposed(); + return xform_inv_fast(p_plane, inv, basis_transpose); } -_FORCE_INLINE_ AABB Transform::xform(const AABB &p_aabb) const { +_FORCE_INLINE_ AABB Transform3D::xform(const AABB &p_aabb) const { /* http://dev.theomader.com/transform-bounding-boxes/ */ Vector3 min = p_aabb.position; Vector3 max = p_aabb.position + p_aabb.size; @@ -179,7 +179,7 @@ _FORCE_INLINE_ AABB Transform::xform(const AABB &p_aabb) const { return r_aabb; } -_FORCE_INLINE_ AABB Transform::xform_inv(const AABB &p_aabb) const { +_FORCE_INLINE_ AABB Transform3D::xform_inv(const AABB &p_aabb) const { /* define vertices */ Vector3 vertices[8] = { Vector3(p_aabb.position.x + p_aabb.size.x, p_aabb.position.y + p_aabb.size.y, p_aabb.position.z + p_aabb.size.z), @@ -203,7 +203,7 @@ _FORCE_INLINE_ AABB Transform::xform_inv(const AABB &p_aabb) const { return ret; } -Vector<Vector3> Transform::xform(const Vector<Vector3> &p_array) const { +Vector<Vector3> Transform3D::xform(const Vector<Vector3> &p_array) const { Vector<Vector3> array; array.resize(p_array.size()); @@ -216,7 +216,7 @@ Vector<Vector3> Transform::xform(const Vector<Vector3> &p_array) const { return array; } -Vector<Vector3> Transform::xform_inv(const Vector<Vector3> &p_array) const { +Vector<Vector3> Transform3D::xform_inv(const Vector<Vector3> &p_array) const { Vector<Vector3> array; array.resize(p_array.size()); @@ -229,4 +229,37 @@ Vector<Vector3> Transform::xform_inv(const Vector<Vector3> &p_array) const { return array; } +_FORCE_INLINE_ Plane Transform3D::xform_fast(const Plane &p_plane, const Basis &p_basis_inverse_transpose) const { + // Transform a single point on the plane. + Vector3 point = p_plane.normal * p_plane.d; + point = xform(point); + + // Use inverse transpose for correct normals with non-uniform scaling. + Vector3 normal = p_basis_inverse_transpose.xform(p_plane.normal); + normal.normalize(); + + real_t d = normal.dot(point); + return Plane(normal, d); +} + +_FORCE_INLINE_ Plane Transform3D::xform_inv_fast(const Plane &p_plane, const Transform3D &p_inverse, const Basis &p_basis_transpose) { + // Transform a single point on the plane. + Vector3 point = p_plane.normal * p_plane.d; + point = p_inverse.xform(point); + + // Note that instead of precalculating the transpose, an alternative + // would be to use the transpose for the basis transform. 
+ // However that would be less SIMD friendly (requiring a swizzle). + // So the cost is one extra precalced value in the calling code. + // This is probably worth it, as this could be used in bottleneck areas. And + // where it is not a bottleneck, the non-fast method is fine. + + // Use transpose for correct normals with non-uniform scaling. + Vector3 normal = p_basis_transpose.xform(p_plane.normal); + normal.normalize(); + + real_t d = normal.dot(point); + return Plane(normal, d); +} + #endif // TRANSFORM_H diff --git a/core/math/triangle_mesh.cpp b/core/math/triangle_mesh.cpp index 23c0c686a2..bf06c848c5 100644 --- a/core/math/triangle_mesh.cpp +++ b/core/math/triangle_mesh.cpp @@ -32,9 +32,9 @@ #include "core/templates/sort_array.h" -int TriangleMesh::_create_bvh(BVH *p_bvh, BVH **p_bb, int p_from, int p_size, int p_depth, int &max_depth, int &max_alloc) { - if (p_depth > max_depth) { - max_depth = p_depth; +int TriangleMesh::_create_bvh(BVH *p_bvh, BVH **p_bb, int p_from, int p_size, int p_depth, int &r_max_depth, int &r_max_alloc) { + if (p_depth > r_max_depth) { + r_max_depth = p_depth; } if (p_size == 1) { @@ -70,10 +70,10 @@ int TriangleMesh::_create_bvh(BVH *p_bvh, BVH **p_bb, int p_from, int p_size, in } break; } - int left = _create_bvh(p_bvh, p_bb, p_from, p_size / 2, p_depth + 1, max_depth, max_alloc); - int right = _create_bvh(p_bvh, p_bb, p_from + p_size / 2, p_size - p_size / 2, p_depth + 1, max_depth, max_alloc); + int left = _create_bvh(p_bvh, p_bb, p_from, p_size / 2, p_depth + 1, r_max_depth, r_max_alloc); + int right = _create_bvh(p_bvh, p_bb, p_from + p_size / 2, p_size - p_size / 2, p_depth + 1, r_max_depth, r_max_alloc); - int index = max_alloc++; + int index = r_max_alloc++; BVH *_new = &p_bvh[index]; _new->aabb = aabb; _new->center = aabb.position + aabb.size * 0.5; @@ -600,7 +600,7 @@ bool TriangleMesh::inside_convex_shape(const Plane *p_planes, int p_plane_count, const Vector3 *vertexptr = vertices.ptr(); const BVH *bvhptr = bvh.ptr(); - Transform scale(Basis().scaled(p_scale)); + Transform3D scale(Basis().scaled(p_scale)); int pos = bvh.size() - 1; diff --git a/core/math/triangle_mesh.h b/core/math/triangle_mesh.h index 1d1dbc114b..463b0dd5c8 100644 --- a/core/math/triangle_mesh.h +++ b/core/math/triangle_mesh.h @@ -32,10 +32,10 @@ #define TRIANGLE_MESH_H #include "core/math/face3.h" -#include "core/object/reference.h" +#include "core/object/ref_counted.h" -class TriangleMesh : public Reference { - GDCLASS(TriangleMesh, Reference); +class TriangleMesh : public RefCounted { + GDCLASS(TriangleMesh, RefCounted); struct Triangle { Vector3 normal; diff --git a/core/math/vector2.cpp b/core/math/vector2.cpp index 5129ed336e..54abc1b7f2 100644 --- a/core/math/vector2.cpp +++ b/core/math/vector2.cpp @@ -59,7 +59,7 @@ Vector2 Vector2::normalized() const { bool Vector2::is_normalized() const { // use length_squared() instead of length() to avoid sqrt(), makes it more stringent. 
- return Math::is_equal_approx(length_squared(), 1.0, UNIT_EPSILON); + return Math::is_equal_approx(length_squared(), 1, (real_t)UNIT_EPSILON); } real_t Vector2::distance_to(const Vector2 &p_vector2) const { @@ -102,7 +102,7 @@ Vector2 Vector2::round() const { return Vector2(Math::round(x), Math::round(y)); } -Vector2 Vector2::rotated(real_t p_by) const { +Vector2 Vector2::rotated(const real_t p_by) const { real_t sine = Math::sin(p_by); real_t cosi = Math::cos(p_by); return Vector2( @@ -122,14 +122,20 @@ Vector2 Vector2::project(const Vector2 &p_to) const { return p_to * (dot(p_to) / p_to.length_squared()); } +Vector2 Vector2::clamp(const Vector2 &p_min, const Vector2 &p_max) const { + return Vector2( + CLAMP(x, p_min.x, p_max.x), + CLAMP(y, p_min.y, p_max.y)); +} + Vector2 Vector2::snapped(const Vector2 &p_step) const { return Vector2( Math::snapped(x, p_step.x), Math::snapped(y, p_step.y)); } -Vector2 Vector2::clamped(real_t p_len) const { - real_t l = length(); +Vector2 Vector2::limit_length(const real_t p_len) const { + const real_t l = length(); Vector2 v = *this; if (l > 0 && p_len < l) { v /= l; @@ -139,7 +145,7 @@ Vector2 Vector2::clamped(real_t p_len) const { return v; } -Vector2 Vector2::cubic_interpolate(const Vector2 &p_b, const Vector2 &p_pre_a, const Vector2 &p_post_b, real_t p_weight) const { +Vector2 Vector2::cubic_interpolate(const Vector2 &p_b, const Vector2 &p_pre_a, const Vector2 &p_post_b, const real_t p_weight) const { Vector2 p0 = p_pre_a; Vector2 p1 = *this; Vector2 p2 = p_b; @@ -187,8 +193,18 @@ bool Vector2::is_equal_approx(const Vector2 &p_v) const { return Math::is_equal_approx(x, p_v.x) && Math::is_equal_approx(y, p_v.y); } +Vector2::operator String() const { + return "(" + String::num_real(x, false) + ", " + String::num_real(y, false) + ")"; +} + /* Vector2i */ +Vector2i Vector2i::clamp(const Vector2i &p_min, const Vector2i &p_max) const { + return Vector2i( + CLAMP(x, p_min.x, p_max.x), + CLAMP(y, p_min.y, p_max.y)); +} + Vector2i Vector2i::operator+(const Vector2i &p_v) const { return Vector2i(x + p_v.x, y + p_v.y); } @@ -257,3 +273,7 @@ bool Vector2i::operator==(const Vector2i &p_vec2) const { bool Vector2i::operator!=(const Vector2i &p_vec2) const { return x != p_vec2.x || y != p_vec2.y; } + +Vector2i::operator String() const { + return "(" + itos(x) + ", " + itos(y) + ")"; +} diff --git a/core/math/vector2.h b/core/math/vector2.h index edc6e3a3ef..330b4741b1 100644 --- a/core/math/vector2.h +++ b/core/math/vector2.h @@ -37,18 +37,26 @@ struct Vector2i; struct Vector2 { + static const int AXIS_COUNT = 2; + enum Axis { AXIS_X, AXIS_Y, }; union { - real_t x = 0; - real_t width; - }; - union { - real_t y = 0; - real_t height; + struct { + union { + real_t x; + real_t width; + }; + union { + real_t y; + real_t height; + }; + }; + + real_t coord[2] = { 0 }; }; _FORCE_INLINE_ real_t &operator[](int p_idx) { @@ -58,12 +66,25 @@ struct Vector2 { return p_idx ? y : x; } + _FORCE_INLINE_ void set_all(const real_t p_value) { + x = y = p_value; + } + + _FORCE_INLINE_ int min_axis() const { + return x < y ? 0 : 1; + } + + _FORCE_INLINE_ int max_axis() const { + return x < y ? 
1 : 0; + } + void normalize(); Vector2 normalized() const; bool is_normalized() const; real_t length() const; real_t length_squared() const; + Vector2 limit_length(const real_t p_len = 1.0) const; Vector2 min(const Vector2 &p_vector2) const { return Vector2(MIN(x, p_vector2.x), MIN(y, p_vector2.y)); @@ -85,13 +106,11 @@ struct Vector2 { Vector2 posmodv(const Vector2 &p_modv) const; Vector2 project(const Vector2 &p_to) const; - Vector2 plane_project(real_t p_d, const Vector2 &p_vec) const; + Vector2 plane_project(const real_t p_d, const Vector2 &p_vec) const; - Vector2 clamped(real_t p_len) const; - - _FORCE_INLINE_ Vector2 lerp(const Vector2 &p_to, real_t p_weight) const; - _FORCE_INLINE_ Vector2 slerp(const Vector2 &p_to, real_t p_weight) const; - Vector2 cubic_interpolate(const Vector2 &p_b, const Vector2 &p_pre_a, const Vector2 &p_post_b, real_t p_weight) const; + _FORCE_INLINE_ Vector2 lerp(const Vector2 &p_to, const real_t p_weight) const; + _FORCE_INLINE_ Vector2 slerp(const Vector2 &p_to, const real_t p_weight) const; + Vector2 cubic_interpolate(const Vector2 &p_b, const Vector2 &p_pre_a, const Vector2 &p_post_b, const real_t p_weight) const; Vector2 move_toward(const Vector2 &p_to, const real_t p_delta) const; Vector2 slide(const Vector2 &p_normal) const; @@ -133,7 +152,7 @@ struct Vector2 { return Vector2(Math::abs(x), Math::abs(y)); } - Vector2 rotated(real_t p_by) const; + Vector2 rotated(const real_t p_by) const; Vector2 orthogonal() const { return Vector2(y, -x); } @@ -143,34 +162,35 @@ struct Vector2 { Vector2 ceil() const; Vector2 round() const; Vector2 snapped(const Vector2 &p_by) const; + Vector2 clamp(const Vector2 &p_min, const Vector2 &p_max) const; real_t aspect() const { return width / height; } - operator String() const { return String::num(x) + ", " + String::num(y); } + operator String() const; _FORCE_INLINE_ Vector2() {} - _FORCE_INLINE_ Vector2(real_t p_x, real_t p_y) { + _FORCE_INLINE_ Vector2(const real_t p_x, const real_t p_y) { x = p_x; y = p_y; } }; -_FORCE_INLINE_ Vector2 Vector2::plane_project(real_t p_d, const Vector2 &p_vec) const { +_FORCE_INLINE_ Vector2 Vector2::plane_project(const real_t p_d, const Vector2 &p_vec) const { return p_vec - *this * (dot(p_vec) - p_d); } -_FORCE_INLINE_ Vector2 operator*(float p_scalar, const Vector2 &p_vec) { +_FORCE_INLINE_ Vector2 operator*(const float p_scalar, const Vector2 &p_vec) { return p_vec * p_scalar; } -_FORCE_INLINE_ Vector2 operator*(double p_scalar, const Vector2 &p_vec) { +_FORCE_INLINE_ Vector2 operator*(const double p_scalar, const Vector2 &p_vec) { return p_vec * p_scalar; } -_FORCE_INLINE_ Vector2 operator*(int32_t p_scalar, const Vector2 &p_vec) { +_FORCE_INLINE_ Vector2 operator*(const int32_t p_scalar, const Vector2 &p_vec) { return p_vec * p_scalar; } -_FORCE_INLINE_ Vector2 operator*(int64_t p_scalar, const Vector2 &p_vec) { +_FORCE_INLINE_ Vector2 operator*(const int64_t p_scalar, const Vector2 &p_vec) { return p_vec * p_scalar; } @@ -230,7 +250,7 @@ _FORCE_INLINE_ bool Vector2::operator!=(const Vector2 &p_vec2) const { return x != p_vec2.x || y != p_vec2.y; } -Vector2 Vector2::lerp(const Vector2 &p_to, real_t p_weight) const { +Vector2 Vector2::lerp(const Vector2 &p_to, const real_t p_weight) const { Vector2 res = *this; res.x += (p_weight * (p_to.x - x)); @@ -239,7 +259,7 @@ Vector2 Vector2::lerp(const Vector2 &p_to, real_t p_weight) const { return res; } -Vector2 Vector2::slerp(const Vector2 &p_to, real_t p_weight) const { +Vector2 Vector2::slerp(const Vector2 &p_to, const real_t p_weight) 
const { #ifdef MATH_CHECKS ERR_FAIL_COND_V_MSG(!is_normalized(), Vector2(), "The start Vector2 must be normalized."); #endif @@ -280,6 +300,14 @@ struct Vector2i { return p_idx ? y : x; } + _FORCE_INLINE_ int min_axis() const { + return x < y ? 0 : 1; + } + + _FORCE_INLINE_ int max_axis() const { + return x < y ? 1 : 0; + } + Vector2i min(const Vector2i &p_vector2i) const { return Vector2(MIN(x, p_vector2i.x), MIN(y, p_vector2i.y)); } @@ -318,8 +346,9 @@ struct Vector2i { real_t aspect() const { return width / (real_t)height; } Vector2i sign() const { return Vector2i(SGN(x), SGN(y)); } Vector2i abs() const { return Vector2i(ABS(x), ABS(y)); } + Vector2i clamp(const Vector2i &p_min, const Vector2i &p_max) const; - operator String() const { return String::num(x) + ", " + String::num(y); } + operator String() const; operator Vector2() const { return Vector2(x, y); } @@ -328,7 +357,7 @@ struct Vector2i { x = (int32_t)p_vec2.x; y = (int32_t)p_vec2.y; } - inline Vector2i(int32_t p_x, int32_t p_y) { + inline Vector2i(const int32_t p_x, const int32_t p_y) { x = p_x; y = p_y; } diff --git a/core/math/vector3.cpp b/core/math/vector3.cpp index f0629d3db8..401c3ccd9c 100644 --- a/core/math/vector3.cpp +++ b/core/math/vector3.cpp @@ -32,47 +32,57 @@ #include "core/math/basis.h" -void Vector3::rotate(const Vector3 &p_axis, real_t p_phi) { +void Vector3::rotate(const Vector3 &p_axis, const real_t p_phi) { *this = Basis(p_axis, p_phi).xform(*this); } -Vector3 Vector3::rotated(const Vector3 &p_axis, real_t p_phi) const { +Vector3 Vector3::rotated(const Vector3 &p_axis, const real_t p_phi) const { Vector3 r = *this; r.rotate(p_axis, p_phi); return r; } -void Vector3::set_axis(int p_axis, real_t p_value) { +void Vector3::set_axis(const int p_axis, const real_t p_value) { ERR_FAIL_INDEX(p_axis, 3); coord[p_axis] = p_value; } -real_t Vector3::get_axis(int p_axis) const { +real_t Vector3::get_axis(const int p_axis) const { ERR_FAIL_INDEX_V(p_axis, 3, 0); return operator[](p_axis); } -int Vector3::min_axis() const { - return x < y ? (x < z ? 0 : 2) : (y < z ? 1 : 2); +Vector3 Vector3::clamp(const Vector3 &p_min, const Vector3 &p_max) const { + return Vector3( + CLAMP(x, p_min.x, p_max.x), + CLAMP(y, p_min.y, p_max.y), + CLAMP(z, p_min.z, p_max.z)); } -int Vector3::max_axis() const { - return x < y ? (y < z ? 2 : 1) : (x < z ? 
2 : 0); -} - -void Vector3::snap(Vector3 p_step) { +void Vector3::snap(const Vector3 p_step) { x = Math::snapped(x, p_step.x); y = Math::snapped(y, p_step.y); z = Math::snapped(z, p_step.z); } -Vector3 Vector3::snapped(Vector3 p_step) const { +Vector3 Vector3::snapped(const Vector3 p_step) const { Vector3 v = *this; v.snap(p_step); return v; } -Vector3 Vector3::cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, real_t p_weight) const { +Vector3 Vector3::limit_length(const real_t p_len) const { + const real_t l = length(); + Vector3 v = *this; + if (l > 0 && p_len < l) { + v /= l; + v *= p_len; + } + + return v; +} + +Vector3 Vector3::cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, const real_t p_weight) const { Vector3 p0 = p_pre_a; Vector3 p1 = *this; Vector3 p2 = p_b; @@ -116,5 +126,5 @@ bool Vector3::is_equal_approx(const Vector3 &p_v) const { } Vector3::operator String() const { - return (rtos(x) + ", " + rtos(y) + ", " + rtos(z)); + return "(" + String::num_real(x, false) + ", " + String::num_real(y, false) + ", " + String::num_real(z, false) + ")"; } diff --git a/core/math/vector3.h b/core/math/vector3.h index 377581bb45..6a4c42f41b 100644 --- a/core/math/vector3.h +++ b/core/math/vector3.h @@ -38,6 +38,8 @@ class Basis; struct Vector3 { + static const int AXIS_COUNT = 3; + enum Axis { AXIS_X, AXIS_Y, @@ -54,19 +56,28 @@ struct Vector3 { real_t coord[3] = { 0 }; }; - _FORCE_INLINE_ const real_t &operator[](int p_axis) const { + _FORCE_INLINE_ const real_t &operator[](const int p_axis) const { return coord[p_axis]; } - _FORCE_INLINE_ real_t &operator[](int p_axis) { + _FORCE_INLINE_ real_t &operator[](const int p_axis) { return coord[p_axis]; } - void set_axis(int p_axis, real_t p_value); - real_t get_axis(int p_axis) const; + void set_axis(const int p_axis, const real_t p_value); + real_t get_axis(const int p_axis) const; + + _FORCE_INLINE_ void set_all(const real_t p_value) { + x = y = z = p_value; + } + + _FORCE_INLINE_ int min_axis() const { + return x < y ? (x < z ? 0 : 2) : (y < z ? 1 : 2); + } - int min_axis() const; - int max_axis() const; + _FORCE_INLINE_ int max_axis() const { + return x < y ? (y < z ? 2 : 1) : (x < z ? 
2 : 0); + } _FORCE_INLINE_ real_t length() const; _FORCE_INLINE_ real_t length_squared() const; @@ -75,20 +86,21 @@ struct Vector3 { _FORCE_INLINE_ Vector3 normalized() const; _FORCE_INLINE_ bool is_normalized() const; _FORCE_INLINE_ Vector3 inverse() const; + Vector3 limit_length(const real_t p_len = 1.0) const; _FORCE_INLINE_ void zero(); - void snap(Vector3 p_val); - Vector3 snapped(Vector3 p_val) const; + void snap(const Vector3 p_val); + Vector3 snapped(const Vector3 p_val) const; - void rotate(const Vector3 &p_axis, real_t p_phi); - Vector3 rotated(const Vector3 &p_axis, real_t p_phi) const; + void rotate(const Vector3 &p_axis, const real_t p_phi); + Vector3 rotated(const Vector3 &p_axis, const real_t p_phi) const; /* Static Methods between 2 vector3s */ - _FORCE_INLINE_ Vector3 lerp(const Vector3 &p_to, real_t p_weight) const; - _FORCE_INLINE_ Vector3 slerp(const Vector3 &p_to, real_t p_weight) const; - Vector3 cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, real_t p_weight) const; + _FORCE_INLINE_ Vector3 lerp(const Vector3 &p_to, const real_t p_weight) const; + _FORCE_INLINE_ Vector3 slerp(const Vector3 &p_to, const real_t p_weight) const; + Vector3 cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, const real_t p_weight) const; Vector3 move_toward(const Vector3 &p_to, const real_t p_delta) const; _FORCE_INLINE_ Vector3 cross(const Vector3 &p_b) const; @@ -101,6 +113,7 @@ struct Vector3 { _FORCE_INLINE_ Vector3 sign() const; _FORCE_INLINE_ Vector3 ceil() const; _FORCE_INLINE_ Vector3 round() const; + Vector3 clamp(const Vector3 &p_min, const Vector3 &p_max) const; _FORCE_INLINE_ real_t distance_to(const Vector3 &p_to) const; _FORCE_INLINE_ real_t distance_squared_to(const Vector3 &p_to) const; @@ -130,10 +143,10 @@ struct Vector3 { _FORCE_INLINE_ Vector3 &operator/=(const Vector3 &p_v); _FORCE_INLINE_ Vector3 operator/(const Vector3 &p_v) const; - _FORCE_INLINE_ Vector3 &operator*=(real_t p_scalar); - _FORCE_INLINE_ Vector3 operator*(real_t p_scalar) const; - _FORCE_INLINE_ Vector3 &operator/=(real_t p_scalar); - _FORCE_INLINE_ Vector3 operator/(real_t p_scalar) const; + _FORCE_INLINE_ Vector3 &operator*=(const real_t p_scalar); + _FORCE_INLINE_ Vector3 operator*(const real_t p_scalar) const; + _FORCE_INLINE_ Vector3 &operator/=(const real_t p_scalar); + _FORCE_INLINE_ Vector3 operator/(const real_t p_scalar) const; _FORCE_INLINE_ Vector3 operator-() const; @@ -155,7 +168,7 @@ struct Vector3 { y = p_ivec.y; z = p_ivec.z; } - _FORCE_INLINE_ Vector3(real_t p_x, real_t p_y, real_t p_z) { + _FORCE_INLINE_ Vector3(const real_t p_x, const real_t p_y, const real_t p_z) { x = p_x; y = p_y; z = p_z; @@ -195,14 +208,14 @@ Vector3 Vector3::round() const { return Vector3(Math::round(x), Math::round(y), Math::round(z)); } -Vector3 Vector3::lerp(const Vector3 &p_to, real_t p_weight) const { +Vector3 Vector3::lerp(const Vector3 &p_to, const real_t p_weight) const { return Vector3( x + (p_weight * (p_to.x - x)), y + (p_weight * (p_to.y - y)), z + (p_weight * (p_to.z - z))); } -Vector3 Vector3::slerp(const Vector3 &p_to, real_t p_weight) const { +Vector3 Vector3::slerp(const Vector3 &p_to, const real_t p_weight) const { real_t theta = angle_to(p_to); return rotated(cross(p_to).normalized(), theta * p_weight); } @@ -290,29 +303,41 @@ Vector3 Vector3::operator/(const Vector3 &p_v) const { return Vector3(x / p_v.x, y / p_v.y, z / p_v.z); } -Vector3 &Vector3::operator*=(real_t p_scalar) { +Vector3 &Vector3::operator*=(const 
real_t p_scalar) { x *= p_scalar; y *= p_scalar; z *= p_scalar; return *this; } -_FORCE_INLINE_ Vector3 operator*(real_t p_scalar, const Vector3 &p_vec) { +_FORCE_INLINE_ Vector3 operator*(const float p_scalar, const Vector3 &p_vec) { + return p_vec * p_scalar; +} + +_FORCE_INLINE_ Vector3 operator*(const double p_scalar, const Vector3 &p_vec) { + return p_vec * p_scalar; +} + +_FORCE_INLINE_ Vector3 operator*(const int32_t p_scalar, const Vector3 &p_vec) { + return p_vec * p_scalar; +} + +_FORCE_INLINE_ Vector3 operator*(const int64_t p_scalar, const Vector3 &p_vec) { return p_vec * p_scalar; } -Vector3 Vector3::operator*(real_t p_scalar) const { +Vector3 Vector3::operator*(const real_t p_scalar) const { return Vector3(x * p_scalar, y * p_scalar, z * p_scalar); } -Vector3 &Vector3::operator/=(real_t p_scalar) { +Vector3 &Vector3::operator/=(const real_t p_scalar) { x /= p_scalar; y /= p_scalar; z /= p_scalar; return *this; } -Vector3 Vector3::operator/(real_t p_scalar) const { +Vector3 Vector3::operator/(const real_t p_scalar) const { return Vector3(x / p_scalar, y / p_scalar, z / p_scalar); } @@ -412,7 +437,7 @@ Vector3 Vector3::normalized() const { bool Vector3::is_normalized() const { // use length_squared() instead of length() to avoid sqrt(), makes it more stringent. - return Math::is_equal_approx(length_squared(), 1.0, UNIT_EPSILON); + return Math::is_equal_approx(length_squared(), 1, (real_t)UNIT_EPSILON); } Vector3 Vector3::inverse() const { diff --git a/core/math/vector3i.cpp b/core/math/vector3i.cpp index 167fa3221d..d3a57af77c 100644 --- a/core/math/vector3i.cpp +++ b/core/math/vector3i.cpp @@ -30,12 +30,12 @@ #include "vector3i.h" -void Vector3i::set_axis(int p_axis, int32_t p_value) { +void Vector3i::set_axis(const int p_axis, const int32_t p_value) { ERR_FAIL_INDEX(p_axis, 3); coord[p_axis] = p_value; } -int32_t Vector3i::get_axis(int p_axis) const { +int32_t Vector3i::get_axis(const int p_axis) const { ERR_FAIL_INDEX_V(p_axis, 3, 0); return operator[](p_axis); } @@ -48,6 +48,13 @@ int Vector3i::max_axis() const { return x < y ? (y < z ? 2 : 1) : (x < z ? 
2 : 0); } +Vector3i Vector3i::clamp(const Vector3i &p_min, const Vector3i &p_max) const { + return Vector3i( + CLAMP(x, p_min.x, p_max.x), + CLAMP(y, p_min.y, p_max.y), + CLAMP(z, p_min.z, p_max.z)); +} + Vector3i::operator String() const { - return (itos(x) + ", " + itos(y) + ", " + itos(z)); + return "(" + itos(x) + ", " + itos(y) + ", " + itos(z) + ")"; } diff --git a/core/math/vector3i.h b/core/math/vector3i.h index b0411fb62e..9308d09045 100644 --- a/core/math/vector3i.h +++ b/core/math/vector3i.h @@ -51,16 +51,16 @@ struct Vector3i { int32_t coord[3] = { 0 }; }; - _FORCE_INLINE_ const int32_t &operator[](int p_axis) const { + _FORCE_INLINE_ const int32_t &operator[](const int p_axis) const { return coord[p_axis]; } - _FORCE_INLINE_ int32_t &operator[](int p_axis) { + _FORCE_INLINE_ int32_t &operator[](const int p_axis) { return coord[p_axis]; } - void set_axis(int p_axis, int32_t p_value); - int32_t get_axis(int p_axis) const; + void set_axis(const int p_axis, const int32_t p_value); + int32_t get_axis(const int p_axis) const; int min_axis() const; int max_axis() const; @@ -69,6 +69,7 @@ struct Vector3i { _FORCE_INLINE_ Vector3i abs() const; _FORCE_INLINE_ Vector3i sign() const; + Vector3i clamp(const Vector3i &p_min, const Vector3i &p_max) const; /* Operators */ @@ -83,12 +84,12 @@ struct Vector3i { _FORCE_INLINE_ Vector3i &operator%=(const Vector3i &p_v); _FORCE_INLINE_ Vector3i operator%(const Vector3i &p_v) const; - _FORCE_INLINE_ Vector3i &operator*=(int32_t p_scalar); - _FORCE_INLINE_ Vector3i operator*(int32_t p_scalar) const; - _FORCE_INLINE_ Vector3i &operator/=(int32_t p_scalar); - _FORCE_INLINE_ Vector3i operator/(int32_t p_scalar) const; - _FORCE_INLINE_ Vector3i &operator%=(int32_t p_scalar); - _FORCE_INLINE_ Vector3i operator%(int32_t p_scalar) const; + _FORCE_INLINE_ Vector3i &operator*=(const int32_t p_scalar); + _FORCE_INLINE_ Vector3i operator*(const int32_t p_scalar) const; + _FORCE_INLINE_ Vector3i &operator/=(const int32_t p_scalar); + _FORCE_INLINE_ Vector3i operator/(const int32_t p_scalar) const; + _FORCE_INLINE_ Vector3i &operator%=(const int32_t p_scalar); + _FORCE_INLINE_ Vector3i operator%(const int32_t p_scalar) const; _FORCE_INLINE_ Vector3i operator-() const; @@ -102,7 +103,7 @@ struct Vector3i { operator String() const; _FORCE_INLINE_ Vector3i() {} - _FORCE_INLINE_ Vector3i(int32_t p_x, int32_t p_y, int32_t p_z) { + _FORCE_INLINE_ Vector3i(const int32_t p_x, const int32_t p_y, const int32_t p_z) { x = p_x; y = p_y; z = p_z; @@ -174,40 +175,52 @@ Vector3i Vector3i::operator%(const Vector3i &p_v) const { return Vector3i(x % p_v.x, y % p_v.y, z % p_v.z); } -Vector3i &Vector3i::operator*=(int32_t p_scalar) { +Vector3i &Vector3i::operator*=(const int32_t p_scalar) { x *= p_scalar; y *= p_scalar; z *= p_scalar; return *this; } -_FORCE_INLINE_ Vector3i operator*(int32_t p_scalar, const Vector3i &p_vec) { - return p_vec * p_scalar; +_FORCE_INLINE_ Vector3i operator*(const int32_t p_scalar, const Vector3i &p_vector) { + return p_vector * p_scalar; } -Vector3i Vector3i::operator*(int32_t p_scalar) const { +_FORCE_INLINE_ Vector3i operator*(const int64_t p_scalar, const Vector3i &p_vector) { + return p_vector * p_scalar; +} + +_FORCE_INLINE_ Vector3i operator*(const float p_scalar, const Vector3i &p_vector) { + return p_vector * p_scalar; +} + +_FORCE_INLINE_ Vector3i operator*(const double p_scalar, const Vector3i &p_vector) { + return p_vector * p_scalar; +} + +Vector3i Vector3i::operator*(const int32_t p_scalar) const { return Vector3i(x * p_scalar, y * 
p_scalar, z * p_scalar); } -Vector3i &Vector3i::operator/=(int32_t p_scalar) { +Vector3i &Vector3i::operator/=(const int32_t p_scalar) { x /= p_scalar; y /= p_scalar; z /= p_scalar; return *this; } -Vector3i Vector3i::operator/(int32_t p_scalar) const { +Vector3i Vector3i::operator/(const int32_t p_scalar) const { return Vector3i(x / p_scalar, y / p_scalar, z / p_scalar); } -Vector3i &Vector3i::operator%=(int32_t p_scalar) { +Vector3i &Vector3i::operator%=(const int32_t p_scalar) { x %= p_scalar; y %= p_scalar; z %= p_scalar; return *this; } -Vector3i Vector3i::operator%(int32_t p_scalar) const { +Vector3i Vector3i::operator%(const int32_t p_scalar) const { return Vector3i(x % p_scalar, y % p_scalar, z % p_scalar); } |
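
The new plane 'fast' paths in transform.h exist so that the basis inverse is computed once and reused when many planes are transformed. A minimal usage sketch under that assumption; xf, planes and world_plane are hypothetical caller-side variables, not part of this patch:

    // Precompute once, reuse for every plane (illustrative sketch).
    Basis inv_transpose = xf.basis.inverse().transposed();
    for (int i = 0; i < planes.size(); i++) {
        planes.write[i] = xf.xform_fast(planes[i], inv_transpose);
    }

    // The inverse direction is static and takes both precomputed values.
    Transform3D inv = xf.affine_inverse();
    Basis basis_transpose = xf.basis.transposed();
    Plane local_plane = Transform3D::xform_inv_fast(world_plane, inv, basis_transpose);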
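The NOTE added to the Vector3/AABB xform_inv() declarations can be checked with a quick round trip: with non-uniform scale the basis transpose is not its inverse, so only the affine_inverse path recovers the original point. The values below are a sketch, not taken from the patch:

    Transform3D xf(Basis().scaled(Vector3(2, 1, 1)), Vector3());
    Vector3 p(1, 0, 0);

    Vector3 via_transpose = xf.xform_inv(xf.xform(p));               // (4, 0, 0) - incorrect
    Vector3 via_affine_inv = xf.affine_inverse().xform(xf.xform(p)); // (1, 0, 0) - recovers p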
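On the Vector2 side, clamped() is renamed to limit_length() (capping the vector's length) and a separate component-wise clamp() between two vectors is added to both Vector2 and Vector2i. Illustrative values:

    Vector2 velocity(30, 40);                         // length 50
    Vector2 capped = velocity.limit_length(10.0);     // (6, 8), length 10
    Vector2 unchanged = velocity.limit_length(100.0); // (30, 40), already short enough

    Vector2 pos = Vector2(120, -5).clamp(Vector2(0, 0), Vector2(100, 100)); // (100, 0)
    Vector2i cell = Vector2i(7, -3).clamp(Vector2i(0, 0), Vector2i(4, 4));  // (4, 0)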
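Vector3 gains the same limit_length()/clamp() pair and Vector3i gains clamp(); limit_length() defaults to a maximum length of 1.0. Again, sketch values only:

    Vector3 dir = Vector3(0, 3, 4).limit_length();                                  // (0, 0.6, 0.8)
    Vector3 boxed = Vector3(5, -2, 9).clamp(Vector3(0, 0, 0), Vector3(4, 4, 4));    // (4, 0, 4)
    Vector3i cell = Vector3i(5, -2, 9).clamp(Vector3i(0, 0, 0), Vector3i(4, 4, 4)); // (4, 0, 4)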
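vector3.h previously exposed only a real_t overload for scalar-on-the-left multiplication; the patch mirrors vector2.h by adding float, double, int32_t and int64_t overloads, and the operator String() casts now produce a bracketed form. A couple of assumed expressions:

    Vector3 a = 2.0 * Vector3(1, 2, 3); // double * Vector3 -> (2, 4, 6)
    Vector3 b = 3 * Vector3(1, 0, 0);   // int32_t * Vector3 -> (3, 0, 0)

    String s = Vector3i(1, 2, 3);       // "(1, 2, 3)" with the new format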