4 files changed, 67 insertions, 10 deletions
diff --git a/core/math/random_number_generator.h b/core/math/random_number_generator.h
index 6b6bcdd2cd..a6182a4b33 100644
--- a/core/math/random_number_generator.h
+++ b/core/math/random_number_generator.h
@@ -59,7 +59,10 @@ public:
 
 	_FORCE_INLINE_ int randi_range(int from, int to) {
 		unsigned int ret = randbase.rand();
-		return ret % (to - from + 1) + from;
+		// Returns an integer between the two bounds, inclusive; the bounds may come in either order. Unsigned span math cannot overflow int.
+		const unsigned int low = (unsigned int)(to < from ? to : from);
+		const unsigned int span = (unsigned int)(to < from ? from : to) - low + 1u; // wraps to 0 only when the range covers every int
+		return (int)(span ? ret % span + low : ret); // span == 0: no reduction needed, and it avoids a modulo by zero
 	}
 
 	RandomNumberGenerator();
diff --git a/core/math/random_pcg.cpp b/core/math/random_pcg.cpp
index 8351bd138e..00c0af515d 100644
--- a/core/math/random_pcg.cpp
+++ b/core/math/random_pcg.cpp
@@ -43,13 +43,9 @@ void RandomPCG::randomize() {
 }
 
 double RandomPCG::random(double p_from, double p_to) {
-	unsigned int r = rand();
-	double ret = (double)r / (double)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randd() * (p_to - p_from) + p_from; // scale the randd() sample by the span, then offset it by p_from
 }
 
 float RandomPCG::random(float p_from, float p_to) {
-	unsigned int r = rand();
-	float ret = (float)r / (float)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randf() * (p_to - p_from) + p_from; // scale the randf() sample by the span, then offset it by p_from
 }
diff --git a/core/math/random_pcg.h b/core/math/random_pcg.h
index 0d1b311c0d..aa25914638 100644
--- a/core/math/random_pcg.h
+++ b/core/math/random_pcg.h
@@ -37,6 +37,28 @@
 
 #include "thirdparty/misc/pcg.h"
 
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_clz)) // CLZ32(x): number of leading zero bits in a 32-bit value
+#define CLZ32(x) __builtin_clz(x) // the users in this header never pass 0 (they check for it first)
+#elif defined(_MSC_VER)
+#include <intrin.h> // compiler-provided header, so use the angle-bracket form
+static inline int bsr_clz32(uint32_t x) { // MSVC stand-in for __builtin_clz; name avoids the reserved "__" prefix
+	unsigned long index = 0; // keeps the result defined even if the scan finds no set bit
+	_BitScanReverse(&index, x); // stores the position of the highest set bit
+	return 31 - (int)index; // bit position -> count of zero bits above it
+}
+#define CLZ32(x) bsr_clz32(x)
+#else
+#endif // CLZ32 stays undefined otherwise; randd()/randf() test defined(CLZ32) and fall back
+
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_ldexp) && _llvm_has_builtin(__builtin_ldexpf)) // LDEXP/LDEXPF(s, e): s * 2^e
+#define LDEXP(s, e) __builtin_ldexp(s, e)
+#define LDEXPF(s, e) __builtin_ldexpf(s, e)
+#else
+#include <math.h> // standard header, so use the angle-bracket form
+#define LDEXP(s, e) ldexp(s, e)
+#define LDEXPF(s, e) ldexpf(s, e) // float variant, mirroring __builtin_ldexpf in the branch above
+#endif
+
 class RandomPCG {
 	pcg32_random_t pcg;
 	uint64_t current_seed; // seed with this to get the same state
@@ -60,8 +82,44 @@ public:
 		current_seed = pcg.state;
 		return pcg32_random_r(&pcg);
 	}
-	_FORCE_INLINE_ double randd() { return (double)rand() / (double)RANDOM_MAX; }
-	_FORCE_INLINE_ float randf() { return (float)rand() / (float)RANDOM_MAX; }
+
+	// Obtaining floating point numbers in [0, 1] range with "good enough" uniformity.
+	// These functions sample the output of rand() as the fractional part of an infinite binary number,
+	// with some tricks applied to reduce ops and branching:
+	// 1. Instead of shifting to the first 1 and connecting random bits, we simply set the MSB and LSB to 1.
+	//    Provided that the RNG is actually uniform bit by bit, this should have the exact same effect.
+	// 2. In order to compensate for exponent info loss, we count leading zeros of another random number and add that
+	//    to the initial offset. This has the same probability as counting and shifting an actual bit stream: 2^-n for n zeros.
+	// The zero count comes from one 32-bit sample (at most 31 for a non-zero value), so results should be uniform down to about 2^-32;
+	// anything below that threshold is floored to 0. This keeps rand() calls per sample low at a (totally subjective) acceptable quality.
+	// If CLZ32 isn't available, fall back to bit truncation for performance, sacrificing uniformity.
+	_FORCE_INLINE_ double randd() {
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand();
+		if (unlikely(proto_exp_offset == 0)) {
+			return 0;
+		}
+		uint64_t significand = ((uint64_t)rand()) << 32; // high word in its own statement: '|' leaves operand evaluation order unspecified
+		significand |= rand() | 0x8000000000000001U; // low word, then force MSB and LSB to 1 (trick 1 above)
+		return LDEXP((double)significand, -64 - CLZ32(proto_exp_offset)); // every leading zero of the first sample halves the result
+#else
+#pragma message("RandomPCG::randd - intrinsic clz is not available, falling back to bit truncation")
+		const uint64_t high_word = rand(); // sequenced before the low word so a given seed yields the same value on every compiler
+		return (double)(((high_word << 32) | rand()) & 0x1FFFFFFFFFFFFFU) / (double)0x1FFFFFFFFFFFFFU; // masked bits divided by the mask's maximum
+#endif
+	}
+	_FORCE_INLINE_ float randf() { // float counterpart of randd(): one 32-bit sample for the exponent, one for the significand
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand(); // only its leading-zero count is used, as the extra exponent offset
+		if (unlikely(proto_exp_offset == 0)) { // all bits zero: skip CLZ32 and floor the result to 0
+			return 0;
+		}
+		return LDEXPF((float)(rand() | 0x80000001), -32 - CLZ32(proto_exp_offset)); // second sample with top and bottom bits forced to 1, scaled by 2^-(32 + zero count)
+#else
+#pragma message("RandomPCG::randf - intrinsic clz is not available, falling back to bit truncation")
+		return (float)(rand() & 0xFFFFFF) / (float)0xFFFFFF; // fallback: low 24 bits of one sample divided by their maximum value
+#endif
+	}
 
 	_FORCE_INLINE_ double randfn(double p_mean, double p_deviation) {
 		return p_mean + p_deviation * (cos(Math_TAU * randd()) * sqrt(-2.0 * log(randd()))); // Box-Muller transform
diff --git a/core/math/vector3.h b/core/math/vector3.h
index 6423147282..811a207138 100644
--- a/core/math/vector3.h
+++ b/core/math/vector3.h
@@ -224,7 +224,7 @@ Vector3 Vector3::slerp(const Vector3 &p_b, real_t p_t) const {
 #endif
 
 	real_t theta = angle_to(p_b);
-	return rotated(cross(p_b), theta * p_t);
+	return rotated(cross(p_b).normalized(), theta * p_t);
 }
 
 real_t Vector3::distance_to(const Vector3 &p_b) const {