Improved uniformity of RandomPCG::randf.

When generating single precision floats, Godot casts a uint32_t to float, causing uniformity loss. This new randf, inspired by T. R. Campbell's random_real, samples the output of rand as the fraction part of an infinite binary number, with some tricks to reduce ops and branching. This method provides "good enough" uniformity at decent speed, for floats greater than 2^-64. Smaller numbers are floored to 0.
author: toasteater <48371905+toasteater@users.noreply.github.com> 2019-03-18 11:41:16 +0800
committer: toasteater <48371905+toasteater@users.noreply.github.com> 2019-03-19 18:14:58 +0800
commit: 5f1b9a23132408160cf0b99e1e162413ab0e382b (patch)
tree: 631b214908b1af14f18872555d0cbe83886c9c43
parent: df7d3708c5b535c3696943322a14ec19a175e30c (diff)
2 files changed, 62 insertions, 8 deletions
diff --git a/core/math/random_pcg.cpp b/core/math/random_pcg.cpp
index 45467b32b2..b82b24ac4d 100644
--- a/core/math/random_pcg.cpp
+++ b/core/math/random_pcg.cpp
@@ -44,13 +44,9 @@ void RandomPCG::randomize() {
 }
 
 double RandomPCG::random(double p_from, double p_to) {
-	unsigned int r = rand();
-	double ret = (double)r / (double)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randd() * (p_to - p_from) + p_from;
 }
 
 float RandomPCG::random(float p_from, float p_to) {
-	unsigned int r = rand();
-	float ret = (float)r / (float)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randf() * (p_to - p_from) + p_from;
 }
diff --git a/core/math/random_pcg.h b/core/math/random_pcg.h
index 230eb9a11b..e48b1b9551 100644
--- a/core/math/random_pcg.h
+++ b/core/math/random_pcg.h
@@ -35,6 +35,28 @@
 
 #include "thirdparty/misc/pcg.h"
 
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_clz))
+#define CLZ32(x) __builtin_clz(x)
+#elif defined(_MSC_VER)
+#include "intrin.h"
+static int __bsr_clz32(uint32_t x) {
+	unsigned long index;
+	_BitScanReverse(&index, x);
+	return 31 - index;
+}
+#define CLZ32(x) __bsr_clz32(x)
+#else
+#endif
+
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_ldexp) && _llvm_has_builtin(__builtin_ldexpf))
+#define LDEXP(s, e) __builtin_ldexp(s, e)
+#define LDEXPF(s, e) __builtin_ldexpf(s, e)
+#else
+#include "math.h"
+#define LDEXP(s, e) ldexp(s, e)
+#define LDEXPF(s, e) ldexp(s, e)
+#endif
+
 class RandomPCG {
 	pcg32_random_t pcg;
 	uint64_t current_seed; // seed with this to get the same state
@@ -58,8 +80,44 @@ public:
 		current_seed = pcg.state;
 		return pcg32_random_r(&pcg);
 	}
-	_FORCE_INLINE_ double randd() { return (double)rand() / (double)RANDOM_MAX; }
-	_FORCE_INLINE_ float randf() { return (float)rand() / (float)RANDOM_MAX; }
+
+	// Obtaining floating point numbers in [0, 1] range with "good enough" uniformity.
+	// These functions sample the output of rand() as the fraction part of an infinite binary number,
+	// with some tricks applied to reduce ops and branching:
+	// 1. Instead of shifting to the first 1 and connecting random bits, we simply set the MSB and LSB to 1.
+	//    Provided that the RNG is actually uniform bit by bit, this should have the exact same effect.
+	// 2. In order to compensate for exponent info loss, we count zeros from another random number,
+	//    and just add that to the initial offset.
+	//    This has the same probability as counting and shifting an actual bit stream: 2^-n for n zeroes.
+	// For all numbers above 2^-96 (2^-64 for floats), the functions should be uniform.
+	// However, all numbers below that threshold are floored to 0.
+	// The thresholds are chosen to minimize rand() calls while keeping the numbers within a totally subjective quality standard.
+	// If clz or ldexp isn't available, fall back to bit truncation for performance, sacrificing uniformity.
+	_FORCE_INLINE_ double randd() {
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand();
+		if (unlikely(proto_exp_offset == 0)) {
+			return 0;
+		}
+		uint64_t significand = (((uint64_t)rand()) << 32) | rand() | 0x8000000000000001U;
+		return LDEXP((double)significand, -64 - CLZ32(proto_exp_offset));
+#else
+#pragma message("RandomPCG::randd - intrinsic clz is not available, falling back to bit truncation")
+		return (double)(((((uint64_t)rand()) << 32) | rand()) & 0x1FFFFFFFFFFFFFU) / (double)0x1FFFFFFFFFFFFFU;
+#endif
+	}
+	_FORCE_INLINE_ float randf() {
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand();
+		if (unlikely(proto_exp_offset == 0)) {
+			return 0;
+		}
+		return LDEXPF((float)(rand() | 0x80000001), -32 - CLZ32(proto_exp_offset));
+#else
+#pragma message("RandomPCG::randf - intrinsic clz is not available, falling back to bit truncation")
+		return (float)(rand() & 0xFFFFFF) / (float)0xFFFFFF;
+#endif
+	}
 
 	double random(double p_from, double p_to);
 	float random(float p_from, float p_to);
author	toasteater <48371905+toasteater@users.noreply.github.com>	2019-03-18 11:41:16 +0800
committer	toasteater <48371905+toasteater@users.noreply.github.com>	2019-03-19 18:14:58 +0800
commit	5f1b9a23132408160cf0b99e1e162413ab0e382b (patch)
tree	631b214908b1af14f18872555d0cbe83886c9c43
parent	df7d3708c5b535c3696943322a14ec19a175e30c (diff)