summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHein-Pieter van Braam <hp@tmm.cx>2017-12-20 12:04:52 +0100
committerHein-Pieter van Braam <hp@tmm.cx>2017-12-20 12:09:09 +0100
commitccbb5923ac12f38b9bc47448a45340dd0534eabc (patch)
tree252133f1971dfa15fe9219df7f883d42b8838e67
parentb08735f20987a6188fd54c5e2dcd072dc316b413 (diff)
Fix lightmapper rng
In our previous attempts to fix the lightmapper we may have inadvertently introduced the same issue we were trying to fix. It appears that rand() will on some platforms introduce a mutex making it slower and on others may have a per-thread state that would need to be initialized with srand() on each thread. This slows down the lightbaking further. This sets up a separate rng state for each OpenMP thread by calling rand() only in the single-threaded part of the code. We then keep a vector of states. I believe this solves our problems.
-rw-r--r--scene/3d/voxel_light_baker.cpp51
-rw-r--r--scene/3d/voxel_light_baker.h2
2 files changed, 36 insertions, 17 deletions
diff --git a/scene/3d/voxel_light_baker.cpp b/scene/3d/voxel_light_baker.cpp
index bc3f62cfd1..4c52d0bb1b 100644
--- a/scene/3d/voxel_light_baker.cpp
+++ b/scene/3d/voxel_light_baker.cpp
@@ -32,6 +32,10 @@
#include "os/os.h"
#include <stdlib.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
#define FINDMINMAX(x0, x1, x2, min, max) \
min = max = x0; \
if (x1 < min) min = x1; \
@@ -1675,19 +1679,17 @@ Vector3 VoxelLightBaker::_compute_pixel_light_at_pos(const Vector3 &p_pos, const
return accum;
}
-uint32_t xorshiftstate[] = { 123 }; // anything non-zero will do here
-
-_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *seed) {
+_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
/* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */
- uint32_t x = *seed;
+ uint32_t x = *state;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
- *seed = x;
+ *state = x;
return x;
}
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
int samples_per_quality[3] = { 48, 128, 512 };
@@ -1709,16 +1711,11 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
const Light *light = bake_light.ptr();
const Cell *cells = bake_cells.ptr();
- uint32_t seed = 0;
- while (seed == 0) {
- seed = rand(); //system rand is thread safe, do not replace by Math:: random.
- }
-
for (int i = 0; i < samples; i++) {
- float random_angle1 = (((xorshift32(&seed) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
+ float random_angle1 = (((xorshift32(rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
Vector3 axis(0, sin(random_angle1), cos(random_angle1));
- float random_angle2 = ((xorshift32(&seed) % 65535) / 65535.0) * Math_PI * 2.0;
+ float random_angle2 = ((xorshift32(rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
Basis rot(Vector3(0, 0, 1), random_angle2);
axis = rot.xform(axis);
@@ -1852,21 +1849,43 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
_plot_triangle(uv, vertex, normal, lightmap.ptrw(), width, height);
}
}
- //step 3 perform voxel cone trace on lightmap pixels
+ //step 3 perform voxel cone trace on lightmap pixels
{
LightMap *lightmap_ptr = lightmap.ptrw();
uint64_t begin_time = OS::get_singleton()->get_ticks_usec();
volatile int lines = 0;
+ // make sure our OS-level rng is seeded
+ srand(OS::get_singleton()->get_ticks_usec());
+
+ // setup an RNG state for each OpenMP thread
+ uint32_t threadcount = 1;
+ uint32_t threadnum = 0;
+#ifdef _OPENMP
+ threadcount = omp_get_max_threads();
+#endif
+ Vector<uint32_t> rng_states;
+ rng_states.resize(threadcount);
+ for (uint32_t i = 0; i < threadcount; i++) {
+ do {
+ rng_states[i] = rand();
+ } while (rng_states[i] == 0);
+ }
+ uint32_t *rng_states_p = rng_states.ptrw();
+
for (int i = 0; i < height; i++) {
//print_line("bake line " + itos(i) + " / " + itos(height));
#ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1)
+#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
#endif
for (int j = 0; j < width; j++) {
+#ifdef _OPENMP
+ threadnum = omp_get_thread_num();
+#endif
+
//if (i == 125 && j == 280) {
LightMap *pixel = &lightmap_ptr[i * width + j];
@@ -1879,7 +1898,7 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
} break;
case BAKE_MODE_RAY_TRACE: {
- pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
+ pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
} break;
// pixel->light = Vector3(1, 1, 1);
//}
diff --git a/scene/3d/voxel_light_baker.h b/scene/3d/voxel_light_baker.h
index d6ba414a46..7db31f8a67 100644
--- a/scene/3d/voxel_light_baker.h
+++ b/scene/3d/voxel_light_baker.h
@@ -148,7 +148,7 @@ private:
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
- _FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+ _FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
public:
void begin_bake(int p_subdiv, const AABB &p_bounds);