summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SConstruct1
-rw-r--r--core/os/threaded_array_processor.cpp2
-rw-r--r--core/os/threaded_array_processor.h80
-rw-r--r--modules/opus/config.py7
-rw-r--r--platform/osx/detect.py3
-rw-r--r--platform/windows/detect.py5
-rw-r--r--platform/x11/detect.py4
-rw-r--r--scene/3d/baked_lightmap.cpp4
-rw-r--r--scene/3d/voxel_light_baker.cpp76
-rw-r--r--scene/3d/voxel_light_baker.h5
10 files changed, 119 insertions, 68 deletions
diff --git a/SConstruct b/SConstruct
index dbce94f296..88b29695cb 100644
--- a/SConstruct
+++ b/SConstruct
@@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
-opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
# Thirdparty libraries
diff --git a/core/os/threaded_array_processor.cpp b/core/os/threaded_array_processor.cpp
new file mode 100644
index 0000000000..8e92508ea5
--- /dev/null
+++ b/core/os/threaded_array_processor.cpp
@@ -0,0 +1,2 @@
+#include "threaded_array_processor.h"
+
diff --git a/core/os/threaded_array_processor.h b/core/os/threaded_array_processor.h
new file mode 100644
index 0000000000..e584fbb193
--- /dev/null
+++ b/core/os/threaded_array_processor.h
@@ -0,0 +1,80 @@
+#ifndef THREADED_ARRAY_PROCESSOR_H
+#define THREADED_ARRAY_PROCESSOR_H
+
+#include "os/mutex.h"
+#include "os/os.h"
+#include "os/thread.h"
+#include "safe_refcount.h"
+#include "thread_safe.h"
+
+template <class C, class U>
+struct ThreadArrayProcessData { // job description handed to the array-processing routines: target object, member callback, user data and index bookkeeping
+ uint32_t elements; // number of indices to process; the threaded worker stops once its claimed index is >= this value
+ uint32_t index; // shared cursor; in the threaded build it is advanced through atomic_increment() by process_array_thread
+ C *instance; // object on which `method` is invoked
+ U userdata; // passed as the second argument of every `method` call
+ void (C::*method)(uint32_t, U); // member function called once per processed index
+
+ void process(uint32_t p_index) { // invokes (instance->*method)(p_index, userdata)
+ (instance->*method)(p_index, userdata);
+ }
+};
+
+#ifndef NO_THREADS
+
+// Thread entry point used by thread_process_array().
+// `ud` must point to a T exposing `index`, `elements` and `process(uint32_t)`.
+// Each pass claims an index through atomic_increment() on the shared cursor
+// and processes it; the function returns as soon as the claimed index is no
+// longer below `elements`.
+template <class T>
+void process_array_thread(void *ud) {
+
+	T *job = (T *)ud;
+	for (;;) {
+		const uint32_t claimed = atomic_increment(&job->index);
+		if (claimed < job->elements) {
+			job->process(claimed);
+		} else {
+			return;
+		}
+	}
+}
+
+// Calls (p_instance->*p_method)(index, p_userdata) for the indices of an
+// array of p_elements entries, spreading the calls over one worker thread per
+// processor reported by OS::get_processor_count(). Blocks until every worker
+// has finished.
+//
+// p_elements: number of entries to process; 0 makes the call a no-op.
+// p_instance: object the callback is invoked on.
+// p_method:   member function of C taking (uint32_t index, U userdata).
+// p_userdata: value forwarded to every callback invocation.
+//
+// The callback runs concurrently from several threads, so it must only touch
+// data that is safe to access in parallel for distinct indices.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	// Index 0 is processed unconditionally below, so an empty range has to be
+	// rejected up front or the callback would run on a non-existent element.
+	if (p_elements == 0)
+		return;
+
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0;
+	data.elements = p_elements;
+	data.process(data.index); //process first, let threads increment for next
+
+	// The only element has just been handled on the calling thread; workers
+	// would find nothing left to claim, so do not bother creating them.
+	if (p_elements == 1)
+		return;
+
+	Vector<Thread *> threads;
+
+	threads.resize(OS::get_singleton()->get_processor_count());
+
+	// Every worker shares `data`, which stays alive until all of them have
+	// been joined below.
+	for (int i = 0; i < threads.size(); i++) {
+		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
+	}
+
+	for (int i = 0; i < threads.size(); i++) {
+		Thread::wait_to_finish(threads[i]);
+		memdelete(threads[i]);
+	}
+}
+
+#else
+
+// Fallback used when NO_THREADS is defined: invokes
+// (p_instance->*p_method)(i, p_userdata) for i = 0 .. p_elements - 1,
+// in order, on the calling thread.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	ThreadArrayProcessData<C, U> job;
+	job.elements = p_elements;
+	job.index = 0;
+	job.userdata = p_userdata;
+	job.instance = p_instance;
+	job.method = p_method;
+
+	uint32_t current = 0;
+	while (current < p_elements) {
+		job.process(current);
+		++current;
+	}
+}
+
+#endif
+
+#endif // THREADED_ARRAY_PROCESSOR_H
diff --git a/modules/opus/config.py b/modules/opus/config.py
index 60f8d838d6..0ee1b1b7b4 100644
--- a/modules/opus/config.py
+++ b/modules/opus/config.py
@@ -1,5 +1,10 @@
def can_build(platform):
- return True
+ # Sorry guys, do not enable this unless you can figure out a way
+ # to get Opus to not do any memory allocation or system calls
+ # in the audio thread.
+ # Currently the implementation even reads files from the audio thread,
+ # and this is not how audio programming works.
+ return False
def configure(env):
pass
diff --git a/platform/osx/detect.py b/platform/osx/detect.py
index 5b04ab8826..2e686fbee4 100644
--- a/platform/osx/detect.py
+++ b/platform/osx/detect.py
@@ -82,9 +82,6 @@ def configure(env):
env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
- if env['tools'] and env['openmp']:
- env.Append(CPPFLAGS=['-fopenmp'])
- env.Append(LINKFLAGS=['-fopenmp'])
else: # osxcross build
root = os.environ.get("OSXCROSS_ROOT", 0)
diff --git a/platform/windows/detect.py b/platform/windows/detect.py
index e216868bd8..3b8de2caf4 100644
--- a/platform/windows/detect.py
+++ b/platform/windows/detect.py
@@ -191,8 +191,6 @@ def configure(env):
if (env["use_lto"]):
env.Append(CCFLAGS=['/GL'])
env.Append(LINKFLAGS=['/LTCG'])
- if env['tools'] and env['openmp']:
- env.Append(CPPFLAGS=['/openmp'])
env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@@ -270,9 +268,6 @@ def configure(env):
env.Append(CCFLAGS=['-flto'])
env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
- if env['tools'] and env['openmp']:
- env.Append(CPPFLAGS=['-fopenmp'])
- env.Append(LINKFLAGS=['-fopenmp'])
## Compile flags
diff --git a/platform/x11/detect.py b/platform/x11/detect.py
index 98ae9a8658..cb45fed1be 100644
--- a/platform/x11/detect.py
+++ b/platform/x11/detect.py
@@ -265,9 +265,5 @@ def configure(env):
env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
- if env['tools'] and env['openmp']:
- env.Append(CPPFLAGS=['-fopenmp'])
- env.Append(LINKFLAGS=['-fopenmp'])
-
if env['use_static_cpp']:
env.Append(LINKFLAGS=['-static-libstdc++'])
diff --git a/scene/3d/baked_lightmap.cpp b/scene/3d/baked_lightmap.cpp
index 9a77626296..8c282a31b8 100644
--- a/scene/3d/baked_lightmap.cpp
+++ b/scene/3d/baked_lightmap.cpp
@@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
BakedLightmap::BakedLightmap() {
extents = Vector3(10, 10, 10);
- bake_cell_size = 0.1;
- capture_cell_size = 0.25;
+ bake_cell_size = 0.25;
+ capture_cell_size = 0.5;
bake_quality = BAKE_QUALITY_MEDIUM;
bake_mode = BAKE_MODE_CONE_TRACE;
diff --git a/scene/3d/voxel_light_baker.cpp b/scene/3d/voxel_light_baker.cpp
index bf0f801e32..17aa649dff 100644
--- a/scene/3d/voxel_light_baker.cpp
+++ b/scene/3d/voxel_light_baker.cpp
@@ -30,11 +30,9 @@
#include "voxel_light_baker.h"
#include "os/os.h"
+#include "os/threaded_array_processor.h"
#include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
#define FINDMINMAX(x0, x1, x2, min, max) \
min = max = x0; \
@@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
return x;
}
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
int samples_per_quality[3] = { 48, 128, 512 };
@@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
const Light *light = bake_light.ptr();
const Cell *cells = bake_cells.ptr();
- // Prevent false sharing when running on OpenMP
- uint32_t local_rng_state = *rng_state;
+ uint32_t local_rng_state = rand(); //needs to be fixed again
for (int i = 0; i < samples; i++) {
@@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
}
// Make sure we don't reset this thread's RNG state
- *rng_state = local_rng_state;
+
return accum / samples;
}
+// Computes the light of a single lightmap entry: element p_x of the row
+// starting at p_line. Entries whose position equals a default-constructed
+// Vector3 are left untouched. The result is written to the entry's `light`
+// field, scaled by `energy`, using the tracer selected by `bake_mode`.
+void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
+
+	LightMap &texel = p_line[p_x];
+	if (texel.pos == Vector3()) {
+		return;
+	}
+
+	if (bake_mode == BAKE_MODE_CONE_TRACE) {
+		texel.light = _compute_pixel_light_at_pos(texel.pos, texel.normal) * energy;
+	} else if (bake_mode == BAKE_MODE_RAY_TRACE) {
+		texel.light = _compute_ray_trace_at_pos(texel.pos, texel.normal) * energy;
+	}
+}
+
+
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
//transfer light information to a lightmap
@@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
volatile int lines = 0;
// make sure our OS-level rng is seeded
- srand(OS::get_singleton()->get_ticks_usec());
-
- // setup an RNG state for each OpenMP thread
- uint32_t threadcount = 1;
- uint32_t threadnum = 0;
-#ifdef _OPENMP
- threadcount = omp_get_max_threads();
-#endif
- Vector<uint32_t> rng_states;
- rng_states.resize(threadcount);
- for (uint32_t i = 0; i < threadcount; i++) {
- do {
- rng_states[i] = rand();
- } while (rng_states[i] == 0);
- }
- uint32_t *rng_states_p = rng_states.ptrw();
for (int i = 0; i < height; i++) {
- //print_line("bake line " + itos(i) + " / " + itos(height));
-#ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
-#endif
- for (int j = 0; j < width; j++) {
-
-#ifdef _OPENMP
- threadnum = omp_get_thread_num();
-#endif
-
- //if (i == 125 && j == 280) {
-
- LightMap *pixel = &lightmap_ptr[i * width + j];
- if (pixel->pos == Vector3())
- continue; //unused, skipe
-
- //print_line("pos: " + pixel->pos + " normal " + pixel->normal);
- switch (bake_mode) {
- case BAKE_MODE_CONE_TRACE: {
- pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
- } break;
- case BAKE_MODE_RAY_TRACE: {
- pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
- } break;
- // pixel->light = Vector3(1, 1, 1);
- //}
- }
- }
+ thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
lines = MAX(lines, i); //for multithread
if (p_bake_time_func) {
diff --git a/scene/3d/voxel_light_baker.h b/scene/3d/voxel_light_baker.h
index 7db31f8a67..68e11c356b 100644
--- a/scene/3d/voxel_light_baker.h
+++ b/scene/3d/voxel_light_baker.h
@@ -148,9 +148,12 @@ private:
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
- _FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
+ _FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+
+ void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
public:
+
void begin_bake(int p_subdiv, const AABB &p_bounds);
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);