diff options
Diffstat (limited to 'modules/raycast')
-rw-r--r-- | modules/raycast/SCsub | 163 | ||||
-rw-r--r-- | modules/raycast/config.py | 7 | ||||
-rw-r--r-- | modules/raycast/godot_update_embree.py | 17 | ||||
-rw-r--r-- | modules/raycast/lightmap_raycaster.cpp | 35 | ||||
-rw-r--r-- | modules/raycast/raycast_occlusion_cull.cpp | 10 | ||||
-rw-r--r-- | modules/raycast/raycast_occlusion_cull.h | 18 |
6 files changed, 128 insertions, 122 deletions
diff --git a/modules/raycast/SCsub b/modules/raycast/SCsub index 68e9df5263..6e7b3e7b8d 100644 --- a/modules/raycast/SCsub +++ b/modules/raycast/SCsub @@ -3,84 +3,95 @@ Import("env") Import("env_modules") -embree_src = [ - "common/sys/sysinfo.cpp", - "common/sys/alloc.cpp", - "common/sys/filename.cpp", - "common/sys/library.cpp", - "common/sys/thread.cpp", - "common/sys/string.cpp", - "common/sys/regression.cpp", - "common/sys/mutex.cpp", - "common/sys/condition.cpp", - "common/sys/barrier.cpp", - "common/math/constants.cpp", - "common/simd/sse.cpp", - "common/lexers/stringstream.cpp", - "common/lexers/tokenstream.cpp", - "common/tasking/taskschedulerinternal.cpp", - "common/algorithms/parallel_for.cpp", - "common/algorithms/parallel_reduce.cpp", - "common/algorithms/parallel_prefix_sum.cpp", - "common/algorithms/parallel_for_for.cpp", - "common/algorithms/parallel_for_for_prefix_sum.cpp", - "common/algorithms/parallel_partition.cpp", - "common/algorithms/parallel_sort.cpp", - "common/algorithms/parallel_set.cpp", - "common/algorithms/parallel_map.cpp", - "common/algorithms/parallel_filter.cpp", - "kernels/common/device.cpp", - "kernels/common/stat.cpp", - "kernels/common/acceln.cpp", - "kernels/common/accelset.cpp", - "kernels/common/state.cpp", - "kernels/common/rtcore.cpp", - "kernels/common/rtcore_builder.cpp", - "kernels/common/scene.cpp", - "kernels/common/alloc.cpp", - "kernels/common/geometry.cpp", - "kernels/common/scene_triangle_mesh.cpp", - "kernels/geometry/primitive4.cpp", - "kernels/builders/primrefgen.cpp", - "kernels/bvh/bvh.cpp", - "kernels/bvh/bvh_statistics.cpp", - "kernels/bvh/bvh4_factory.cpp", - "kernels/bvh/bvh8_factory.cpp", - "kernels/bvh/bvh_collider.cpp", - "kernels/bvh/bvh_rotate.cpp", - "kernels/bvh/bvh_refit.cpp", - "kernels/bvh/bvh_builder.cpp", - "kernels/bvh/bvh_builder_morton.cpp", - "kernels/bvh/bvh_builder_sah.cpp", - "kernels/bvh/bvh_builder_sah_spatial.cpp", - "kernels/bvh/bvh_builder_sah_mb.cpp", - "kernels/bvh/bvh_builder_twolevel.cpp", - "kernels/bvh/bvh_intersector1_bvh4.cpp", -] - -embree_dir = "#thirdparty/embree-aarch64/" - -env_embree = env_modules.Clone() -embree_sources = [embree_dir + file for file in embree_src] -env_embree.Prepend(CPPPATH=[embree_dir, embree_dir + "include"]) -env_embree.Append(CPPFLAGS=["-DEMBREE_TARGET_SSE2", "-DEMBREE_LOWEST_ISA", "-DTASKING_INTERNAL", "-DNDEBUG"]) - -if not env_embree.msvc: - env_embree.Append(CPPFLAGS=["-msse2", "-mxsave"]) +env_raycast = env_modules.Clone() + +# Thirdparty source files + +thirdparty_obj = [] + +if env["builtin_embree"]: + thirdparty_dir = "#thirdparty/embree/" + + embree_src = [ + "common/sys/sysinfo.cpp", + "common/sys/alloc.cpp", + "common/sys/filename.cpp", + "common/sys/library.cpp", + "common/sys/thread.cpp", + "common/sys/string.cpp", + "common/sys/regression.cpp", + "common/sys/mutex.cpp", + "common/sys/condition.cpp", + "common/sys/barrier.cpp", + "common/math/constants.cpp", + "common/simd/sse.cpp", + "common/lexers/stringstream.cpp", + "common/lexers/tokenstream.cpp", + "common/tasking/taskschedulerinternal.cpp", + "kernels/common/device.cpp", + "kernels/common/stat.cpp", + "kernels/common/acceln.cpp", + "kernels/common/accelset.cpp", + "kernels/common/state.cpp", + "kernels/common/rtcore.cpp", + "kernels/common/rtcore_builder.cpp", + "kernels/common/scene.cpp", + "kernels/common/alloc.cpp", + "kernels/common/geometry.cpp", + "kernels/common/scene_triangle_mesh.cpp", + "kernels/geometry/primitive4.cpp", + "kernels/builders/primrefgen.cpp", + "kernels/bvh/bvh.cpp", + "kernels/bvh/bvh_statistics.cpp", + "kernels/bvh/bvh4_factory.cpp", + "kernels/bvh/bvh8_factory.cpp", + "kernels/bvh/bvh_collider.cpp", + "kernels/bvh/bvh_rotate.cpp", + "kernels/bvh/bvh_refit.cpp", + "kernels/bvh/bvh_builder.cpp", + "kernels/bvh/bvh_builder_morton.cpp", + "kernels/bvh/bvh_builder_sah.cpp", + "kernels/bvh/bvh_builder_sah_spatial.cpp", + "kernels/bvh/bvh_builder_sah_mb.cpp", + "kernels/bvh/bvh_builder_twolevel.cpp", + "kernels/bvh/bvh_intersector1_bvh4.cpp", + ] + + thirdparty_sources = [thirdparty_dir + file for file in embree_src] + + env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"]) + env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL", "NDEBUG"]) + + if not env.msvc: + if env["arch"] in ["x86", "x86_64"]: + env_raycast.Append(CPPFLAGS=["-msse2", "-mxsave"]) + + if env["platform"] == "windows": + env_raycast.Append(CPPFLAGS=["-mstackrealign"]) + if env["platform"] == "windows": - env_embree.Append(CPPFLAGS=["-mstackrealign"]) + if env.msvc: + env.Append(LINKFLAGS=["psapi.lib"]) + else: + env.Append(LIBS=["psapi"]) -if env["platform"] == "windows": - if env.msvc: - env.Append(LINKFLAGS=["psapi.lib"]) - env_embree.Append(CPPFLAGS=["-D__SSE2__", "-D__SSE__"]) - else: - env.Append(LIBS=["psapi"]) + env_thirdparty = env_raycast.Clone() + env_thirdparty.disable_warnings() + env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) -env_embree.disable_warnings() -env_embree.add_source_files(env.modules_sources, embree_sources) + if not env["arch"] in ["x86", "x86_64"] or env.msvc: + # Embree needs those, it will automatically use SSE2NEON in ARM + env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"]) -env_raycast = env_modules.Clone() -env_raycast.Prepend(CPPPATH=[embree_dir, embree_dir + "include", embree_dir + "common"]) + env.modules_sources += thirdparty_obj + + +# Godot source files + +module_obj = [] + +env_raycast.add_source_files(module_obj, "*.cpp") +env.modules_sources += module_obj -env_raycast.add_source_files(env.modules_sources, "*.cpp") +# Needed to force rebuilding the module files when the thirdparty library is updated. +env.Depends(module_obj, thirdparty_obj) diff --git a/modules/raycast/config.py b/modules/raycast/config.py index 26493da41b..5de36c5322 100644 --- a/modules/raycast/config.py +++ b/modules/raycast/config.py @@ -1,10 +1,15 @@ def can_build(env, platform): + # Depends on Embree library, which only supports x86_64 and aarch64. + if platform == "android": - return env["android_arch"] in ["arm64v8", "x86", "x86_64"] + return env["android_arch"] in ["arm64v8", "x86_64"] if platform == "javascript": return False # No SIMD support yet + if env["bits"] == "32": + return False + return True diff --git a/modules/raycast/godot_update_embree.py b/modules/raycast/godot_update_embree.py index db4fa95c21..31a25a318f 100644 --- a/modules/raycast/godot_update_embree.py +++ b/modules/raycast/godot_update_embree.py @@ -11,6 +11,7 @@ include_dirs = [ "common/algorithms", "common/lexers", "common/simd", + "common/simd/arm", "include/embree3", "kernels/subdiv", "kernels/geometry", @@ -32,16 +33,6 @@ cpp_files = [ "common/lexers/stringstream.cpp", "common/lexers/tokenstream.cpp", "common/tasking/taskschedulerinternal.cpp", - "common/algorithms/parallel_for.cpp", - "common/algorithms/parallel_reduce.cpp", - "common/algorithms/parallel_prefix_sum.cpp", - "common/algorithms/parallel_for_for.cpp", - "common/algorithms/parallel_for_for_prefix_sum.cpp", - "common/algorithms/parallel_partition.cpp", - "common/algorithms/parallel_sort.cpp", - "common/algorithms/parallel_set.cpp", - "common/algorithms/parallel_map.cpp", - "common/algorithms/parallel_filter.cpp", "kernels/common/device.cpp", "kernels/common/stat.cpp", "kernels/common/acceln.cpp", @@ -74,11 +65,11 @@ cpp_files = [ os.chdir("../../thirdparty") -dir_name = "embree-aarch64" +dir_name = "embree" if os.path.exists(dir_name): shutil.rmtree(dir_name) -subprocess.run(["git", "clone", "https://github.com/lighttransport/embree-aarch64.git", "embree-tmp"]) +subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"]) os.chdir("embree-tmp") commit_hash = str(subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True)).strip() @@ -197,7 +188,7 @@ with open("CMakeLists.txt", "r") as cmake_file: with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as config_file: config_file.write( f""" -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/modules/raycast/lightmap_raycaster.cpp b/modules/raycast/lightmap_raycaster.cpp index 9039622d3d..0583acc119 100644 --- a/modules/raycast/lightmap_raycaster.cpp +++ b/modules/raycast/lightmap_raycaster.cpp @@ -32,13 +32,9 @@ #include "lightmap_raycaster.h" -// From Embree. -#include <math/vec2.h> -#include <math/vec3.h> - +#ifdef __SSE2__ #include <pmmintrin.h> - -using namespace embree; +#endif LightmapRaycaster *LightmapRaycasterEmbree::create_embree_raycaster() { return memnew(LightmapRaycasterEmbree); @@ -127,25 +123,24 @@ void LightmapRaycasterEmbree::add_mesh(const Vector<Vector3> &p_vertices, const ERR_FAIL_COND(vertex_count % 3 != 0); ERR_FAIL_COND(vertex_count != p_uv2s.size()); + ERR_FAIL_COND(!p_normals.is_empty() && vertex_count != p_normals.size()); - Vec3fa *embree_vertices = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count); - Vec2fa *embree_light_uvs = (Vec2fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, RTC_FORMAT_FLOAT2, sizeof(Vec2fa), vertex_count); - uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, vertex_count / 3); + Vector3 *embree_vertices = (Vector3 *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, sizeof(Vector3), vertex_count); + memcpy(embree_vertices, p_vertices.ptr(), sizeof(Vector3) * vertex_count); - Vec3fa *embree_normals = nullptr; - if (!p_normals.is_empty()) { - embree_normals = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count); - } + Vector2 *embree_light_uvs = (Vector2 *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, RTC_FORMAT_FLOAT2, sizeof(Vector2), vertex_count); + memcpy(embree_light_uvs, p_uv2s.ptr(), sizeof(Vector2) * vertex_count); + uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, vertex_count / 3); for (int i = 0; i < vertex_count; i++) { - embree_vertices[i] = Vec3fa(p_vertices[i].x, p_vertices[i].y, p_vertices[i].z); - embree_light_uvs[i] = Vec2fa(p_uv2s[i].x, p_uv2s[i].y); - if (embree_normals != nullptr) { - embree_normals[i] = Vec3fa(p_normals[i].x, p_normals[i].y, p_normals[i].z); - } embree_triangles[i] = i; } + if (!p_normals.is_empty()) { + Vector3 *embree_normals = (Vector3 *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, RTC_FORMAT_FLOAT3, sizeof(Vector3), vertex_count); + memcpy(embree_normals, p_normals.ptr(), sizeof(Vector3) * vertex_count); + } + rtcCommitGeometry(embree_mesh); rtcSetGeometryIntersectFilterFunction(embree_mesh, filter_function); rtcSetGeometryUserData(embree_mesh, this); @@ -178,8 +173,10 @@ void embree_error_handler(void *p_user_data, RTCError p_code, const char *p_str) } LightmapRaycasterEmbree::LightmapRaycasterEmbree() { +#ifdef __SSE2__ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif embree_device = rtcNewDevice(nullptr); rtcSetDeviceErrorFunction(embree_device, &embree_error_handler, nullptr); @@ -187,8 +184,10 @@ LightmapRaycasterEmbree::LightmapRaycasterEmbree() { } LightmapRaycasterEmbree::~LightmapRaycasterEmbree() { +#ifdef __SSE2__ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF); +#endif if (embree_scene != nullptr) { rtcReleaseScene(embree_scene); diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp index 66558efa8c..88c0145ebc 100644 --- a/modules/raycast/raycast_occlusion_cull.cpp +++ b/modules/raycast/raycast_occlusion_cull.cpp @@ -64,7 +64,7 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) { camera_ray_masks.resize(ray_packets_count * TILE_SIZE * TILE_SIZE); } -void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) { +void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) { CameraRayThreadData td; td.camera_matrix = p_cam_projection; td.camera_transform = p_cam_transform; @@ -82,7 +82,7 @@ void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thr _generate_camera_rays(p_data->camera_transform, p_data->camera_matrix, p_data->camera_orthogonal, from, to); } -void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to) { +void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to) { Size2i buffer_size = sizes[0]; CameraMatrix inv_camera_matrix = p_cam_projection.inverse(); @@ -227,7 +227,7 @@ void RaycastOcclusionCull::remove_scenario(RID p_scenario) { scenario.removed = true; } -void RaycastOcclusionCull::scenario_set_instance(RID p_scenario, RID p_instance, RID p_occluder, const Transform &p_xform, bool p_enabled) { +void RaycastOcclusionCull::scenario_set_instance(RID p_scenario, RID p_instance, RID p_occluder, const Transform3D &p_xform, bool p_enabled) { ERR_FAIL_COND(!scenarios.has(p_scenario)); Scenario &scenario = scenarios[p_scenario]; @@ -345,7 +345,7 @@ void RaycastOcclusionCull::Scenario::_transform_vertices_thread(uint32_t p_threa _transform_vertices_range(p_data->read, p_data->write, p_data->xform, from, to); } -void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform &p_xform, int p_from, int p_to) { +void RaycastOcclusionCull::Scenario::_transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to) { for (int i = p_from; i < p_to; i++) { p_write[i] = p_xform.xform(p_read[i]); } @@ -491,7 +491,7 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size) buffers[p_buffer].resize(p_size); } -void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) { +void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) { if (!buffers.has(p_buffer)) { return; } diff --git a/modules/raycast/raycast_occlusion_cull.h b/modules/raycast/raycast_occlusion_cull.h index acaceb9459..85710a790c 100644 --- a/modules/raycast/raycast_occlusion_cull.h +++ b/modules/raycast/raycast_occlusion_cull.h @@ -34,7 +34,7 @@ #include "core/io/image.h" #include "core/math/camera_matrix.h" #include "core/object/object.h" -#include "core/object/reference.h" +#include "core/object/ref_counted.h" #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" #include "scene/resources/mesh.h" @@ -52,14 +52,14 @@ public: struct CameraRayThreadData { CameraMatrix camera_matrix; - Transform camera_transform; + Transform3D camera_transform; bool camera_orthogonal; int thread_count; Size2i buffer_size; }; void _camera_rays_threaded(uint32_t p_thread, CameraRayThreadData *p_data); - void _generate_camera_rays(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to); + void _generate_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to); public: LocalVector<RayPacket> camera_rays; @@ -69,7 +69,7 @@ public: virtual void clear() override; virtual void resize(const Size2i &p_size) override; void sort_rays(); - void update_camera_rays(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool); + void update_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool); }; private: @@ -99,7 +99,7 @@ private: RID occluder; LocalVector<uint32_t> indices; LocalVector<Vector3> xformed_vertices; - Transform xform; + Transform3D xform; bool enabled = true; bool removed = false; }; @@ -113,7 +113,7 @@ private: struct TransformThreadData { uint32_t thread_count; uint32_t vertex_count; - Transform xform; + Transform3D xform; const Vector3 *read; Vector3 *write; }; @@ -134,7 +134,7 @@ private: void _update_dirty_instance_thread(int p_idx, RID *p_instances); void _update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool); void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data); - void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform &p_xform, int p_from, int p_to); + void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to); static void _commit_scene(void *p_ud); bool update(ThreadWorkPool &p_thread_pool); @@ -164,7 +164,7 @@ public: virtual void add_scenario(RID p_scenario) override; virtual void remove_scenario(RID p_scenario) override; - virtual void scenario_set_instance(RID p_scenario, RID p_instance, RID p_occluder, const Transform &p_xform, bool p_enabled) override; + virtual void scenario_set_instance(RID p_scenario, RID p_instance, RID p_occluder, const Transform3D &p_xform, bool p_enabled) override; virtual void scenario_remove_instance(RID p_scenario, RID p_instance) override; virtual void add_buffer(RID p_buffer) override; @@ -172,7 +172,7 @@ public: virtual HZBuffer *buffer_get_ptr(RID p_buffer) override; virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) override; virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) override; - virtual void buffer_update(RID p_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) override; + virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) override; virtual RID buffer_get_debug_texture(RID p_buffer) override; virtual void set_build_quality(RS::ViewportOcclusionCullingBuildQuality p_quality) override; |