diff options
Diffstat (limited to 'modules/raycast')
-rw-r--r-- | modules/raycast/SCsub | 18 | ||||
-rw-r--r-- | modules/raycast/config.py | 2 | ||||
-rw-r--r-- | modules/raycast/godot_update_embree.py | 12 | ||||
-rw-r--r-- | modules/raycast/lightmap_raycaster.cpp | 9 | ||||
-rw-r--r-- | modules/raycast/lightmap_raycaster.h | 2 | ||||
-rw-r--r-- | modules/raycast/raycast_occlusion_cull.cpp | 209 | ||||
-rw-r--r-- | modules/raycast/raycast_occlusion_cull.h | 31 | ||||
-rw-r--r-- | modules/raycast/register_types.cpp | 5 | ||||
-rw-r--r-- | modules/raycast/static_raycaster.cpp | 137 | ||||
-rw-r--r-- | modules/raycast/static_raycaster.h | 64 |
10 files changed, 382 insertions, 107 deletions
diff --git a/modules/raycast/SCsub b/modules/raycast/SCsub index 6e7b3e7b8d..ef4c598194 100644 --- a/modules/raycast/SCsub +++ b/modules/raycast/SCsub @@ -55,6 +55,9 @@ if env["builtin_embree"]: "kernels/bvh/bvh_builder_sah_mb.cpp", "kernels/bvh/bvh_builder_twolevel.cpp", "kernels/bvh/bvh_intersector1_bvh4.cpp", + "kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp", + "kernels/bvh/bvh_intersector_stream_bvh4.cpp", + "kernels/bvh/bvh_intersector_stream_filters.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in embree_src] @@ -76,6 +79,7 @@ if env["builtin_embree"]: env.Append(LIBS=["psapi"]) env_thirdparty = env_raycast.Clone() + env_thirdparty.force_optimization_on_debug() env_thirdparty.disable_warnings() env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) @@ -83,6 +87,20 @@ if env["builtin_embree"]: # Embree needs those, it will automatically use SSE2NEON in ARM env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"]) + if not env.msvc: + env_thirdparty.Append( + CPPFLAGS=[ + "-fno-strict-overflow", + "-fno-delete-null-pointer-checks", + "-fwrapv", + "-fsigned-char", + "-fno-strict-aliasing", + "-fno-tree-vectorize", + "-fvisibility=hidden", + "-fvisibility-inlines-hidden", + ] + ) + env.modules_sources += thirdparty_obj diff --git a/modules/raycast/config.py b/modules/raycast/config.py index 5de36c5322..7e8b3e9840 100644 --- a/modules/raycast/config.py +++ b/modules/raycast/config.py @@ -1,5 +1,7 @@ def can_build(env, platform): # Depends on Embree library, which only supports x86_64 and aarch64. + if env["arch"].startswith("rv") or env["arch"].startswith("ppc"): + return False if platform == "android": return env["android_arch"] in ["arm64v8", "x86_64"] diff --git a/modules/raycast/godot_update_embree.py b/modules/raycast/godot_update_embree.py index 31a25a318f..e31d88b741 100644 --- a/modules/raycast/godot_update_embree.py +++ b/modules/raycast/godot_update_embree.py @@ -61,6 +61,11 @@ cpp_files = [ "kernels/bvh/bvh_builder_twolevel.cpp", "kernels/bvh/bvh_intersector1.cpp", "kernels/bvh/bvh_intersector1_bvh4.cpp", + "kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp", + "kernels/bvh/bvh_intersector_stream_bvh4.cpp", + "kernels/bvh/bvh_intersector_stream_filters.cpp", + "kernels/bvh/bvh_intersector_hybrid.cpp", + "kernels/bvh/bvh_intersector_stream.cpp", ] os.chdir("../../thirdparty") @@ -117,7 +122,7 @@ with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file: /* #undef EMBREE_GEOMETRY_INSTANCE */ /* #undef EMBREE_GEOMETRY_GRID */ /* #undef EMBREE_GEOMETRY_POINT */ -/* #undef EMBREE_RAY_PACKETS */ +#define EMBREE_RAY_PACKETS /* #undef EMBREE_COMPACT_POLYS */ #define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0 @@ -249,3 +254,8 @@ with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as con os.chdir("..") shutil.rmtree("embree-tmp") + +subprocess.run(["git", "restore", "embree/patches"]) + +for patch in os.listdir("embree/patches"): + subprocess.run(["git", "apply", "embree/patches/" + patch]) diff --git a/modules/raycast/lightmap_raycaster.cpp b/modules/raycast/lightmap_raycaster.cpp index 0583acc119..fdcf509da8 100644 --- a/modules/raycast/lightmap_raycaster.cpp +++ b/modules/raycast/lightmap_raycaster.cpp @@ -168,7 +168,7 @@ void LightmapRaycasterEmbree::clear_mesh_filter() { filter_meshes.clear(); } -void embree_error_handler(void *p_user_data, RTCError p_code, const char *p_str) { +void embree_lm_error_handler(void *p_user_data, RTCError p_code, const char *p_str) { print_error("Embree error: " + String(p_str)); } @@ -179,16 +179,11 @@ LightmapRaycasterEmbree::LightmapRaycasterEmbree() { #endif embree_device = rtcNewDevice(nullptr); - rtcSetDeviceErrorFunction(embree_device, &embree_error_handler, nullptr); + rtcSetDeviceErrorFunction(embree_device, &embree_lm_error_handler, nullptr); embree_scene = rtcNewScene(embree_device); } LightmapRaycasterEmbree::~LightmapRaycasterEmbree() { -#ifdef __SSE2__ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF); -#endif - if (embree_scene != nullptr) { rtcReleaseScene(embree_scene); } diff --git a/modules/raycast/lightmap_raycaster.h b/modules/raycast/lightmap_raycaster.h index 4c3de27837..290b0a1cf3 100644 --- a/modules/raycast/lightmap_raycaster.h +++ b/modules/raycast/lightmap_raycaster.h @@ -30,9 +30,9 @@ #ifdef TOOLS_ENABLED +#include "core/io/image.h" #include "core/object/object.h" #include "scene/3d/lightmapper.h" -#include "scene/resources/mesh.h" #include <embree3/rtcore.h> diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp index 88c0145ebc..75491c98e5 100644 --- a/modules/raycast/raycast_occlusion_cull.cpp +++ b/modules/raycast/raycast_occlusion_cull.cpp @@ -41,9 +41,14 @@ RaycastOcclusionCull *RaycastOcclusionCull::raycast_singleton = nullptr; void RaycastOcclusionCull::RaycastHZBuffer::clear() { HZBuffer::clear(); - camera_rays.clear(); + if (camera_rays_unaligned_buffer) { + memfree(camera_rays_unaligned_buffer); + camera_rays_unaligned_buffer = nullptr; + camera_rays = nullptr; + } camera_ray_masks.clear(); - packs_size = Size2i(); + camera_rays_tile_count = 0; + tile_grid_size = Size2i(); } void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) { @@ -58,100 +63,112 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) { HZBuffer::resize(p_size); - packs_size = Size2i(Math::ceil(p_size.x / (float)TILE_SIZE), Math::ceil(p_size.y / (float)TILE_SIZE)); - int ray_packets_count = packs_size.x * packs_size.y; - camera_rays.resize(ray_packets_count); - camera_ray_masks.resize(ray_packets_count * TILE_SIZE * TILE_SIZE); + tile_grid_size = Size2i(Math::ceil(p_size.x / (float)TILE_SIZE), Math::ceil(p_size.y / (float)TILE_SIZE)); + camera_rays_tile_count = tile_grid_size.x * tile_grid_size.y; + + if (camera_rays_unaligned_buffer) { + memfree(camera_rays_unaligned_buffer); + } + + const int alignment = 64; // Embree requires ray packets to be 64-aligned + camera_rays_unaligned_buffer = (uint8_t *)memalloc(camera_rays_tile_count * sizeof(CameraRayTile) + alignment); + camera_rays = (CameraRayTile *)(camera_rays_unaligned_buffer + alignment - (((uint64_t)camera_rays_unaligned_buffer) % alignment)); + + camera_ray_masks.resize(camera_rays_tile_count * TILE_RAYS); + memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t)); } void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) { CameraRayThreadData td; - td.camera_matrix = p_cam_projection; - td.camera_transform = p_cam_transform; - td.camera_orthogonal = p_cam_orthogonal; td.thread_count = p_thread_work_pool.get_thread_count(); + td.z_near = p_cam_projection.get_z_near(); + td.z_far = p_cam_projection.get_z_far() * 1.05f; + td.camera_pos = p_cam_transform.origin; + td.camera_dir = -p_cam_transform.basis.get_axis(2); + td.camera_orthogonal = p_cam_orthogonal; + + CameraMatrix inv_camera_matrix = p_cam_projection.inverse(); + Vector3 camera_corner_proj = Vector3(-1.0f, -1.0f, -1.0f); + Vector3 camera_corner_view = inv_camera_matrix.xform(camera_corner_proj); + td.pixel_corner = p_cam_transform.xform(camera_corner_view); + + Vector3 top_corner_proj = Vector3(-1.0f, 1.0f, -1.0f); + Vector3 top_corner_view = inv_camera_matrix.xform(top_corner_proj); + Vector3 top_corner_world = p_cam_transform.xform(top_corner_view); + + Vector3 left_corner_proj = Vector3(1.0f, -1.0f, -1.0f); + Vector3 left_corner_view = inv_camera_matrix.xform(left_corner_proj); + Vector3 left_corner_world = p_cam_transform.xform(left_corner_view); + + td.pixel_u_interp = left_corner_world - td.pixel_corner; + td.pixel_v_interp = top_corner_world - td.pixel_corner; + + debug_tex_range = td.z_far; + p_thread_work_pool.do_work(td.thread_count, this, &RaycastHZBuffer::_camera_rays_threaded, &td); } -void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, RaycastOcclusionCull::RaycastHZBuffer::CameraRayThreadData *p_data) { - uint32_t packs_total = camera_rays.size(); +void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data) { + uint32_t total_tiles = camera_rays_tile_count; uint32_t total_threads = p_data->thread_count; - uint32_t from = p_thread * packs_total / total_threads; - uint32_t to = (p_thread + 1 == total_threads) ? packs_total : ((p_thread + 1) * packs_total / total_threads); - _generate_camera_rays(p_data->camera_transform, p_data->camera_matrix, p_data->camera_orthogonal, from, to); + uint32_t from = p_thread * total_tiles / total_threads; + uint32_t to = (p_thread + 1 == total_threads) ? total_tiles : ((p_thread + 1) * total_tiles / total_threads); + _generate_camera_rays(p_data, from, to); } -void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to) { - Size2i buffer_size = sizes[0]; - - CameraMatrix inv_camera_matrix = p_cam_projection.inverse(); - float z_far = p_cam_projection.get_z_far() * 1.05f; - debug_tex_range = z_far; - - RayPacket *ray_packets = camera_rays.ptr(); - uint32_t *ray_masks = camera_ray_masks.ptr(); +void RaycastOcclusionCull::RaycastHZBuffer::_generate_camera_rays(const CameraRayThreadData *p_data, int p_from, int p_to) { + const Size2i &buffer_size = sizes[0]; for (int i = p_from; i < p_to; i++) { - RayPacket &packet = ray_packets[i]; - int tile_x = (i % packs_size.x) * TILE_SIZE; - int tile_y = (i / packs_size.x) * TILE_SIZE; + CameraRayTile &tile = camera_rays[i]; + int tile_x = (i % tile_grid_size.x) * TILE_SIZE; + int tile_y = (i / tile_grid_size.x) * TILE_SIZE; for (int j = 0; j < TILE_RAYS; j++) { - float x = tile_x + j % TILE_SIZE; - float y = tile_y + j / TILE_SIZE; + int x = tile_x + j % TILE_SIZE; + int y = tile_y + j / TILE_SIZE; - ray_masks[i * TILE_RAYS + j] = ~0U; + float u = (float(x) + 0.5f) / buffer_size.x; + float v = (float(y) + 0.5f) / buffer_size.y; + Vector3 pixel_pos = p_data->pixel_corner + u * p_data->pixel_u_interp + v * p_data->pixel_v_interp; - if (x >= buffer_size.x || y >= buffer_size.y) { - ray_masks[i * TILE_RAYS + j] = 0U; - } else { - float u = x / (buffer_size.x - 1); - float v = y / (buffer_size.y - 1); - u = u * 2.0f - 1.0f; - v = v * 2.0f - 1.0f; - - Plane pixel_proj = Plane(u, v, -1.0, 1.0); - Plane pixel_view = inv_camera_matrix.xform4(pixel_proj); - Vector3 pixel_world = p_cam_transform.xform(pixel_view.normal); - - Vector3 dir; - if (p_cam_orthogonal) { - dir = -p_cam_transform.basis.get_axis(2); - } else { - dir = (pixel_world - p_cam_transform.origin).normalized(); - } - - packet.ray.org_x[j] = pixel_world.x; - packet.ray.org_y[j] = pixel_world.y; - packet.ray.org_z[j] = pixel_world.z; + tile.ray.tnear[j] = p_data->z_near; - packet.ray.dir_x[j] = dir.x; - packet.ray.dir_y[j] = dir.y; - packet.ray.dir_z[j] = dir.z; + Vector3 dir; + if (p_data->camera_orthogonal) { + dir = -p_data->camera_dir; + tile.ray.org_x[j] = pixel_pos.x - dir.x * p_data->z_near; + tile.ray.org_y[j] = pixel_pos.y - dir.y * p_data->z_near; + tile.ray.org_z[j] = pixel_pos.z - dir.z * p_data->z_near; + } else { + dir = (pixel_pos - p_data->camera_pos).normalized(); + tile.ray.org_x[j] = p_data->camera_pos.x; + tile.ray.org_y[j] = p_data->camera_pos.y; + tile.ray.org_z[j] = p_data->camera_pos.z; + tile.ray.tnear[j] /= dir.dot(p_data->camera_dir); + } - packet.ray.tnear[j] = 0.0f; + tile.ray.dir_x[j] = dir.x; + tile.ray.dir_y[j] = dir.y; + tile.ray.dir_z[j] = dir.z; - packet.ray.time[j] = 0.0f; + tile.ray.tfar[j] = p_data->z_far; + tile.ray.time[j] = 0.0f; - packet.ray.flags[j] = 0; - packet.ray.mask[j] = -1; - packet.hit.geomID[j] = RTC_INVALID_GEOMETRY_ID; - } - - packet.ray.tfar[j] = z_far; + tile.ray.flags[j] = 0; + tile.ray.mask[j] = ~0U; + tile.hit.geomID[j] = RTC_INVALID_GEOMETRY_ID; } } } -void RaycastOcclusionCull::RaycastHZBuffer::sort_rays() { - if (is_empty()) { - return; - } +void RaycastOcclusionCull::RaycastHZBuffer::sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal) { + ERR_FAIL_COND(is_empty()); Size2i buffer_size = sizes[0]; - for (int i = 0; i < packs_size.y; i++) { - for (int j = 0; j < packs_size.x; j++) { + for (int i = 0; i < tile_grid_size.y; i++) { + for (int j = 0; j < tile_grid_size.x; j++) { for (int tile_i = 0; tile_i < TILE_SIZE; tile_i++) { for (int tile_j = 0; tile_j < TILE_SIZE; tile_j++) { int x = j * TILE_SIZE + tile_j; @@ -160,14 +177,30 @@ void RaycastOcclusionCull::RaycastHZBuffer::sort_rays() { continue; } int k = tile_i * TILE_SIZE + tile_j; - int packet_index = i * packs_size.x + j; - mips[0][y * buffer_size.x + x] = camera_rays[packet_index].ray.tfar[k]; + int tile_index = i * tile_grid_size.x + j; + float d = camera_rays[tile_index].ray.tfar[k]; + + if (!p_orthogonal) { + const float &dir_x = camera_rays[tile_index].ray.dir_x[k]; + const float &dir_y = camera_rays[tile_index].ray.dir_y[k]; + const float &dir_z = camera_rays[tile_index].ray.dir_z[k]; + float cos_theta = p_camera_dir.x * dir_x + p_camera_dir.y * dir_y + p_camera_dir.z * dir_z; + d *= cos_theta; + } + + mips[0][y * buffer_size.x + x] = d; } } } } } +RaycastOcclusionCull::RaycastHZBuffer::~RaycastHZBuffer() { + if (camera_rays_unaligned_buffer) { + memfree(camera_rays_unaligned_buffer); + } +} + //////////////////////////////////////////////////////// bool RaycastOcclusionCull::is_occluder(RID p_rid) { @@ -184,7 +217,7 @@ void RaycastOcclusionCull::occluder_initialize(RID p_occluder) { } void RaycastOcclusionCull::occluder_set_mesh(RID p_occluder, const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices) { - Occluder *occluder = occluder_owner.getornull(p_occluder); + Occluder *occluder = occluder_owner.get_or_null(p_occluder); ERR_FAIL_COND(!occluder); occluder->vertices = p_vertices; @@ -205,7 +238,7 @@ void RaycastOcclusionCull::occluder_set_mesh(RID p_occluder, const PackedVector3 } void RaycastOcclusionCull::free_occluder(RID p_occluder) { - Occluder *occluder = occluder_owner.getornull(p_occluder); + Occluder *occluder = occluder_owner.get_or_null(p_occluder); ERR_FAIL_COND(!occluder); memdelete(occluder); occluder_owner.free(p_occluder); @@ -245,7 +278,7 @@ void RaycastOcclusionCull::scenario_set_instance(RID p_scenario, RID p_instance, bool changed = false; if (instance.occluder != p_occluder) { - Occluder *old_occluder = occluder_owner.getornull(instance.occluder); + Occluder *old_occluder = occluder_owner.get_or_null(instance.occluder); if (old_occluder) { old_occluder->users.erase(InstanceID(p_scenario, p_instance)); } @@ -253,7 +286,7 @@ void RaycastOcclusionCull::scenario_set_instance(RID p_scenario, RID p_instance, instance.occluder = p_occluder; if (p_occluder.is_valid()) { - Occluder *occluder = occluder_owner.getornull(p_occluder); + Occluder *occluder = occluder_owner.get_or_null(p_occluder); ERR_FAIL_COND(!occluder); occluder->users.insert(InstanceID(p_scenario, p_instance)); } @@ -285,7 +318,7 @@ void RaycastOcclusionCull::scenario_remove_instance(RID p_scenario, RID p_instan OccluderInstance &instance = scenario.instances[p_instance]; if (!instance.removed) { - Occluder *occluder = occluder_owner.getornull(instance.occluder); + Occluder *occluder = occluder_owner.get_or_null(instance.occluder); if (occluder) { occluder->users.erase(InstanceID(p_scenario, p_instance)); } @@ -307,7 +340,7 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in return; } - Occluder *occ = raycast_singleton->occluder_owner.getornull(occ_inst->occluder); + Occluder *occ = raycast_singleton->occluder_owner.get_or_null(occ_inst->occluder); if (!occ) { return; @@ -423,7 +456,7 @@ bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) { const RID *inst_rid = nullptr; while ((inst_rid = instances.next(inst_rid))) { OccluderInstance *occ_inst = instances.getptr(*inst_rid); - Occluder *occ = raycast_singleton->occluder_owner.getornull(occ_inst->occluder); + Occluder *occ = raycast_singleton->occluder_owner.get_or_null(occ_inst->occluder); if (!occ || !occ_inst->enabled) { continue; @@ -451,7 +484,7 @@ void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThrea rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]); } -void RaycastOcclusionCull::Scenario::raycast(LocalVector<RayPacket> &r_rays, const LocalVector<uint32_t> p_valid_masks, ThreadWorkPool &p_thread_pool) const { +void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const { ERR_FAIL_COND(singleton == nullptr); if (raycast_singleton->ebr_device == nullptr) { return; // Embree is initialized on demand when there is some scenario with occluders in it. @@ -462,10 +495,10 @@ void RaycastOcclusionCull::Scenario::raycast(LocalVector<RayPacket> &r_rays, con } RaycastThreadData td; - td.rays = r_rays.ptr(); - td.masks = p_valid_masks.ptr(); + td.rays = r_rays; + td.masks = p_valid_masks; - p_thread_pool.do_work(r_rays.size(), this, &Scenario::_raycast, &td); + p_thread_pool.do_work(p_tile_count, this, &Scenario::_raycast, &td); } //////////////////////////////////////////////////////// @@ -513,8 +546,8 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_ buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal, p_thread_pool); - scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks, p_thread_pool); - buffer.sort_rays(); + scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count, p_thread_pool); + buffer.sort_rays(-p_cam_transform.basis.get_axis(2), p_cam_orthogonal); buffer.update_mips(); } @@ -569,13 +602,15 @@ RaycastOcclusionCull::~RaycastOcclusionCull() { scenario.commit_thread->wait_to_finish(); memdelete(scenario.commit_thread); } + + for (int i = 0; i < 2; i++) { + if (scenario.ebr_scene[i]) { + rtcReleaseScene(scenario.ebr_scene[i]); + } + } } if (ebr_device != nullptr) { -#ifdef __SSE2__ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF); -#endif rtcReleaseDevice(ebr_device); } diff --git a/modules/raycast/raycast_occlusion_cull.h b/modules/raycast/raycast_occlusion_cull.h index 85710a790c..ea96df5ff6 100644 --- a/modules/raycast/raycast_occlusion_cull.h +++ b/modules/raycast/raycast_occlusion_cull.h @@ -43,33 +43,42 @@ #include <embree3/rtcore.h> class RaycastOcclusionCull : public RendererSceneOcclusionCull { - typedef RTCRayHit16 RayPacket; + typedef RTCRayHit16 CameraRayTile; public: class RaycastHZBuffer : public HZBuffer { private: - Size2i packs_size; + Size2i tile_grid_size; struct CameraRayThreadData { - CameraMatrix camera_matrix; - Transform3D camera_transform; - bool camera_orthogonal; int thread_count; + float z_near; + float z_far; + Vector3 camera_dir; + Vector3 camera_pos; + Vector3 pixel_corner; + Vector3 pixel_u_interp; + Vector3 pixel_v_interp; + bool camera_orthogonal; Size2i buffer_size; }; - void _camera_rays_threaded(uint32_t p_thread, CameraRayThreadData *p_data); - void _generate_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, int p_from, int p_to); + void _camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data); + void _generate_camera_rays(const CameraRayThreadData *p_data, int p_from, int p_to); public: - LocalVector<RayPacket> camera_rays; + unsigned int camera_rays_tile_count = 0; + uint8_t *camera_rays_unaligned_buffer = nullptr; + CameraRayTile *camera_rays = nullptr; LocalVector<uint32_t> camera_ray_masks; RID scenario_rid; virtual void clear() override; virtual void resize(const Size2i &p_size) override; - void sort_rays(); + void sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal); void update_camera_rays(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool); + + ~RaycastHZBuffer(); }; private: @@ -106,7 +115,7 @@ private: struct Scenario { struct RaycastThreadData { - RayPacket *rays; + CameraRayTile *rays; const uint32_t *masks; }; @@ -139,7 +148,7 @@ private: bool update(ThreadWorkPool &p_thread_pool); void _raycast(uint32_t p_thread, const RaycastThreadData *p_raycast_data) const; - void raycast(LocalVector<RayPacket> &r_rays, const LocalVector<uint32_t> p_valid_masks, ThreadWorkPool &p_thread_pool) const; + void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const; }; static RaycastOcclusionCull *raycast_singleton; diff --git a/modules/raycast/register_types.cpp b/modules/raycast/register_types.cpp index 78ca91309f..ed99e635e1 100644 --- a/modules/raycast/register_types.cpp +++ b/modules/raycast/register_types.cpp @@ -32,12 +32,14 @@ #include "lightmap_raycaster.h" #include "raycast_occlusion_cull.h" +#include "static_raycaster.h" RaycastOcclusionCull *raycast_occlusion_cull = nullptr; void register_raycast_types() { #ifdef TOOLS_ENABLED LightmapRaycasterEmbree::make_default_raycaster(); + StaticRaycasterEmbree::make_default_raycaster(); #endif raycast_occlusion_cull = memnew(RaycastOcclusionCull); } @@ -46,4 +48,7 @@ void unregister_raycast_types() { if (raycast_occlusion_cull) { memdelete(raycast_occlusion_cull); } +#ifdef TOOLS_ENABLED + StaticRaycasterEmbree::free(); +#endif } diff --git a/modules/raycast/static_raycaster.cpp b/modules/raycast/static_raycaster.cpp new file mode 100644 index 0000000000..2ba65eebf8 --- /dev/null +++ b/modules/raycast/static_raycaster.cpp @@ -0,0 +1,137 @@ +/*************************************************************************/ +/* static_raycaster.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifdef TOOLS_ENABLED + +#include "static_raycaster.h" + +#ifdef __SSE2__ +#include <pmmintrin.h> +#endif + +RTCDevice StaticRaycasterEmbree::embree_device; + +StaticRaycaster *StaticRaycasterEmbree::create_embree_raycaster() { + return memnew(StaticRaycasterEmbree); +} + +void StaticRaycasterEmbree::make_default_raycaster() { + create_function = create_embree_raycaster; +} + +void StaticRaycasterEmbree::free() { + if (embree_device) { + rtcReleaseDevice(embree_device); + } +} + +bool StaticRaycasterEmbree::intersect(Ray &r_ray) { + RTCIntersectContext context; + rtcInitIntersectContext(&context); + rtcIntersect1(embree_scene, &context, (RTCRayHit *)&r_ray); + return r_ray.geomID != RTC_INVALID_GEOMETRY_ID; +} + +void StaticRaycasterEmbree::intersect(Vector<Ray> &r_rays) { + Ray *rays = r_rays.ptrw(); + for (int i = 0; i < r_rays.size(); ++i) { + intersect(rays[i]); + } +} + +void StaticRaycasterEmbree::add_mesh(const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices, unsigned int p_id) { + RTCGeometry embree_mesh = rtcNewGeometry(embree_device, RTC_GEOMETRY_TYPE_TRIANGLE); + + int vertex_count = p_vertices.size(); + + Vector3 *embree_vertices = (Vector3 *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, sizeof(Vector3), vertex_count); + memcpy(embree_vertices, p_vertices.ptr(), sizeof(Vector3) * vertex_count); + + if (p_indices.is_empty()) { + ERR_FAIL_COND(vertex_count % 3 != 0); + uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, vertex_count / 3); + for (int i = 0; i < vertex_count; i++) { + embree_triangles[i] = i; + } + } else { + uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, p_indices.size() / 3); + memcpy(embree_triangles, p_indices.ptr(), sizeof(uint32_t) * p_indices.size()); + } + + rtcCommitGeometry(embree_mesh); + rtcAttachGeometryByID(embree_scene, embree_mesh, p_id); + rtcReleaseGeometry(embree_mesh); +} + +void StaticRaycasterEmbree::commit() { + rtcCommitScene(embree_scene); +} + +void StaticRaycasterEmbree::set_mesh_filter(const Set<int> &p_mesh_ids) { + for (Set<int>::Element *E = p_mesh_ids.front(); E; E = E->next()) { + rtcDisableGeometry(rtcGetGeometry(embree_scene, E->get())); + } + rtcCommitScene(embree_scene); + filter_meshes = p_mesh_ids; +} + +void StaticRaycasterEmbree::clear_mesh_filter() { + for (Set<int>::Element *E = filter_meshes.front(); E; E = E->next()) { + rtcEnableGeometry(rtcGetGeometry(embree_scene, E->get())); + } + rtcCommitScene(embree_scene); + filter_meshes.clear(); +} + +void embree_error_handler(void *p_user_data, RTCError p_code, const char *p_str) { + print_error("Embree error: " + String(p_str)); +} + +StaticRaycasterEmbree::StaticRaycasterEmbree() { +#ifdef __SSE2__ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + + if (!embree_device) { + embree_device = rtcNewDevice(nullptr); + rtcSetDeviceErrorFunction(embree_device, &embree_error_handler, nullptr); + } + + embree_scene = rtcNewScene(embree_device); +} + +StaticRaycasterEmbree::~StaticRaycasterEmbree() { + if (embree_scene != nullptr) { + rtcReleaseScene(embree_scene); + } +} + +#endif diff --git a/modules/raycast/static_raycaster.h b/modules/raycast/static_raycaster.h new file mode 100644 index 0000000000..6b13ecf690 --- /dev/null +++ b/modules/raycast/static_raycaster.h @@ -0,0 +1,64 @@ +/*************************************************************************/ +/* static_raycaster.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifdef TOOLS_ENABLED + +#include "core/math/static_raycaster.h" + +#include <embree3/rtcore.h> + +class StaticRaycasterEmbree : public StaticRaycaster { + GDCLASS(StaticRaycasterEmbree, StaticRaycaster); + +private: + static RTCDevice embree_device; + RTCScene embree_scene; + + Set<int> filter_meshes; + +public: + virtual bool intersect(Ray &p_ray) override; + virtual void intersect(Vector<Ray> &r_rays) override; + + virtual void add_mesh(const PackedVector3Array &p_vertices, const PackedInt32Array &p_indices, unsigned int p_id) override; + virtual void commit() override; + + virtual void set_mesh_filter(const Set<int> &p_mesh_ids) override; + virtual void clear_mesh_filter() override; + + static StaticRaycaster *create_embree_raycaster(); + static void make_default_raycaster(); + static void free(); + + StaticRaycasterEmbree(); + ~StaticRaycasterEmbree(); +}; + +#endif |