From c7255388e185e9f6d4363fc6d6c5cce17e944ba1 Mon Sep 17 00:00:00 2001 From: Juan Linietsky Date: Sat, 23 Jul 2022 19:12:41 +0200 Subject: Remove ThreadWorkPool, replace by WorkerThreadPool The former needs to be allocated once per usage. The later is shared for all threads, which is more efficient. It can also be better debugged. --- modules/navigation/nav_map.cpp | 13 ++------- modules/navigation/nav_map.h | 5 +--- modules/raycast/raycast_occlusion_cull.cpp | 43 +++++++++++++++++------------ modules/raycast/raycast_occlusion_cull.h | 11 ++++---- modules/text_server_adv/text_server_adv.cpp | 7 ++--- modules/text_server_adv/text_server_adv.h | 7 ++--- modules/text_server_fb/text_server_fb.cpp | 6 ++-- modules/text_server_fb/text_server_fb.h | 5 +--- 8 files changed, 44 insertions(+), 53 deletions(-) (limited to 'modules') diff --git a/modules/navigation/nav_map.cpp b/modules/navigation/nav_map.cpp index 17d6e0a0a1..46daa54239 100644 --- a/modules/navigation/nav_map.cpp +++ b/modules/navigation/nav_map.cpp @@ -30,9 +30,9 @@ #include "nav_map.h" +#include "core/object/worker_thread_pool.h" #include "nav_region.h" #include "rvo_agent.h" - #include #define THREE_POINTS_CROSS_PRODUCT(m_a, m_b, m_c) (((m_c) - (m_a)).cross((m_b) - (m_a))) @@ -683,14 +683,8 @@ void NavMap::compute_single_step(uint32_t index, RvoAgent **agent) { void NavMap::step(real_t p_deltatime) { deltatime = p_deltatime; if (controlled_agents.size() > 0) { - if (step_work_pool.get_thread_count() == 0) { - step_work_pool.init(); - } - step_work_pool.do_work( - controlled_agents.size(), - this, - &NavMap::compute_single_step, - controlled_agents.data()); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &NavMap::compute_single_step, controlled_agents.data(), controlled_agents.size(), -1, true, SNAME("NavigationMapAgents")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); } } @@ -736,5 +730,4 @@ NavMap::NavMap() { } NavMap::~NavMap() { - step_work_pool.finish(); } diff --git a/modules/navigation/nav_map.h b/modules/navigation/nav_map.h index 2036dbecd7..98a5c24b3e 100644 --- a/modules/navigation/nav_map.h +++ b/modules/navigation/nav_map.h @@ -34,8 +34,8 @@ #include "nav_rid.h" #include "core/math/math_defs.h" +#include "core/object/worker_thread_pool.h" #include "core/templates/rb_map.h" -#include "core/templates/thread_work_pool.h" #include "nav_utils.h" #include @@ -81,9 +81,6 @@ class NavMap : public NavRid { /// Change the id each time the map is updated. uint32_t map_update_id = 0; - /// Pooled threads for computing steps - ThreadWorkPool step_work_pool; - public: NavMap(); ~NavMap(); diff --git a/modules/raycast/raycast_occlusion_cull.cpp b/modules/raycast/raycast_occlusion_cull.cpp index 55883f9a78..13824c3830 100644 --- a/modules/raycast/raycast_occlusion_cull.cpp +++ b/modules/raycast/raycast_occlusion_cull.cpp @@ -30,6 +30,7 @@ #include "raycast_occlusion_cull.h" #include "core/config/project_settings.h" +#include "core/object/worker_thread_pool.h" #include "core/templates/local_vector.h" #ifdef __SSE2__ @@ -78,9 +79,9 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) { memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t)); } -void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) { +void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) { CameraRayThreadData td; - td.thread_count = p_thread_work_pool.get_thread_count(); + td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count(); td.z_near = p_cam_projection.get_z_near(); td.z_far = p_cam_projection.get_z_far() * 1.05f; @@ -106,7 +107,8 @@ void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D debug_tex_range = td.z_far; - p_thread_work_pool.do_work(td.thread_count, this, &RaycastHZBuffer::_camera_rays_threaded, &td); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RaycastHZBuffer::_camera_rays_threaded, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCullUpdateCamera")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); } void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data) { @@ -331,10 +333,10 @@ void RaycastOcclusionCull::scenario_remove_instance(RID p_scenario, RID p_instan } void RaycastOcclusionCull::Scenario::_update_dirty_instance_thread(int p_idx, RID *p_instances) { - _update_dirty_instance(p_idx, p_instances, nullptr); + _update_dirty_instance(p_idx, p_instances); } -void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool) { +void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances) { OccluderInstance *occ_inst = instances.getptr(p_instances[p_idx]); if (!occ_inst) { @@ -355,14 +357,16 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in const Vector3 *read_ptr = occ->vertices.ptr(); Vector3 *write_ptr = occ_inst->xformed_vertices.ptr(); - if (p_thread_pool && vertices_size > 1024) { + if (vertices_size > 1024) { TransformThreadData td; td.xform = occ_inst->xform; td.read = read_ptr; td.write = write_ptr; td.vertex_count = vertices_size; - td.thread_count = p_thread_pool->get_thread_count(); - p_thread_pool->do_work(td.thread_count, this, &Scenario::_transform_vertices_thread, &td); + td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count(); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); + } else { _transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size); } @@ -392,7 +396,7 @@ void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) { scenario->commit_done = true; } -bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) { +bool RaycastOcclusionCull::Scenario::update() { ERR_FAIL_COND_V(singleton == nullptr, false); if (commit_thread == nullptr) { @@ -426,13 +430,15 @@ bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) { instances.erase(removed_instances[i]); } - if (dirty_instances_array.size() / p_thread_pool.get_thread_count() > 128) { + if (dirty_instances_array.size() / WorkerThreadPool::get_singleton()->get_thread_count() > 128) { // Lots of instances, use per-instance threading - p_thread_pool.do_work(dirty_instances_array.size(), this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr()); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr(), dirty_instances_array.size(), -1, true, SNAME("RaycastOcclusionCullUpdate")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); + } else { // Few instances, use threading on the vertex transforms for (unsigned int i = 0; i < dirty_instances_array.size(); i++) { - _update_dirty_instance(i, dirty_instances_array.ptr(), &p_thread_pool); + _update_dirty_instance(i, dirty_instances_array.ptr()); } } @@ -484,7 +490,7 @@ void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThrea rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]); } -void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const { +void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const { ERR_FAIL_COND(singleton == nullptr); if (raycast_singleton->ebr_device == nullptr) { return; // Embree is initialized on demand when there is some scenario with occluders in it. @@ -498,7 +504,8 @@ void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32 td.rays = r_rays; td.masks = p_valid_masks; - p_thread_pool.do_work(p_tile_count, this, &Scenario::_raycast, &td); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_raycast, &td, p_tile_count, -1, true, SNAME("RaycastOcclusionCullRaycast")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); } //////////////////////////////////////////////////////// @@ -524,7 +531,7 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size) buffers[p_buffer].resize(p_size); } -void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) { +void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) { if (!buffers.has(p_buffer)) { return; } @@ -537,16 +544,16 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_ Scenario &scenario = scenarios[buffer.scenario_rid]; - bool removed = scenario.update(p_thread_pool); + bool removed = scenario.update(); if (removed) { scenarios.erase(buffer.scenario_rid); return; } - buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal, p_thread_pool); + buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal); - scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count, p_thread_pool); + scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count); buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal); buffer.update_mips(); } diff --git a/modules/raycast/raycast_occlusion_cull.h b/modules/raycast/raycast_occlusion_cull.h index 8c8b444309..056b808640 100644 --- a/modules/raycast/raycast_occlusion_cull.h +++ b/modules/raycast/raycast_occlusion_cull.h @@ -76,7 +76,7 @@ public: virtual void clear() override; virtual void resize(const Size2i &p_size) override; void sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal); - void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool); + void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal); ~RaycastHZBuffer(); }; @@ -143,14 +143,14 @@ private: LocalVector removed_instances; void _update_dirty_instance_thread(int p_idx, RID *p_instances); - void _update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool); + void _update_dirty_instance(int p_idx, RID *p_instances); void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data); void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to); static void _commit_scene(void *p_ud); - bool update(ThreadWorkPool &p_thread_pool); + bool update(); void _raycast(uint32_t p_thread, const RaycastThreadData *p_raycast_data) const; - void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const; + void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const; }; static RaycastOcclusionCull *raycast_singleton; @@ -183,7 +183,8 @@ public: virtual HZBuffer *buffer_get_ptr(RID p_buffer) override; virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) override; virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) override; - virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) override; + virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) override; + virtual RID buffer_get_debug_texture(RID p_buffer) override; virtual void set_build_quality(RS::ViewportOcclusionCullingBuildQuality p_quality) override; diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index fe2279df69..fa234081f0 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -29,6 +29,7 @@ /*************************************************************************/ #include "text_server_adv.h" +#include "core/object/worker_thread_pool.h" #ifdef GDEXTENSION // Headers for building as GDExtension plug-in. @@ -1039,10 +1040,8 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf( td.projection = &projection; td.distancePixelConversion = &distancePixelConversion; - if (p_font_data->work_pool.get_thread_count() == 0) { - p_font_data->work_pool.init(); - } - p_font_data->work_pool.do_work(h, this, &TextServerAdvanced::_generateMTSDF_threaded, &td); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerAdvanced::_generateMTSDF_threaded, &td, h, -1, true, SNAME("FontServerRasterizeMSDF")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config); diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index a772955d90..8cd0e753ba 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -65,11 +65,12 @@ #include #include #include +#include #include #include #include -#include + #include using namespace godot; @@ -77,9 +78,9 @@ using namespace godot; #else // Headers for building as built-in module. +#include "core/object/worker_thread_pool.h" #include "core/templates/hash_map.h" #include "core/templates/rid_owner.h" -#include "core/templates/thread_work_pool.h" #include "scene/resources/texture.h" #include "servers/text/text_server_extension.h" @@ -252,10 +253,8 @@ class TextServerAdvanced : public TextServerExtension { const uint8_t *data_ptr; size_t data_size; int face_index = 0; - mutable ThreadWorkPool work_pool; ~FontAdvanced() { - work_pool.finish(); for (const KeyValue &E : cache) { memdelete(E.value); } diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index b845beb158..50ea4677b1 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -461,10 +461,8 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf( td.projection = &projection; td.distancePixelConversion = &distancePixelConversion; - if (p_font_data->work_pool.get_thread_count() == 0) { - p_font_data->work_pool.init(); - } - p_font_data->work_pool.do_work(h, this, &TextServerFallback::_generateMTSDF_threaded, &td); + WorkerThreadPool::GroupID group_id = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerFallback::_generateMTSDF_threaded, &td, h, -1, true, SNAME("TextServerFBRenderMSDF")); + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_id); msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config); diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index b3ec4ffb38..adb5cbb817 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -79,9 +79,9 @@ using namespace godot; #include "servers/text/text_server_extension.h" +#include "core/object/worker_thread_pool.h" #include "core/templates/hash_map.h" #include "core/templates/rid_owner.h" -#include "core/templates/thread_work_pool.h" #include "scene/resources/texture.h" #include "modules/modules_enabled.gen.h" // For freetype, msdfgen. @@ -208,10 +208,7 @@ class TextServerFallback : public TextServerExtension { size_t data_size; int face_index = 0; - mutable ThreadWorkPool work_pool; - ~FontFallback() { - work_pool.finish(); for (const KeyValue &E : cache) { memdelete(E.value); } -- cgit v1.2.3