diff options
45 files changed, 2282 insertions, 1193 deletions
diff --git a/doc/classes/CodeHighlighter.xml b/doc/classes/CodeHighlighter.xml index 7a1dad547b..f078e4e5b0 100644 --- a/doc/classes/CodeHighlighter.xml +++ b/doc/classes/CodeHighlighter.xml @@ -1,8 +1,10 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="CodeHighlighter" inherits="SyntaxHighlighter" version="4.0"> <brief_description> + A syntax highlighter for code. </brief_description> <description> + A syntax highlighter for code. </description> <tutorials> </tutorials> @@ -10,15 +12,18 @@ <method name="add_color_region"> <return type="void"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> - <argument index="1" name="p_end_key" type="String"> + <argument index="1" name="end_key" type="String"> </argument> - <argument index="2" name="p_color" type="Color"> + <argument index="2" name="color" type="Color"> </argument> - <argument index="3" name="p_line_only" type="bool" default="false"> + <argument index="3" name="line_only" type="bool" default="false"> </argument> <description> + Adds a color region such as comments or strings. + Both the start and end keys must be symbols. Only the start key has to be unique. + Line only denotes if the region should continue until the end of the line or carry over on to the next line. If the end key is blank this is automatically set to [code]true[/code]. </description> </method> <method name="add_keyword_color"> @@ -29,6 +34,8 @@ <argument index="1" name="color" type="Color"> </argument> <description> + Sets the color for a keyword. + The keyword cannot contain any symbols except '_'. </description> </method> <method name="add_member_keyword_color"> @@ -39,24 +46,30 @@ <argument index="1" name="color" type="Color"> </argument> <description> + Sets the color for a member keyword. + The member keyword cannot contain any symbols except '_'. + It will not be highlighted if preceded by a '.'. 
</description> </method> <method name="clear_color_regions"> <return type="void"> </return> <description> + Removes all color regions. </description> </method> <method name="clear_keyword_colors"> <return type="void"> </return> <description> + Removes all keywords. </description> </method> <method name="clear_member_keyword_colors"> <return type="void"> </return> <description> + Removes all member keywords. </description> </method> <method name="get_keyword_color" qualifiers="const"> @@ -65,6 +78,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Returns the color for a keyword. </description> </method> <method name="get_member_keyword_color" qualifiers="const"> @@ -73,14 +87,16 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Returns the color for a member keyword. </description> </method> <method name="has_color_region" qualifiers="const"> <return type="bool"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> <description> + Return [code]true[/code] if the start key exists, else [code]false[/code]. </description> </method> <method name="has_keyword_color" qualifiers="const"> @@ -89,6 +105,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Return [code]true[/code] if the keyword exists, else [code]false[/code]. </description> </method> <method name="has_member_keyword_color" qualifiers="const"> @@ -97,14 +114,16 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Return [code]true[/code] if the member keyword exists, else [code]false[/code]. </description> </method> <method name="remove_color_region"> <return type="void"> </return> - <argument index="0" name="p_start_key" type="String"> + <argument index="0" name="start_key" type="String"> </argument> <description> + Removes the color region that uses that start key. 
</description> </method> <method name="remove_keyword_color"> @@ -113,6 +132,7 @@ <argument index="0" name="keyword" type="String"> </argument> <description> + Removes the keyword. </description> </method> <method name="remove_member_keyword_color"> @@ -121,23 +141,31 @@ <argument index="0" name="member_keyword" type="String"> </argument> <description> + Removes the member keyword. </description> </method> </methods> <members> <member name="color_regions" type="Dictionary" setter="set_color_regions" getter="get_color_regions" default="{}"> + Sets the color regions. All existing regions will be removed. The [Dictionary] key is the region start and end key, separated by a space. The value is the region color. </member> <member name="function_color" type="Color" setter="set_function_color" getter="get_function_color" default="Color( 0, 0, 0, 1 )"> + Sets color for functions. A function is a non-keyword string followed by a '('. </member> <member name="keyword_colors" type="Dictionary" setter="set_keyword_colors" getter="get_keyword_colors" default="{}"> + Sets the keyword colors. All existing keywords will be removed. The [Dictionary] key is the keyword. The value is the keyword color. </member> <member name="member_keyword_colors" type="Dictionary" setter="set_member_keyword_colors" getter="get_member_keyword_colors" default="{}"> + Sets the member keyword colors. All existing member keywords will be removed. The [Dictionary] key is the member keyword. The value is the member keyword color. </member> <member name="member_variable_color" type="Color" setter="set_member_variable_color" getter="get_member_variable_color" default="Color( 0, 0, 0, 1 )"> + Sets color for member variables. A member variable is a non-keyword, non-function string preceded by a '.'. </member> <member name="number_color" type="Color" setter="set_number_color" getter="get_number_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for numbers. 
</member> <member name="symbol_color" type="Color" setter="set_symbol_color" getter="get_symbol_color" default="Color( 0, 0, 0, 1 )"> + Sets the color for symbols. </member> </members> <constants> diff --git a/doc/classes/EditorSyntaxHighlighter.xml b/doc/classes/EditorSyntaxHighlighter.xml index 103d95e1d6..b80e81928f 100644 --- a/doc/classes/EditorSyntaxHighlighter.xml +++ b/doc/classes/EditorSyntaxHighlighter.xml @@ -1,8 +1,11 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="EditorSyntaxHighlighter" inherits="SyntaxHighlighter" version="4.0"> <brief_description> + Base Syntax highlighter resource for the [ScriptEditor]. </brief_description> <description> + Base syntax highlighter resource all editor syntax highlighters extend from, it is used in the [ScriptEditor]. + Add a syntax highlighter to an individual script by calling [method ScriptEditorBase.add_syntax_highlighter]. To apply to all scripts on open, call [method ScriptEditor.register_syntax_highlighter] </description> <tutorials> </tutorials> @@ -11,18 +14,21 @@ <return type="String"> </return> <description> + Virtual method which can be overridden to return the syntax highlighter name. </description> </method> <method name="_get_supported_extentions" qualifiers="virtual"> <return type="Array"> </return> <description> + Virtual method which can be overridden to return the supported file extensions. </description> </method> <method name="_get_supported_languages" qualifiers="virtual"> <return type="Array"> </return> <description> + Virtual method which can be overridden to return the supported language names. </description> </method> </methods> diff --git a/doc/classes/ScriptEditor.xml b/doc/classes/ScriptEditor.xml index d5a32dd20c..28620bd29b 100644 --- a/doc/classes/ScriptEditor.xml +++ b/doc/classes/ScriptEditor.xml @@ -37,6 +37,7 @@ <return type="ScriptEditorBase"> </return> <description> + Returns the [ScriptEditorBase] object that the user is currently editing. 
</description> </method> <method name="get_current_script"> @@ -60,6 +61,7 @@ <return type="Array"> </return> <description> + Returns an array with all [ScriptEditorBase] objects which are currently open in editor. </description> </method> <method name="get_open_scripts" qualifiers="const"> @@ -95,6 +97,8 @@ <argument index="0" name="syntax_highlighter" type="EditorSyntaxHighlighter"> </argument> <description> + Registers the [EditorSyntaxHighlighter] to the editor, the [EditorSyntaxHighlighter] will be available on all open scripts. + [b]Note:[/b] Does not apply to scripts that are already opened. </description> </method> <method name="unregister_syntax_highlighter"> @@ -103,6 +107,8 @@ <argument index="0" name="syntax_highlighter" type="EditorSyntaxHighlighter"> </argument> <description> + Unregisters the [EditorSyntaxHighlighter] from the editor. + [b]Note:[/b] The [EditorSyntaxHighlighter] will still be applied to scripts that are already opened. </description> </method> </methods> diff --git a/doc/classes/ScriptEditorBase.xml b/doc/classes/ScriptEditorBase.xml index 9968ae06c3..ee498de302 100644 --- a/doc/classes/ScriptEditorBase.xml +++ b/doc/classes/ScriptEditorBase.xml @@ -1,8 +1,10 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="ScriptEditorBase" inherits="VBoxContainer" version="4.0"> <brief_description> + Base editor for editing scripts in the [ScriptEditor]. </brief_description> <description> + Base editor for editing scripts in the [ScriptEditor], this does not include documentation items. </description> <tutorials> </tutorials> @@ -13,34 +15,40 @@ <argument index="0" name="highlighter" type="Object"> </argument> <description> + Adds a [EditorSyntaxHighlighter] to the open script. </description> </method> </methods> <signals> <signal name="edited_script_changed"> <description> + Emitted after script validation. For visual scripts on modification. 
</description> </signal> <signal name="go_to_help"> <argument index="0" name="what" type="String"> </argument> <description> + Emitted when the user requests a specific documentation page. </description> </signal> <signal name="name_changed"> <description> + Emitted after script validation or when the edited resource has changed. Not used by visual scripts. </description> </signal> <signal name="replace_in_files_requested"> <argument index="0" name="text" type="String"> </argument> <description> + Emitted when the user requests to find and replace text in the file system. Not used by visual scripts. </description> </signal> <signal name="request_help"> <argument index="0" name="topic" type="String"> </argument> <description> + Emitted when the user requests contextual help. </description> </signal> <signal name="request_open_script_at_line"> @@ -49,16 +57,19 @@ <argument index="1" name="line" type="int"> </argument> <description> + Emitted when the user requests a script. </description> </signal> <signal name="request_save_history"> <description> + Emitted when the user performs a contextual goto and the item is in the same script. </description> </signal> <signal name="search_in_files_requested"> <argument index="0" name="text" type="String"> </argument> <description> + Emitted when the user requests to search text in the file system. Not used by visual scripts. </description> </signal> </signals> diff --git a/doc/classes/SyntaxHighlighter.xml b/doc/classes/SyntaxHighlighter.xml index 2d6e3de02a..3f87e4f61d 100644 --- a/doc/classes/SyntaxHighlighter.xml +++ b/doc/classes/SyntaxHighlighter.xml @@ -1,50 +1,83 @@ <?xml version="1.0" encoding="UTF-8" ?> <class name="SyntaxHighlighter" inherits="Resource" version="4.0"> <brief_description> + Base Syntax highlighter resource for [TextEdit]. </brief_description> <description> + Base syntax highlighter resource all syntax highlighters extend from, provides syntax highlighting data to [TextEdit]. 
+ The associated [TextEdit] node will call into the [SyntaxHighlighter] on an as-needed basis. + [b]Note:[/b] Each Syntax highlighter instance should not be shared across multiple [TextEdit] nodes. </description> <tutorials> </tutorials> <methods> + <method name="_clear_highlighting_cache" qualifiers="virtual"> + <return type="void"> + </return> + <description> + Virtual method which can be overridden to clear any local caches. + </description> + </method> <method name="_get_line_syntax_highlighting" qualifiers="virtual"> <return type="Dictionary"> </return> - <argument index="0" name="p_line" type="int"> + <argument index="0" name="line" type="int"> </argument> <description> + Virtual method which can be overridden to return syntax highlighting data. + See [method get_line_syntax_highlighting] for more details. </description> </method> <method name="_update_cache" qualifiers="virtual"> <return type="void"> </return> <description> + Virtual method which can be overridden to update any local caches. </description> </method> <method name="clear_highlighting_cache"> <return type="void"> </return> <description> + Clears all cached syntax highlighting data. + Then calls overridable method [method _clear_highlighting_cache]. </description> </method> <method name="get_line_syntax_highlighting"> <return type="Dictionary"> </return> - <argument index="0" name="p_line" type="int"> + <argument index="0" name="line" type="int"> </argument> <description> + Returns syntax highlighting data for a single line. If the line is not cached, calls [method _get_line_syntax_highlighting] to calculate the data. + The returned [Dictionary] maps each column number to a nested [Dictionary]. The column number marks the start of a region; the region ends when another region is found, or at the end of the line. The nested [Dictionary] contains the data for that region; currently only the key "color" is supported. 
+ [b]Example return:[/b] + [codeblock] + var color_map = { + 0: { + "color": Color(1, 0, 0) + }, + 5: { + "color": Color(0, 1, 0) + } + } + [/codeblock] + This will color columns 0-4 in red, and columns 5-eol in green. </description> </method> <method name="get_text_edit"> <return type="TextEdit"> </return> <description> + Returns the associated [TextEdit] node. </description> </method> <method name="update_cache"> <return type="void"> </return> <description> + Clears then updates the [SyntaxHighlighter] caches. Override [method _update_cache] for a callback. + [b]Note:[/b] This is called automatically when the associated [TextEdit] node updates its own cache. </description> </method> </methods> diff --git a/doc/classes/TextEdit.xml b/doc/classes/TextEdit.xml index af4543374a..539f7afbd8 100644 --- a/doc/classes/TextEdit.xml +++ b/doc/classes/TextEdit.xml @@ -725,6 +725,7 @@ Set additional options for BiDi override. </member> <member name="syntax_highlighter" type="SyntaxHighlighter" setter="set_syntax_highlighter" getter="get_syntax_highlighter"> + Sets the [SyntaxHighlighter] to use. </member> <member name="text" type="String" setter="set_text" getter="get_text" default=""""> String value of the [TextEdit]. @@ -914,7 +915,7 @@ </constants> <theme_items> <theme_item name="background_color" type="Color" default="Color( 0, 0, 0, 0 )"> - Sets the background [Color] of this [TextEdit]. [member syntax_highlighting] has to be enabled. + Sets the background [Color] of this [TextEdit]. 
</theme_item> <theme_item name="brace_mismatch_color" type="Color" default="Color( 1, 0.2, 0.2, 1 )"> </theme_item> diff --git a/drivers/dummy/rasterizer_dummy.h b/drivers/dummy/rasterizer_dummy.h index 72ab18d115..e69f36e16f 100644 --- a/drivers/dummy/rasterizer_dummy.h +++ b/drivers/dummy/rasterizer_dummy.h @@ -566,6 +566,7 @@ public: AABB mesh_get_custom_aabb(RID p_mesh) const override { return AABB(); } AABB mesh_get_aabb(RID p_mesh, RID p_skeleton = RID()) override { return AABB(); } + void mesh_set_shadow_mesh(RID p_mesh, RID p_shadow_mesh) override {} void mesh_clear(RID p_mesh) override {} /* MULTIMESH API */ diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index ef331ec4b6..9584dd3f67 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -74,11 +74,13 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID } else if (texture_buffer_owner.owns(p_buffer)) { if (p_post_barrier & BARRIER_MASK_RASTER) { r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } if (p_post_barrier & BARRIER_MASK_COMPUTE) { r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; } - r_access_mask |= VK_ACCESS_SHADER_READ_BIT; + buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.getornull(p_buffer); @@ -1627,6 +1629,9 @@ void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mas mem_barrier.srcAccessMask = p_src_access; mem_barrier.dstAccessMask = p_dst_sccess; + if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) { + return; //no barrier, since this is invalid + } vkCmdPipelineBarrier(p_sync_with_draw ? 
frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr); } @@ -2477,6 +2482,10 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -2496,6 +2505,13 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } + if (texture->used_in_frame != frames_drawn) { + texture->used_in_raster = false; + texture->used_in_compute = false; + texture->used_in_frame = frames_drawn; + } + texture->used_in_transfer = true; + return OK; } @@ -2844,6 +2860,10 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -3011,6 +3031,10 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -3143,6 +3167,10 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + 
VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -3163,6 +3191,13 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } + if (src_tex->used_in_frame != frames_drawn) { + src_tex->used_in_raster = false; + src_tex->used_in_compute = false; + src_tex->used_in_frame = frames_drawn; + } + src_tex->used_in_transfer = true; + return OK; } @@ -3289,6 +3324,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF dependency_from_external.srcStageMask |= reading_stages; } } break; + case INITIAL_ACTION_CLEAR_REGION_CONTINUE: case INITIAL_ACTION_CONTINUE: { if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -3296,7 +3332,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there + description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; @@ -3425,8 +3461,13 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF render_pass_create_info.pAttachments = attachments.ptr(); render_pass_create_info.subpassCount = 1; render_pass_create_info.pSubpasses = &subpass; - render_pass_create_info.dependencyCount = 2; - render_pass_create_info.pDependencies = dependencies; + // Commenting this because it seems it just avoids raster and 
compute to work at the same time. + // Other barriers seem to be protecting the render pass fine. + // render_pass_create_info.dependencyCount = 2; + // render_pass_create_info.pDependencies = dependencies; + + render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; VkRenderPass render_pass; VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass); @@ -4108,6 +4149,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages bool is_compute = false; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + for (int i = 0; i < p_stages.size(); i++) { if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { is_compute = true; @@ -4124,6 +4167,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader."); + if (is_compute) { + compute_local_size[0] = module.entry_points->local_size.x; + compute_local_size[1] = module.entry_points->local_size.y; + compute_local_size[2] = module.entry_points->local_size.z; + } uint32_t binding_count = 0; result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4328,6 +4376,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } } } + uint32_t pc_count = 0; result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4376,6 +4425,9 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader.fragment_outputs = fragment_outputs; shader.push_constant = push_constant; shader.is_compute = is_compute; + shader.compute_local_size[0] = compute_local_size[0]; + shader.compute_local_size[1] = 
compute_local_size[1]; + shader.compute_local_size[2] = compute_local_size[2]; String error_text; @@ -5216,7 +5268,14 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + } + #endif return err; } @@ -5255,7 +5314,12 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3 #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_post_barrier); + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask); + #endif return OK; } @@ -5710,6 +5774,9 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { pipeline.pipeline_layout = shader->pipeline_layout; pipeline.shader = p_shader; pipeline.push_constant_size = shader->push_constant.push_constant_size; + pipeline.local_group_size[0] = shader->compute_local_size[0]; + pipeline.local_group_size[1] = shader->compute_local_size[1]; + pipeline.local_group_size[2] = shader->compute_local_size[2]; //create ID to associate with this pipeline RID id = compute_pipeline_owner.make_rid(pipeline); @@ -6019,7 +6086,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu _THREAD_SAFE_METHOD_ 
ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); - ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time."); Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer); ERR_FAIL_COND_V(!framebuffer, INVALID_ID); @@ -6040,7 +6107,14 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu viewport_offset = regioni.position; viewport_size = regioni.size; - + if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_color = true; + p_initial_color_action = INITIAL_ACTION_CONTINUE; + } + if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_depth = true; + p_initial_depth_action = INITIAL_ACTION_CONTINUE; + } if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_color = true; p_initial_color_action = INITIAL_ACTION_KEEP; @@ -6388,6 +6462,19 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_ dl->state.sets[p_index].uniform_set_format = uniform_set->format; dl->state.sets[p_index].uniform_set = p_uniform_set; + uint32_t mst_count = uniform_set->mutable_storage_textures.size(); + if (mst_count) { + Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw(); + for (uint32_t i = 0; i < mst_count; i++) { + if (mst_textures[i]->used_in_frame != frames_drawn) { + mst_textures[i]->used_in_frame = frames_drawn; + mst_textures[i]->used_in_transfer = false; + mst_textures[i]->used_in_compute = false; + } + mst_textures[i]->used_in_raster = true; + } + } + #ifdef DEBUG_ENABLED { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ 
-6673,23 +6760,43 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_post_barrier & BARRIER_MASK_RASTER) { - barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; } if (p_post_barrier & BARRIER_MASK_TRANSFER) { barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + draw_list_bound_textures.clear(); - for (int i = 0; i < draw_list_storage_textures.size(); i++) { + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = draw_list_storage_textures.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size()); + } + + uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + if (image_barrier_count) { + src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + src_access |= VK_ACCESS_SHADER_WRITE_BIT; + } + + for (uint32_t i = 0; i < image_barrier_count; i++) { 
Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]); - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[i]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.srcAccessMask = src_access; image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6703,8 +6810,6 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6717,7 +6822,17 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, barrier_flags, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, access_flags, true); + + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = src_access; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif } @@ -6725,12 +6840,13 @@ void 
RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { /**** COMPUTE LISTS ****/ /***********************/ -RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { - ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); +RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) { + ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); compute_list = memnew(ComputeList); compute_list->command_buffer = frames[frame].draw_command_buffer; + compute_list->state.allow_draw_overlap = p_allow_draw_overlap; return ID_TYPE_COMPUTE_LIST; } @@ -6787,6 +6903,9 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l } cl->state.pipeline_shader = pipeline->shader; + cl->state.local_group_size[0] = pipeline->local_group_size[0]; + cl->state.local_group_size[1] = pipeline->local_group_size[1]; + cl->state.local_group_size[2] = pipeline->local_group_size[2]; } #ifdef DEBUG_ENABLED @@ -6824,11 +6943,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, cl->state.sets[p_index].uniform_set = p_uniform_set; uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size(); + uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); + Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw(); + VkImageMemoryBarrier *texture_barriers = nullptr; + + if (textures_to_sampled_count + textures_to_storage_count) { + texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count)); + } + uint32_t texture_barrier_count = 0; + + uint32_t src_stage_flags = 0; + for (uint32_t i = 0; i < 
textures_to_sampled_count; i++) { if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - VkImageMemoryBarrier image_memory_barrier; + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -6845,23 +6977,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]); } + + if (textures_to_sampled[i]->used_in_frame != frames_drawn) { + textures_to_sampled[i]->used_in_frame = frames_drawn; + textures_to_sampled[i]->used_in_transfer = false; + textures_to_sampled[i]->used_in_raster = false; + } + textures_to_sampled[i]->used_in_compute = true; } - uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw(); for (uint32_t i = 0; i < textures_to_storage_count; i++) { if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) { - VkImageMemoryBarrier image_memory_barrier; + uint32_t src_access_flags = 0; + + if (textures_to_storage[i]->used_in_frame == frames_drawn) { + if (textures_to_storage[i]->used_in_compute) { + src_stage_flags |= 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_raster) { + src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_transfer) { + src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + + } else { + src_access_flags = 0; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_frame = frames_drawn; + } + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.srcAccessMask = src_access_flags; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; image_memory_barrier.oldLayout = textures_to_storage[i]->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; @@ -6875,14 +7039,20 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - 
textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL; cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards } } + if (texture_barrier_count) { + if (src_stage_flags == 0) { + src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + + vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers); + } + #if 0 { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -6976,6 +7146,27 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups); } +void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) { + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_COND(!compute_list); + + ComputeList *cl = compute_list; + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw."); + + if (cl->validation.pipeline_push_constant_size > 0) { + //using push constants, check that they were supplied + ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + + compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1); +} + void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_COND(!compute_list); @@ -7047,7 +7238,7 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { 
uint32_t access_flags = 0; if (p_post_barrier & BARRIER_MASK_COMPUTE) { barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_post_barrier & BARRIER_MASK_RASTER) { barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; @@ -7058,8 +7249,22 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count); + } + + uint32_t barrier_idx = 0; + for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -7076,19 +7281,33 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer; image_memory_barrier.subresourceRange.layerCount = E->get()->layers; - // TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent - vkCmdPipelineBarrier(compute_list->command_buffer, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + if (E->get()->used_in_frame != frames_drawn) { + E->get()->used_in_transfer = false; + E->get()->used_in_raster = false; + E->get()->used_in_compute = false; + E->get()->used_in_frame = frames_drawn; + } } - memdelete(compute_list); - compute_list = nullptr; #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif + + memdelete(compute_list); + compute_list = nullptr; } void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { @@ -7107,11 +7326,15 @@ void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; } + if (p_from == 0) { + src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + uint32_t dst_barrier_flags = 0; uint32_t dst_access_flags = 0; if (p_to & BARRIER_MASK_COMPUTE) { dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; } if (p_to & BARRIER_MASK_RASTER) { 
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; @@ -7122,6 +7345,10 @@ void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; } + if (p_to == 0) { + dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); } @@ -7325,6 +7552,16 @@ void RenderingDeviceVulkan::draw_command_end_label() { context->command_end_label(frames[frame].draw_command_buffer); } +String RenderingDeviceVulkan::get_device_vendor_name() const { + return context->get_device_vendor_name(); +} +String RenderingDeviceVulkan::get_device_name() const { + return context->get_device_name(); +} +String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const { + return context->get_device_pipeline_cache_uuid(); +} + void RenderingDeviceVulkan::_finalize_command_bufers() { if (draw_list) { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); @@ -7377,6 +7614,7 @@ void RenderingDeviceVulkan::_begin_frame() { if (frames[frame].timestamp_count) { vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); + vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count); SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); } diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 4bea17e4a1..a2527d5c33 100644 --- 
a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -141,6 +141,11 @@ class RenderingDeviceVulkan : public RenderingDevice { VkImageLayout layout; + uint64_t used_in_frame = 0; + bool used_in_transfer = false; + bool used_in_raster = false; + bool used_in_compute = false; + uint32_t read_aspect_mask = 0; uint32_t barrier_aspect_mask = 0; bool bound = false; //bound to framebffer @@ -528,6 +533,8 @@ class RenderingDeviceVulkan : public RenderingDevice { PushConstant push_constant; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + bool is_compute = false; int max_output = 0; Vector<Set> sets; @@ -686,6 +693,7 @@ class RenderingDeviceVulkan : public RenderingDevice { VkPipeline pipeline = VK_NULL_HANDLE; uint32_t push_constant_size = 0; uint32_t push_constant_stages = 0; + uint32_t local_group_size[3] = { 0, 0, 0 }; }; RID_Owner<ComputePipeline, true> compute_pipeline_owner; @@ -808,8 +816,10 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t set_count = 0; RID pipeline; RID pipeline_shader; + uint32_t local_group_size[3] = { 0, 0, 0 }; VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; uint32_t pipeline_push_constant_stages = 0; + bool allow_draw_overlap; } state; #ifdef DEBUG_ENABLED @@ -1028,13 +1038,14 @@ public: /**** COMPUTE LISTS ****/ /***********************/ - virtual ComputeListID compute_list_begin(); + virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false); virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); virtual void compute_list_add_barrier(ComputeListID p_list); virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + virtual void 
compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); @@ -1085,6 +1096,10 @@ public: virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); virtual void draw_command_end_label(); + virtual String get_device_vendor_name() const; + virtual String get_device_name() const; + virtual String get_device_pipeline_cache_uuid() const; + RenderingDeviceVulkan(); ~RenderingDeviceVulkan(); }; diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 98966477a5..c564cee757 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -380,7 +380,8 @@ Error VulkanContext::_create_physical_device() { ERR_FAIL_V(ERR_CANT_CREATE); } /* for now, just grab the first physical device */ - gpu = physical_devices[0]; + uint32_t device_index = 0; + gpu = physical_devices[device_index]; free(physical_devices); /* Look for device extensions */ @@ -389,6 +390,40 @@ Error VulkanContext::_create_physical_device() { enabled_extension_count = 0; memset(extension_names, 0, sizeof(extension_names)); + /* Get identifier properties */ + vkGetPhysicalDeviceProperties(gpu, &gpu_props); + + static const struct { + uint32_t id; + const char *name; + } vendor_names[] = { + { 0x1002, "AMD" }, + { 0x1010, "ImgTec" }, + { 0x10DE, "NVIDIA" }, + { 0x13B5, "ARM" }, + { 0x5143, "Qualcomm" }, + { 0x8086, "INTEL" }, + { 0, nullptr }, + }; + device_name = gpu_props.deviceName; + pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE); + pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion); + { + device_vendor = "Unknown"; + uint32_t vendor_idx = 0; + while (vendor_names[vendor_idx].name != nullptr) { + if (gpu_props.vendorID == 
vendor_names[vendor_idx].id) { + device_vendor = vendor_names[vendor_idx].name; + break; + } + vendor_idx++; + } + } +#ifdef DEBUG_ENABLED + print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name); +#endif + device_api_version = gpu_props.apiVersion; + err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -498,7 +533,6 @@ Error VulkanContext::_create_physical_device() { break; } } - vkGetPhysicalDeviceProperties(gpu, &gpu_props); /* Call with NULL data to get count */ vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); @@ -565,6 +599,7 @@ Error VulkanContext::_create_device() { } err = vkCreateDevice(gpu, &sdevice, nullptr, &device); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + return OK; } @@ -1590,11 +1625,12 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String if (!enabled_debug_utils) { return; } + + CharString cs = p_label_name.utf8().get_data(); VkDebugUtilsLabelEXT label; label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = nullptr; - CharString label_name = p_label_name.utf8(); - label.pLabelName = label_name.get_data(); + label.pLabelName = cs.get_data(); label.color[0] = p_color[0]; label.color[1] = p_color[1]; label.color[2] = p_color[2]; @@ -1606,11 +1642,11 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin if (!enabled_debug_utils) { return; } + CharString cs = p_label_name.utf8().get_data(); VkDebugUtilsLabelEXT label; label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; label.pNext = nullptr; - CharString label_name = p_label_name.utf8(); - label.pLabelName = label_name.get_data(); + label.pLabelName = cs.get_data(); label.color[0] = p_color[0]; label.color[1] = p_color[1]; label.color[2] = p_color[2]; @@ -1629,16 +1665,26 @@ void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_objec if 
(!enabled_debug_utils) { return; } + CharString obj_data = p_object_name.utf8(); VkDebugUtilsObjectNameInfoEXT name_info; name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; name_info.pNext = nullptr; name_info.objectType = p_object_type; name_info.objectHandle = p_object_handle; - CharString object_name = p_object_name.utf8(); - name_info.pObjectName = object_name.get_data(); + name_info.pObjectName = obj_data.get_data(); SetDebugUtilsObjectNameEXT(device, &name_info); } +String VulkanContext::get_device_vendor_name() const { + return device_vendor; +} +String VulkanContext::get_device_name() const { + return device_name; +} +String VulkanContext::get_device_pipeline_cache_uuid() const { + return pipeline_cache_id; +} + VulkanContext::VulkanContext() { use_validation_layers = Engine::get_singleton()->is_validation_layers_enabled(); diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 5cb762aca8..dc6b0410bc 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -57,6 +57,11 @@ class VulkanContext { bool device_initialized = false; bool inst_initialized = false; + String device_vendor; + String device_name; + String pipeline_cache_id; + uint32_t device_api_version = 0; + bool buffers_prepared = false; // Present queue. 
@@ -215,6 +220,10 @@ public: void command_end_label(VkCommandBuffer p_command_buffer); void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); + String get_device_vendor_name() const; + String get_device_name() const; + String get_device_pipeline_cache_uuid() const; + VulkanContext(); virtual ~VulkanContext(); }; diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 040f1b1640..208f4b954a 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -6265,7 +6265,7 @@ EditorNode::EditorNode() { p = help_menu->get_popup(); p->connect("id_pressed", callable_mp(this, &EditorNode::_menu_option)); - p->add_icon_shortcut(gui_base->get_theme_icon("HelpSearch", "EditorIcons"), ED_SHORTCUT("editor/editor_help", TTR("Search"), KEY_MASK_SHIFT | KEY_F1), HELP_SEARCH); + p->add_icon_shortcut(gui_base->get_theme_icon("HelpSearch", "EditorIcons"), ED_SHORTCUT("editor/editor_help", TTR("Search")), HELP_SEARCH); p->add_separator(); p->add_icon_shortcut(gui_base->get_theme_icon("Instance", "EditorIcons"), ED_SHORTCUT("editor/online_docs", TTR("Online Docs")), HELP_DOCS); p->add_icon_shortcut(gui_base->get_theme_icon("Instance", "EditorIcons"), ED_SHORTCUT("editor/q&a", TTR("Q&A")), HELP_QA); diff --git a/editor/plugins/texture_region_editor_plugin.cpp b/editor/plugins/texture_region_editor_plugin.cpp index 61e0cc281d..36348f7753 100644 --- a/editor/plugins/texture_region_editor_plugin.cpp +++ b/editor/plugins/texture_region_editor_plugin.cpp @@ -480,20 +480,41 @@ void TextureRegionEditor::_region_input(const Ref<InputEvent> &p_input) { Vector2 dragged(mm->get_relative().x / draw_zoom, mm->get_relative().y / draw_zoom); hscroll->set_value(hscroll->get_value() - dragged.x); vscroll->set_value(vscroll->get_value() - dragged.y); - } else if (drag) { if (edited_margin >= 0) { float new_margin = 0; - if (edited_margin == 0) { - new_margin = prev_margin + (mm->get_position().y - drag_from.y) / draw_zoom; - } else if 
(edited_margin == 1) { - new_margin = prev_margin - (mm->get_position().y - drag_from.y) / draw_zoom; - } else if (edited_margin == 2) { - new_margin = prev_margin + (mm->get_position().x - drag_from.x) / draw_zoom; - } else if (edited_margin == 3) { - new_margin = prev_margin - (mm->get_position().x - drag_from.x) / draw_zoom; + + if (snap_mode != SNAP_GRID) { + if (edited_margin == 0) { + new_margin = prev_margin + (mm->get_position().y - drag_from.y) / draw_zoom; + } else if (edited_margin == 1) { + new_margin = prev_margin - (mm->get_position().y - drag_from.y) / draw_zoom; + } else if (edited_margin == 2) { + new_margin = prev_margin + (mm->get_position().x - drag_from.x) / draw_zoom; + } else if (edited_margin == 3) { + new_margin = prev_margin - (mm->get_position().x - drag_from.x) / draw_zoom; + } else { + ERR_PRINT("Unexpected edited_margin"); + } + + if (snap_mode == SNAP_PIXEL) { + new_margin = Math::round(new_margin); + } } else { - ERR_PRINT("Unexpected edited_margin"); + Vector2 pos_snapped = snap_point(mtx.affine_inverse().xform(mm->get_position())); + Rect2 rect_rounded = Rect2(rect.position.round(), rect.size.round()); + + if (edited_margin == 0) { + new_margin = pos_snapped.y - rect_rounded.position.y; + } else if (edited_margin == 1) { + new_margin = rect_rounded.size.y + rect_rounded.position.y - pos_snapped.y; + } else if (edited_margin == 2) { + new_margin = pos_snapped.x - rect_rounded.position.x; + } else if (edited_margin == 3) { + new_margin = rect_rounded.size.x + rect_rounded.position.x - pos_snapped.x; + } else { + ERR_PRINT("Unexpected edited_margin"); + } } if (new_margin < 0) { diff --git a/editor/project_manager.cpp b/editor/project_manager.cpp index 5951373af9..afbed0c610 100644 --- a/editor/project_manager.cpp +++ b/editor/project_manager.cpp @@ -2281,6 +2281,11 @@ void ProjectManager::_install_project(const String &p_zip_path, const String &p_ } void ProjectManager::_files_dropped(PackedStringArray p_files, int p_screen) { + if 
(p_files.size() == 1 && p_files[0].ends_with(".zip")) { + const String file = p_files[0].get_file(); + _install_project(p_files[0], file.substr(0, file.length() - 4).capitalize()); + return; + } Set<String> folders_set; DirAccess *da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); for (int i = 0; i < p_files.size(); i++) { diff --git a/misc/dist/html/editor.html b/misc/dist/html/editor.html index 53ad826730..540ab94e51 100644 --- a/misc/dist/html/editor.html +++ b/misc/dist/html/editor.html @@ -326,7 +326,7 @@ function startEditor(zip) { const INDETERMINATE_STATUS_STEP_MS = 100; - const persistentPaths = ['/home/web_user/.config', '/home/web_user/.cache', '/home/web_user/projects']; + const persistentPaths = ['/home/web_user/']; var editorCanvas = document.getElementById('editor-canvas'); var gameCanvas = document.getElementById('game-canvas'); @@ -493,11 +493,11 @@ engine.setUnloadAfterInit(false); // Don't want to reload when starting game. engine.init('godot.tools').then(function() { if (zip) { - engine.copyToFS("/home/web_user/preload.zip", zip); + engine.copyToFS("/tmp/preload.zip", zip); } try { // Avoid user creating project in the persistent root folder. 
- engine.copyToFS("/home/web_user/projects/keep", new Uint8Array()); + engine.copyToFS("/home/web_user/keep", new Uint8Array()); } catch(e) { // File exists } diff --git a/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs b/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs index 5ef55fea49..774c49e705 100644 --- a/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs +++ b/modules/mono/editor/GodotTools/GodotTools/Build/MsBuildFinder.cs @@ -86,7 +86,7 @@ namespace GodotTools.Build { case BuildTool.DotnetCli: { - string dotnetCliPath = OS.PathWhich("dotnet"); + string dotnetCliPath = FindBuildEngineOnUnix("dotnet"); if (!string.IsNullOrEmpty(dotnetCliPath)) return (dotnetCliPath, BuildTool.DotnetCli); GD.PushError($"Cannot find executable for '{BuildManager.PropNameDotnetCli}'. Fallback to MSBuild from Mono."); @@ -122,7 +122,11 @@ namespace GodotTools.Build if (OS.IsMacOS) { result.Add("/Library/Frameworks/Mono.framework/Versions/Current/bin/"); + result.Add("/opt/local/bin/"); result.Add("/usr/local/var/homebrew/linked/mono/bin/"); + result.Add("/usr/local/bin/"); + result.Add("/usr/local/bin/dotnet/"); + result.Add("/usr/local/share/dotnet/"); } result.Add("/opt/novell/mono/bin/"); diff --git a/modules/tga/image_loader_tga.cpp b/modules/tga/image_loader_tga.cpp index 2da9159228..ef53661557 100644 --- a/modules/tga/image_loader_tga.cpp +++ b/modules/tga/image_loader_tga.cpp @@ -56,6 +56,10 @@ Error ImageLoaderTGA::decode_tga_rle(const uint8_t *p_compressed_buffer, size_t compressed_pos += 1; count = (c & 0x7f) + 1; + if (output_pos + count * p_pixel_size > output_pos) { + return ERR_PARSE_ERROR; + } + if (c & 0x80) { for (size_t i = 0; i < p_pixel_size; i++) { pixels_w[i] = p_compressed_buffer[compressed_pos]; @@ -79,7 +83,7 @@ Error ImageLoaderTGA::decode_tga_rle(const uint8_t *p_compressed_buffer, size_t return OK; } -Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s 
&p_header, const uint8_t *p_palette, const bool p_is_monochrome) { +Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome, size_t p_output_size) { #define TGA_PUT_PIXEL(r, g, b, a) \ int image_data_ofs = ((y * width) + x); \ image_data_w[image_data_ofs * 4 + 0] = r; \ @@ -130,6 +134,9 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff if (p_is_monochrome) { while (y != y_end) { while (x != x_end) { + if (i > p_output_size) { + return ERR_PARSE_ERROR; + } uint8_t shade = p_buffer[i]; TGA_PUT_PIXEL(shade, shade, shade, 0xff) @@ -143,6 +150,9 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else { while (y != y_end) { while (x != x_end) { + if (i > p_output_size) { + return ERR_PARSE_ERROR; + } uint8_t index = p_buffer[i]; uint8_t r = 0x00; uint8_t g = 0x00; @@ -171,6 +181,10 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else if (p_header.pixel_depth == 24) { while (y != y_end) { while (x != x_end) { + if (i + 2 > p_output_size) { + return ERR_PARSE_ERROR; + } + uint8_t r = p_buffer[i + 2]; uint8_t g = p_buffer[i + 1]; uint8_t b = p_buffer[i + 0]; @@ -186,6 +200,10 @@ Error ImageLoaderTGA::convert_to_image(Ref<Image> p_image, const uint8_t *p_buff } else if (p_header.pixel_depth == 32) { while (y != y_end) { while (x != x_end) { + if (i + 3 > p_output_size) { + return ERR_PARSE_ERROR; + } + uint8_t a = p_buffer[i + 3]; uint8_t r = p_buffer[i + 2]; uint8_t g = p_buffer[i + 1]; @@ -279,7 +297,7 @@ Error ImageLoaderTGA::load_image(Ref<Image> p_image, FileAccess *f, bool p_force const uint8_t *src_image_r = src_image.ptr(); const size_t pixel_size = tga_header.pixel_depth >> 3; - const size_t buffer_size = (tga_header.image_width * tga_header.image_height) * pixel_size; + size_t buffer_size = (tga_header.image_width * tga_header.image_height) * 
pixel_size; Vector<uint8_t> uncompressed_buffer; uncompressed_buffer.resize(buffer_size); @@ -297,11 +315,12 @@ Error ImageLoaderTGA::load_image(Ref<Image> p_image, FileAccess *f, bool p_force } } else { buffer = src_image_r; + buffer_size = src_image_len; }; if (err == OK) { const uint8_t *palette_r = palette.ptr(); - err = convert_to_image(p_image, buffer, tga_header, palette_r, is_monochrome); + err = convert_to_image(p_image, buffer, tga_header, palette_r, is_monochrome, buffer_size); } } diff --git a/modules/tga/image_loader_tga.h b/modules/tga/image_loader_tga.h index 249e33411e..bbfc3fed32 100644 --- a/modules/tga/image_loader_tga.h +++ b/modules/tga/image_loader_tga.h @@ -73,7 +73,7 @@ class ImageLoaderTGA : public ImageFormatLoader { uint8_t image_descriptor; }; static Error decode_tga_rle(const uint8_t *p_compressed_buffer, size_t p_pixel_size, uint8_t *p_uncompressed_buffer, size_t p_output_size); - static Error convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome); + static Error convert_to_image(Ref<Image> p_image, const uint8_t *p_buffer, const tga_header_s &p_header, const uint8_t *p_palette, const bool p_is_monochrome, size_t p_output_size); public: virtual Error load_image(Ref<Image> p_image, FileAccess *f, bool p_force_linear, float p_scale); diff --git a/platform/javascript/javascript_main.cpp b/platform/javascript/javascript_main.cpp index 0b8af70b13..0fe95b0a8f 100644 --- a/platform/javascript/javascript_main.cpp +++ b/platform/javascript/javascript_main.cpp @@ -88,6 +88,13 @@ extern EMSCRIPTEN_KEEPALIVE int godot_js_main(int argc, char *argv[]) { Main::start(); os->get_main_loop()->initialize(); +#ifdef TOOLS_ENABLED + if (Main::is_project_manager() && FileAccess::exists("/tmp/preload.zip")) { + PackedStringArray ps; + ps.push_back("/tmp/preload.zip"); + os->get_main_loop()->emit_signal("files_dropped", ps, -1); + } +#endif 
emscripten_set_main_loop(main_loop_callback, -1, false); // Immediately run the first iteration. // We are inside an animation frame, we want to immediately draw on the newly setup canvas. diff --git a/scene/3d/visual_instance_3d.cpp b/scene/3d/visual_instance_3d.cpp index 1d0a830383..dd731d13bd 100644 --- a/scene/3d/visual_instance_3d.cpp +++ b/scene/3d/visual_instance_3d.cpp @@ -371,7 +371,7 @@ void GeometryInstance3D::_bind_methods() { ClassDB::bind_method(D_METHOD("set_gi_mode", "mode"), &GeometryInstance3D::set_gi_mode); ClassDB::bind_method(D_METHOD("get_gi_mode"), &GeometryInstance3D::get_gi_mode); - ClassDB::bind_method(D_METHOD("set_lod_bias", "p_bias"), &GeometryInstance3D::set_lod_bias); + ClassDB::bind_method(D_METHOD("set_lod_bias", "bias"), &GeometryInstance3D::set_lod_bias); ClassDB::bind_method(D_METHOD("get_lod_bias"), &GeometryInstance3D::get_lod_bias); ClassDB::bind_method(D_METHOD("set_custom_aabb", "aabb"), &GeometryInstance3D::set_custom_aabb); diff --git a/scene/animation/animation_tree.cpp b/scene/animation/animation_tree.cpp index e6abbc0c7a..54523cc390 100644 --- a/scene/animation/animation_tree.cpp +++ b/scene/animation/animation_tree.cpp @@ -820,6 +820,7 @@ void AnimationTree::_process_graph(float p_delta) { Ref<Animation> a = as.animation; float time = as.time; float delta = as.delta; + float weight = as.blend; bool seeked = as.seeked; for (int i = 0; i < a->get_track_count(); i++) { @@ -839,7 +840,7 @@ void AnimationTree::_process_graph(float p_delta) { ERR_CONTINUE(blend_idx < 0 || blend_idx >= state.track_count); - float blend = (*as.track_blends)[blend_idx]; + float blend = (*as.track_blends)[blend_idx] * weight; if (blend < CMP_EPSILON) { continue; //nothing to blend diff --git a/scene/gui/graph_edit.cpp b/scene/gui/graph_edit.cpp index bc87aabb2c..d7602bd7cf 100644 --- a/scene/gui/graph_edit.cpp +++ b/scene/gui/graph_edit.cpp @@ -1593,7 +1593,7 @@ void GraphEdit::_bind_methods() { 
ClassDB::bind_method(D_METHOD("remove_valid_connection_type", "from_type", "to_type"), &GraphEdit::remove_valid_connection_type); ClassDB::bind_method(D_METHOD("is_valid_connection_type", "from_type", "to_type"), &GraphEdit::is_valid_connection_type); - ClassDB::bind_method(D_METHOD("set_zoom", "p_zoom"), &GraphEdit::set_zoom); + ClassDB::bind_method(D_METHOD("set_zoom", "zoom"), &GraphEdit::set_zoom); ClassDB::bind_method(D_METHOD("get_zoom"), &GraphEdit::get_zoom); ClassDB::bind_method(D_METHOD("set_snap", "pixels"), &GraphEdit::set_snap); @@ -1608,9 +1608,9 @@ void GraphEdit::_bind_methods() { ClassDB::bind_method(D_METHOD("set_connection_lines_antialiased", "pixels"), &GraphEdit::set_connection_lines_antialiased); ClassDB::bind_method(D_METHOD("is_connection_lines_antialiased"), &GraphEdit::is_connection_lines_antialiased); - ClassDB::bind_method(D_METHOD("set_minimap_size", "p_size"), &GraphEdit::set_minimap_size); + ClassDB::bind_method(D_METHOD("set_minimap_size", "size"), &GraphEdit::set_minimap_size); ClassDB::bind_method(D_METHOD("get_minimap_size"), &GraphEdit::get_minimap_size); - ClassDB::bind_method(D_METHOD("set_minimap_opacity", "p_opacity"), &GraphEdit::set_minimap_opacity); + ClassDB::bind_method(D_METHOD("set_minimap_opacity", "opacity"), &GraphEdit::set_minimap_opacity); ClassDB::bind_method(D_METHOD("get_minimap_opacity"), &GraphEdit::get_minimap_opacity); ClassDB::bind_method(D_METHOD("set_minimap_enabled", "enable"), &GraphEdit::set_minimap_enabled); diff --git a/scene/gui/texture_button.cpp b/scene/gui/texture_button.cpp index 23c48b0906..bd670555ea 100644 --- a/scene/gui/texture_button.cpp +++ b/scene/gui/texture_button.cpp @@ -29,7 +29,9 @@ /*************************************************************************/ #include "texture_button.h" + #include "core/typedefs.h" + #include <stdlib.h> Size2 TextureButton::get_minimum_size() const { @@ -247,8 +249,8 @@ void TextureButton::_bind_methods() { 
ClassDB::bind_method(D_METHOD("set_disabled_texture", "texture"), &TextureButton::set_disabled_texture); ClassDB::bind_method(D_METHOD("set_focused_texture", "texture"), &TextureButton::set_focused_texture); ClassDB::bind_method(D_METHOD("set_click_mask", "mask"), &TextureButton::set_click_mask); - ClassDB::bind_method(D_METHOD("set_expand", "p_expand"), &TextureButton::set_expand); - ClassDB::bind_method(D_METHOD("set_stretch_mode", "p_mode"), &TextureButton::set_stretch_mode); + ClassDB::bind_method(D_METHOD("set_expand", "expand"), &TextureButton::set_expand); + ClassDB::bind_method(D_METHOD("set_stretch_mode", "mode"), &TextureButton::set_stretch_mode); ClassDB::bind_method(D_METHOD("set_flip_h", "enable"), &TextureButton::set_flip_h); ClassDB::bind_method(D_METHOD("is_flipped_h"), &TextureButton::is_flipped_h); ClassDB::bind_method(D_METHOD("set_flip_v", "enable"), &TextureButton::set_flip_v); diff --git a/scene/resources/sky_material.cpp b/scene/resources/sky_material.cpp index ee6a26bc65..b2efecb1cb 100644 --- a/scene/resources/sky_material.cpp +++ b/scene/resources/sky_material.cpp @@ -597,5 +597,4 @@ PhysicalSkyMaterial::PhysicalSkyMaterial() { PhysicalSkyMaterial::~PhysicalSkyMaterial() { RS::get_singleton()->free(shader); - RS::get_singleton()->material_set_shader(_get_material(), RID()); } diff --git a/scene/resources/syntax_highlighter.cpp b/scene/resources/syntax_highlighter.cpp index f3f881a774..9dd00849f4 100644 --- a/scene/resources/syntax_highlighter.cpp +++ b/scene/resources/syntax_highlighter.cpp @@ -110,16 +110,13 @@ TextEdit *SyntaxHighlighter::get_text_edit() { } void SyntaxHighlighter::_bind_methods() { - ClassDB::bind_method(D_METHOD("get_line_syntax_highlighting", "p_line"), &SyntaxHighlighter::get_line_syntax_highlighting); + ClassDB::bind_method(D_METHOD("get_line_syntax_highlighting", "line"), &SyntaxHighlighter::get_line_syntax_highlighting); ClassDB::bind_method(D_METHOD("update_cache"), &SyntaxHighlighter::update_cache); 
ClassDB::bind_method(D_METHOD("clear_highlighting_cache"), &SyntaxHighlighter::clear_highlighting_cache); ClassDB::bind_method(D_METHOD("get_text_edit"), &SyntaxHighlighter::get_text_edit); - ClassDB::bind_method(D_METHOD("_get_line_syntax_highlighting", "p_line"), &SyntaxHighlighter::_get_line_syntax_highlighting); - ClassDB::bind_method(D_METHOD("_update_cache"), &SyntaxHighlighter::_update_cache); - ClassDB::bind_method(D_METHOD("_clear_highlighting_cache"), &SyntaxHighlighter::_clear_highlighting_cache); - - BIND_VMETHOD(MethodInfo(Variant::DICTIONARY, "_get_line_syntax_highlighting", PropertyInfo(Variant::INT, "p_line"))); + BIND_VMETHOD(MethodInfo(Variant::DICTIONARY, "_get_line_syntax_highlighting", PropertyInfo(Variant::INT, "line"))); + BIND_VMETHOD(MethodInfo("_clear_highlighting_cache")); BIND_VMETHOD(MethodInfo("_update_cache")); } @@ -576,11 +573,11 @@ void CodeHighlighter::_bind_methods() { ClassDB::bind_method(D_METHOD("clear_member_keyword_colors"), &CodeHighlighter::clear_member_keyword_colors); ClassDB::bind_method(D_METHOD("get_member_keyword_colors"), &CodeHighlighter::get_member_keyword_colors); - ClassDB::bind_method(D_METHOD("add_color_region", "p_start_key", "p_end_key", "p_color", "p_line_only"), &CodeHighlighter::add_color_region, DEFVAL(false)); - ClassDB::bind_method(D_METHOD("remove_color_region", "p_start_key"), &CodeHighlighter::remove_color_region); - ClassDB::bind_method(D_METHOD("has_color_region", "p_start_key"), &CodeHighlighter::has_color_region); + ClassDB::bind_method(D_METHOD("add_color_region", "start_key", "end_key", "color", "line_only"), &CodeHighlighter::add_color_region, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("remove_color_region", "start_key"), &CodeHighlighter::remove_color_region); + ClassDB::bind_method(D_METHOD("has_color_region", "start_key"), &CodeHighlighter::has_color_region); - ClassDB::bind_method(D_METHOD("set_color_regions", "p_color_regions"), &CodeHighlighter::set_color_regions); + 
ClassDB::bind_method(D_METHOD("set_color_regions", "color_regions"), &CodeHighlighter::set_color_regions); ClassDB::bind_method(D_METHOD("clear_color_regions"), &CodeHighlighter::clear_color_regions); ClassDB::bind_method(D_METHOD("get_color_regions"), &CodeHighlighter::get_color_regions); diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index c35e5e1730..0fdd864d47 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -400,12 +400,14 @@ void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatr void ClusterBuilderRD::bake_cluster() { RENDER_TIMESTAMP(">Bake Cluster"); + RD::get_singleton()->draw_command_begin_label("Bake Light Cluster"); + //clear cluster buffer - RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size); + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); if (render_element_count > 0) { //clear render buffer - RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size); + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, RD::BARRIER_MASK_RASTER); { //fill state uniform @@ -420,12 +422,12 @@ void ClusterBuilderRD::bake_cluster() { state.cluster_depth_offset = (render_element_max / 32); state.cluster_data_size = state.cluster_depth_offset + render_element_max; - RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state); + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } //update instances - RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements); + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, 
render_elements, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); RENDER_TIMESTAMP("Render Elements"); @@ -469,7 +471,7 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->draw_list_draw(draw_list, true, instances); i += instances; } - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE); } //store elements RENDER_TIMESTAMP("Pack Elements"); @@ -491,12 +493,15 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } + } else { + RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } RENDER_TIMESTAMP("<Bake Cluster"); + RD::get_singleton()->draw_command_end_label(); } void ClusterBuilderRD::debug(ElementType p_element) { @@ -519,7 +524,7 @@ void ClusterBuilderRD::debug(ElementType p_element) { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1); RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index f1bab19445..8269328597 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -299,15 +299,12 
@@ void EffectsRD::copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8_bit_dst ? COPY_MODE_SIMPLY_COPY_8BIT : COPY_MODE_SIMPLY_COPY]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -322,15 +319,12 @@ void EffectsRD::copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, copy.push_constant.target[1] = 0; copy.push_constant.camera_z_far = p_lod; - int32_t x_groups = (p_panorama_size.width - 1) / 8 + 1; - int32_t y_groups = (p_panorama_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_is_array ? 
COPY_MODE_CUBE_ARRAY_TO_PANORAMA : COPY_MODE_CUBE_TO_PANORAMA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cube), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_panorama), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_panorama_size.width, p_panorama_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -349,15 +343,12 @@ void EffectsRD::copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_ copy.push_constant.camera_z_far = p_z_far; copy.push_constant.camera_z_near = p_z_near; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_LINEARIZE_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -374,15 +365,12 @@ void EffectsRD::copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_texture, copy.push_constant.target[0] = p_rect.position.x; copy.push_constant.target[1] = p_rect.position.y; - int32_t x_groups = (p_rect.size.width - 1) / 8 + 1; - 
int32_t y_groups = (p_rect.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_SIMPLY_COPY_DEPTH]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -400,14 +388,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i copy.push_constant.set_color[2] = p_color.b; copy.push_constant.set_color[3] = p_color.a; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? 
COPY_MODE_SET_COLOR_8BIT : COPY_MODE_SET_COLOR]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -420,8 +405,6 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.section[2] = p_region.size.width; copy.push_constant.section[3] = p_region.size.height; - int32_t x_groups = (p_region.size.width - 1) / 8 + 1; - int32_t y_groups = (p_region.size.height - 1) / 8 + 1; //HORIZONTAL RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? 
COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); @@ -431,7 +414,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -442,7 +425,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back copy.push_constant.flags = base_flags; RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -452,9 +435,6 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; uint32_t base_flags = 0; - int32_t x_groups = (p_size.width + 7) / 8; - int32_t y_groups = (p_size.height + 7) / 8; - copy.push_constant.section[2] = p_size.x; copy.push_constant.section[3] = p_size.y; @@ -479,16 +459,13 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? 
COPY_FLAG_HIGH_QUALITY_GLOW : 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - { //scale color and depth to half ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); @@ -506,7 +483,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); } @@ -547,7 +524,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R } RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { @@ -585,7 +562,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -600,7 +577,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); } RD::get_singleton()->compute_list_end(); @@ -609,9 +586,6 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - int32_t x_groups = (p_screen_size.width - 1) / 8 + 1; - int32_t y_groups = (p_screen_size.height - 1) / 8 + 1; - Plane p = p_camera.xform4(Plane(1, 0, -1, 1)); p.normal /= p.d; float unit_size = p.normal.x; @@ -635,7 +609,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, 
sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -646,7 +620,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept sss.push_constant.vertical = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -690,15 +664,12 @@ void EffectsRD::make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const S copy.push_constant.section[2] = p_size.width; copy.push_constant.section[3] = p_size.height; - int32_t x_groups = (p_size.width - 1) / 8 + 1; - int32_t y_groups = (p_size.height - 1) / 8 + 1; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_set_push_constant(compute_list, ©.push_constant, sizeof(CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1); RD::get_singleton()->compute_list_end(); } @@ -719,7 +690,7 @@ void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffe 
RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); RD::get_singleton()->draw_list_draw(draw_list, true); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER); } void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) { @@ -804,10 +775,7 @@ void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_ RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant)); - int32_t x_groups = (luminance_reduce.push_constant.source_size[0] - 1) / 8 + 1; - int32_t y_groups = (luminance_reduce.push_constant.source_size[1] - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, luminance_reduce.push_constant.source_size[0], luminance_reduce.push_constant.source_size[1], 1); luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1); luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1); @@ -848,14 +816,12 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1); - int32_t x_groups = (p_base_texture_size.x - 1) / 8 + 1; - int32_t y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - 
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) { @@ -872,8 +838,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; @@ -887,7 +851,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //third pass @@ -903,7 +867,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || 
p_quality == RS::DOF_BLUR_QUALITY_LOW) { @@ -914,8 +878,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -923,7 +885,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } } else { //circle @@ -941,15 +903,13 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); - x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1; - y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.half_size = true; RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], 
bokeh.push_constant.size[1], 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //circle is just one pass, then upscale @@ -961,8 +921,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); - x_groups = (p_base_texture_size.x - 1) / 8 + 1; - y_groups = (p_base_texture_size.y - 1) / 8 + 1; bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.half_size = false; @@ -970,7 +928,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1); } RD::get_singleton()->compute_list_end(); @@ -995,10 +953,9 @@ void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID> RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); - int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); } RD::get_singleton()->compute_list_add_barrier(p_compute_list); } @@ -1072,10 +1029,9 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); - int x_groups = (MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = (MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; + Size2i size(MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)), MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1))); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->draw_command_end_label(); // Downsample SSAO } @@ -1193,21 +1149,19 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); //generate importance map - int x_groups = (p_settings.quarter_screen_size.x - 1) / 8 + 1; - int y_groups = (p_settings.quarter_screen_size.y - 1) / 8 + 1; RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process importance map A RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); //process Importance Map B RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); @@ -1215,7 +1169,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, 
p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); @@ -1272,10 +1226,8 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep } RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); } if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { @@ -1313,18 +1265,15 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); - int x_groups = (p_settings.full_screen_size.x - 1) / 8 + 1; - int y_groups = (p_settings.full_screen_size.y - 1) / 8 + 1; - - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->draw_command_end_label(); // Interleave } RD::get_singleton()->draw_command_end_label(); //SSAO - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); //wait for upcoming transfer int zero[1] = { 0 }; - 
RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier } void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { @@ -1337,12 +1286,9 @@ void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1); - int x_groups = (p_size.x - 1) / 8 + 1; - int y_groups = (p_size.y - 1) / 8 + 1; - RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway - RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.x, p_size.y, 1); RD::get_singleton()->compute_list_end(); } @@ -1455,7 +1401,7 @@ void EffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_ RD::get_singleton()->draw_list_draw(draw_list, true); } -void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples) { +void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { ResolvePushConstant push_constant; push_constant.screen_size[0] = p_screen_size.x; push_constant.screen_size[1] = p_screen_size.y; @@ -1472,19 +1418,26 @@ void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RI 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(p_barrier); } void EffectsRD::reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RD::ComputeListID compute_list) { uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, (uint32_t)p_shrink_limit, 0, 0, 0 }; - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_source_shadow, p_dest_shadow), 0); + uint32_t height = p_source_rect.size.height; + if (true) { // subgroup support, @TODO must detect them + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[p_shrink_limit == 1 ? 
SHADOW_REDUCE_REDUCE_SUBGROUPS_8 : SHADOW_REDUCE_REDUCE_SUBGROUPS]); + height /= 2; //cause kernel is 8x4 + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, height, 1); } void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RenderingServer::EnvVolumetricFogShadowFilter p_filter, RD::ComputeListID compute_list, bool p_vertical, bool p_horizontal) { uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, 0, 0, 0, 0 }; @@ -1506,9 +1459,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i & if (p_vertical) { push_constant[6] = 1; push_constant[7] = 0; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_shadow, p_backing_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, 
p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1); } if (p_vertical && p_horizontal) { RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -1516,9 +1470,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i & if (p_horizontal) { push_constant[6] = 0; push_constant[7] = 1; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_backing_shadow, p_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1); } } @@ -2020,6 +1975,8 @@ EffectsRD::EffectsRD() { { Vector<String> shadow_reduce_modes; shadow_reduce_modes.push_back("\n#define MODE_REDUCE\n"); + shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n"); + shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n#define MODE_REDUCE_8\n"); shadow_reduce_modes.push_back("\n#define MODE_FILTER\n"); shadow_reduce.shader.initialize(shadow_reduce_modes); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 00309b4d0f..7ae5ea2f37 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -599,6 +599,8 @@ class EffectsRD { enum ShadowReduceMode { SHADOW_REDUCE_REDUCE, + SHADOW_REDUCE_REDUCE_SUBGROUPS, + SHADOW_REDUCE_REDUCE_SUBGROUPS_8, SHADOW_REDUCE_FILTER, 
SHADOW_REDUCE_MAX }; @@ -763,7 +765,7 @@ public: void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); - void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples); + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list); void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = true, bool p_horizontal = true); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index a20a5073c3..509495680a 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -453,7 +453,7 @@ void RendererSceneRenderForward::MaterialData::update_parameters(const Map<Strin //check whether buffer changed if (p_uniform_dirty && ubo_data.size()) { update_uniform_buffer(shader_data->uniforms, shader_data->ubo_offsets.ptr(), p_parameters, ubo_data.ptrw(), ubo_data.size(), false); - RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); + RD::get_singleton()->buffer_update(uniform_buffer, 0, 
ubo_data.size(), ubo_data.ptrw(), RD::BARRIER_MASK_RASTER); } uint32_t tex_uniform_count = shader_data->texture_uniforms.size(); @@ -810,10 +810,20 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); - float old_offset[2] = { 0, 0 }; + SceneState::PushConstant push_constant; + + if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { + push_constant.uv_offset = Math::make_half_float(p_params->uv_offset.y) << 16; + push_constant.uv_offset |= Math::make_half_float(p_params->uv_offset.x); + } else { + push_constant.uv_offset = 0; + } for (uint32_t i = p_from_element; i < p_to_element; i++) { const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i]; + const RenderElementInfo &element_info = p_params->element_info[i]; + + push_constant.base_index = i + p_params->element_offset; RID material_uniform_set; ShaderData *shader; @@ -834,13 +844,6 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList continue; } - if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { - old_offset[0] = surf->owner->push_constant.lightmap_uv_scale[0]; - old_offset[1] = surf->owner->push_constant.lightmap_uv_scale[1]; - surf->owner->push_constant.lightmap_uv_scale[0] = p_params->uv_offset.x; - surf->owner->push_constant.lightmap_uv_scale[1] = p_params->uv_offset.y; - } - //find cull variant ShaderData::CullVariant cull_variant; @@ -862,16 +865,16 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList switch (p_params->pass_mode) { case PASS_MODE_COLOR: case PASS_MODE_COLOR_TRANSPARENT: { - if (surf->sort.uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS; - } else if (surf->sort.uses_forward_gi) { + } else if (element_info.uses_forward_gi) { shader_version = SHADER_VERSION_COLOR_PASS_WITH_FORWARD_GI; } else { shader_version = 
SHADER_VERSION_COLOR_PASS; } } break; case PASS_MODE_COLOR_SPECULAR: { - if (surf->sort.uses_lightmap) { + if (element_info.uses_lightmap) { shader_version = SHADER_VERSION_LIGHTMAP_COLOR_PASS_WITH_SEPARATE_SPECULAR; } else { shader_version = SHADER_VERSION_COLOR_PASS_WITH_SEPARATE_SPECULAR; @@ -913,31 +916,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format); } - if (p_params->screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) { - //lod - Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_params->lod_plane.normal); - Vector3 support_max = surf->owner->transformed_aabb.get_support(p_params->lod_plane.normal); - - float distance_min = p_params->lod_plane.distance_to(support_min); - float distance_max = p_params->lod_plane.distance_to(support_max); - - float distance = 0.0; - - if (distance_min * distance_max < 0.0) { - //crossing plane - distance = 0.0; - } else if (distance_min >= 0.0) { - distance = distance_min; - } else if (distance_max <= 0.0) { - distance = -distance_max; - } - - index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_params->lod_distance_multiplier, p_params->screen_lod_threshold); - - } else { - //no lod - index_array_rd = storage->mesh_surface_get_index_array(mesh_surface); - } + index_array_rd = storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); if (prev_vertex_array_rd != vertex_array_rd) { RD::get_singleton()->draw_list_bind_vertex_array(draw_list, vertex_array_rd); @@ -974,14 +953,11 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList prev_material_uniform_set = material_uniform_set; } - RD::get_singleton()->draw_list_set_push_constant(draw_list, &surf->owner->push_constant, 
sizeof(GeometryInstanceForward::PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); - RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), surf->owner->instance_count); - - if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) { - surf->owner->push_constant.lightmap_uv_scale[0] = old_offset[0]; - surf->owner->push_constant.lightmap_uv_scale[1] = old_offset[1]; - } + uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat; + RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instance_count); + i += element_info.repeat - 1; //skip equal elements } } @@ -1039,16 +1015,16 @@ void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(p_params->barrier); } else { //single threaded RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); - RD::get_singleton()->draw_list_end(); + RD::get_singleton()->draw_list_end(p_params->barrier); } } -void RendererSceneRenderForward::_setup_environment(RID p_environment, 
RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) { +void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) { //CameraMatrix projection = p_cam_projection; //projection.flip_y(); // Vulkan and modern APIs use Y-Down CameraMatrix correction; @@ -1287,22 +1263,120 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); - RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo); + if (p_index >= (int)scene_state.uniform_buffers.size()) { + uint32_t from = scene_state.uniform_buffers.size(); + scene_state.uniform_buffers.resize(p_index + 1); + render_pass_uniform_sets.resize(p_index + 1); + for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) { + scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); + } + } + RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); +} + +void RendererSceneRenderForward::_update_instance_data_buffer(RenderListType p_render_list) { 
+ if (scene_state.instance_data[p_render_list].size() > 0) { + if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) { + if (scene_state.instance_buffer[p_render_list] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[p_render_list]); + } + uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size())); + scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); + scene_state.instance_buffer_size[p_render_list] = new_size; + } + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); + } +} +void RendererSceneRenderForward::_fill_instance_data(RenderListType p_render_list, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) { + RenderList *rl = &render_list[p_render_list]; + uint32_t element_total = p_max_elements >= 0 ? 
uint32_t(p_max_elements) : rl->elements.size(); + + scene_state.instance_data[p_render_list].resize(p_offset + element_total); + rl->element_info.resize(p_offset + element_total); + + uint32_t repeats = 0; + GeometryInstanceSurfaceDataCache *prev_surface = nullptr; + for (uint32_t i = 0; i < element_total; i++) { + GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset]; + GeometryInstanceForward *inst = surface->owner; + + SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset]; + + if (inst->store_transform_cache) { + RendererStorageRD::store_transform(inst->transform, instance_data.transform); + } else { + RendererStorageRD::store_transform(Transform(), instance_data.transform); + } + + instance_data.flags = inst->flags_cache; + instance_data.gi_offset = inst->gi_offset_cache; + instance_data.layer_mask = inst->layer_mask; + instance_data.instance_uniforms_ofs = uint32_t(inst->shader_parameters_offset); + instance_data.lightmap_uv_scale[0] = inst->lightmap_uv_scale.position.x; + instance_data.lightmap_uv_scale[1] = inst->lightmap_uv_scale.position.y; + instance_data.lightmap_uv_scale[2] = inst->lightmap_uv_scale.size.x; + instance_data.lightmap_uv_scale[3] = inst->lightmap_uv_scale.size.y; + + bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid(); + + if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2) { + //this element is the same as the previous one, count repeats to draw it using instancing + repeats++; + } else { + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + i - j].repeat = j; + } + } + repeats = 1; + } + + RenderElementInfo &element_info = rl->element_info[p_offset + i]; + + element_info.lod_index = surface->sort.lod_index; + element_info.uses_forward_gi = surface->sort.uses_forward_gi; + 
element_info.uses_lightmap = surface->sort.uses_lightmap; + + if (cant_repeat) { + prev_surface = nullptr; + } else { + prev_surface = surface; + } + } + + if (repeats > 0) { + for (uint32_t j = 1; j <= repeats; j++) { + rl->element_info[p_offset + element_total - j].repeat = j; + } + } + + if (p_update_buffer) { + _update_instance_data_buffer(p_render_list); + } } -void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi) { - scene_state.used_sss = false; - scene_state.used_screen_texture = false; - scene_state.used_normal_texture = false; - scene_state.used_depth_texture = false; +void RendererSceneRenderForward::_fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_append) { + if (p_render_list == RENDER_LIST_OPAQUE) { + scene_state.used_sss = false; + scene_state.used_screen_texture = false; + scene_state.used_normal_texture = false; + scene_state.used_depth_texture = false; + } + uint32_t lightmap_captures_used = 0; Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); near_plane.d += p_cam_projection.get_z_near(); float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near(); - uint32_t lightmap_captures_used = 0; + RenderList *rl = &render_list[p_render_list]; _update_dirty_geometry_instances(); - render_list.clear(); + + if (!p_append) { + rl->clear(); + if (p_render_list == RENDER_LIST_OPAQUE) { + render_list[RENDER_LIST_ALPHA].clear(); //opaque fills alpha too + } + } //fill list @@ -1318,7 +1392,7 @@ void 
RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst bool uses_lightmap = false; bool uses_gi = false; - if (p_pass_mode == PASS_MODE_COLOR) { + if (p_render_list == RENDER_LIST_OPAQUE) { //setup GI if (inst->lightmap_instance.is_valid()) { @@ -1330,15 +1404,15 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } } if (lightmap_cull_index >= 0) { - inst->push_constant.gi_offset &= 0xFFFF; - inst->push_constant.gi_offset |= lightmap_cull_index; + inst->gi_offset_cache = inst->lightmap_slice_index << 16; + inst->gi_offset_cache |= lightmap_cull_index; flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP; if (scene_state.lightmap_has_sh[lightmap_cull_index]) { flags |= INSTANCE_DATA_FLAG_USE_SH_LIGHTMAP; } uses_lightmap = true; } else { - inst->push_constant.gi_offset = 0xFFFFFFFF; + inst->gi_offset_cache = 0xFFFFFFFF; } } else if (inst->lightmap_sh) { @@ -1352,7 +1426,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst lcd.sh[j * 4 + 3] = src_capture[j].a; } flags |= INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE; - inst->push_constant.gi_offset = lightmap_captures_used; + inst->gi_offset_cache = lightmap_captures_used; lightmap_captures_used++; uses_lightmap = true; } @@ -1379,7 +1453,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst SWAP(probe0_index, probe1_index); } - inst->push_constant.gi_offset = probe0_index | (probe1_index << 16); + inst->gi_offset_cache = probe0_index | (probe1_index << 16); flags |= INSTANCE_DATA_FLAG_USE_GIPROBE; uses_gi = true; } else { @@ -1387,11 +1461,11 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst flags |= INSTANCE_DATA_FLAG_USE_SDFGI; uses_gi = true; } - inst->push_constant.gi_offset = 0xFFFFFFFF; + inst->gi_offset_cache = 0xFFFFFFFF; } } } - inst->push_constant.flags = flags; + inst->flags_cache = flags; GeometryInstanceSurfaceDataCache *surf = inst->surface_caches; @@ -1399,12 +1473,39 
@@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst surf->sort.uses_forward_gi = 0; surf->sort.uses_lightmap = 0; + // LOD + + if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(surf->surface)) { + //lod + Vector3 lod_support_min = inst->transformed_aabb.get_support(-p_lod_plane.normal); + Vector3 lod_support_max = inst->transformed_aabb.get_support(p_lod_plane.normal); + + float distance_min = p_lod_plane.distance_to(lod_support_min); + float distance_max = p_lod_plane.distance_to(lod_support_max); + + float distance = 0.0; + + if (distance_min * distance_max < 0.0) { + //crossing plane + distance = 0.0; + } else if (distance_min >= 0.0) { + distance = distance_min; + } else if (distance_max <= 0.0) { + distance = -distance_max; + } + + surf->sort.lod_index = storage->mesh_surface_get_lod(surf->surface, inst->lod_model_scale * inst->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold); + } else { + surf->sort.lod_index = 0; + } + + // ADD Element if (p_pass_mode == PASS_MODE_COLOR) { if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { - render_list.add_element(surf); + rl->add_element(surf); } if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) { - render_list.add_alpha_element(surf); + render_list[RENDER_LIST_ALPHA].add_element(surf); if (uses_gi) { surf->sort.uses_forward_gi = 1; } @@ -1429,11 +1530,11 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { - render_list.add_element(surf); + rl->add_element(surf); } } else { if (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) { - render_list.add_element(surf); + rl->add_element(surf); } } @@ -1443,8 +1544,8 
@@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst } } - if (lightmap_captures_used) { - RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures); + if (p_render_list == RENDER_LIST_OPAQUE && lightmap_captures_used) { + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, RD::BARRIER_MASK_RASTER); } } @@ -1473,29 +1574,21 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_light scene_state.lightmaps_used++; } if (scene_state.lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps); + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); } } -void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { +void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t 
p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) { RenderBufferDataForward *render_buffer = nullptr; if (p_render_buffer.is_valid()) { render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer); } //first of all, make a new render pass - render_pass++; - //fill up ubo RENDER_TIMESTAMP("Setup 3D Scene"); - if (p_reflection_probe.is_valid()) { - scene_state.ubo.reflection_multiplier = 0.0; - } else { - scene_state.ubo.reflection_multiplier = 1.0; - } - float lod_distance_multiplier = p_cam_projection.get_lod_multiplier(); Plane lod_camera_plane(p_cam_transform.get_origin(), -p_cam_transform.basis.get_axis(Vector3::AXIS_Z)); @@ -1508,7 +1601,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf Vector2 vp_he = p_cam_projection.get_viewport_half_extents(); scene_state.ubo.viewport_size[0] = vp_he.x; scene_state.ubo.viewport_size[1] = vp_he.y; - scene_state.ubo.directional_light_count = p_directional_light_count; + scene_state.ubo.directional_light_count = 0; Size2i screen_size; RID opaque_framebuffer; @@ -1592,13 +1685,21 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf ERR_FAIL(); //bug? 
} + RD::get_singleton()->draw_command_begin_label("Render Setup"); + _setup_lightmaps(p_lightmaps, p_cam_transform); _setup_giprobes(p_gi_probes); _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); _update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example) - _fill_render_list(p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe); + _fill_render_list(RENDER_LIST_OPAQUE, p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi, using_sdfgi || using_giprobe, lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_depth(); + _fill_instance_data(RENDER_LIST_OPAQUE); + _fill_instance_data(RENDER_LIST_ALPHA); + + RD::get_singleton()->draw_command_end_label(); bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED; @@ -1682,8 +1783,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf clear_color = p_default_bg_color; } - render_list.sort_by_key(false); - bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION; bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; bool depth_pre_pass = !low_end && depth_framebuffer.is_valid(); @@ -1691,42 +1790,64 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf bool using_ssao = 
depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment); bool continue_depth = false; if (depth_pre_pass) { //depth pre pass - RENDER_TIMESTAMP("Render Depth Pre-Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + bool needs_pre_resolve = _needs_post_prepass_render(using_sdfgi || using_giprobe); + if (needs_pre_resolve) { + RENDER_TIMESTAMP("GI + Render Depth Pre-Pass (parallel)"); + } else { + RENDER_TIMESTAMP("Render Depth Pre-Pass"); + } + if (needs_pre_resolve) { + //pre clear the depth framebuffer, as AMD (and maybe others?) use compute for it, and barrier other compute shaders. + RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + RD::get_singleton()->draw_list_end(); + //start compute processes here, so they run at the same time as depth pre-pass + _post_prepass_render(using_sdfgi || using_giprobe); + } - bool finish_depth = using_ssao || using_sdfgi || using_giprobe; - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); RD::get_singleton()->draw_command_begin_label("Render Depth Pre-Pass"); - _render_list_with_threads(&render_list_params, depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + + bool finish_depth = using_ssao || using_sdfgi || using_giprobe; + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, needs_pre_resolve ? Vector<Color>() : depth_pass_clear); + RD::get_singleton()->draw_command_end_label(); + + if (needs_pre_resolve) { + _pre_resolve_render(using_sdfgi || using_giprobe); + } + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RENDER_TIMESTAMP("Resolve Depth Pre-Pass"); - RD::get_singleton()->draw_command_insert_label("Resolve Depth Pre-Pass"); + RD::get_singleton()->draw_command_begin_label("Resolve Depth Pre-Pass"); if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE) { + if (needs_pre_resolve) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); + } static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? 
render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); } else if (finish_depth) { RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } + RD::get_singleton()->draw_command_end_label(); } continue_depth = !finish_depth; } - if (using_ssao) { - _process_ssao(p_render_buffer, p_environment, render_buffer->normal_roughness_buffer, p_cam_projection); - } + _pre_opaque_render(using_ssao, using_sdfgi || using_giprobe, render_buffer ? render_buffer->normal_roughness_buffer : RID(), render_buffer ? render_buffer->giprobe_buffer : RID()); - if (using_sdfgi || using_giprobe) { - _process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes); - } + RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); + + scene_state.ubo.directional_light_count = _get_render_state_directional_light_count(); _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid()); RENDER_TIMESTAMP("Render Opaque Pass"); - RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss; 
bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss; @@ -1747,10 +1868,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer; - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); - RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); - _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); - RD::get_singleton()->draw_command_end_label(); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + _render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0); if (will_continue_color && using_separate_specular) { // close the specular framebuffer, as it's no longer used RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); @@ -1758,6 +1877,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } } + RD::get_singleton()->draw_command_end_label(); + if (debug_giprobes) { //debug giprobes bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); @@ -1837,32 +1958,44 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf RENDER_TIMESTAMP("Render Transparent Pass"); - _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); + RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); - render_list.sort_by_reverse_depth_and_priority(true); + rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_ALPHA, p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps, true); + + _setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false); { - RD::get_singleton()->draw_command_begin_label("Render Transparent Pass"); - RenderListParameters render_list_params(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], 
render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold); _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); - RD::get_singleton()->draw_command_end_label(); } + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->draw_command_begin_label("Resolve"); + if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); } -} -void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { - RENDER_TIMESTAMP("Setup Rendering Shadow"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_begin() { + scene_state.shadow_passes.clear(); + RD::get_singleton()->draw_command_begin_label("Shadow Setup"); _update_render_base_uniform_set(); - 
render_pass++; + render_list[RENDER_LIST_SECONDARY].clear(); + scene_state.instance_data[RENDER_LIST_SECONDARY].clear(); +} +void RendererSceneRenderForward::_render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end) { + uint32_t shadow_pass_index = scene_state.shadow_passes.size(); + + SceneState::ShadowPass shadow_pass; scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1; - _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake); + _setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), !p_flip_y, Color(), 0, p_zfar, false, p_use_pancake, shadow_pass_index); if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) { p_screen_lod_threshold = 0.0; @@ -1870,13 +2003,11 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr PassMode pass_mode = p_use_dp ? 
PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW; - _fill_render_list(p_instances, pass_mode, p_projection, p_transform); - - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); - - RENDER_TIMESTAMP("Render Shadow"); - - render_list.sort_by_key(false); + uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size(); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_projection, p_transform, false, false, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, true); + uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from; + render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size); + _fill_instance_data(RENDER_LIST_SECONDARY, render_list_from, render_list_size, false); { //regular forward for now @@ -1884,49 +2015,87 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr if (p_flip_y) { flip_cull = !flip_cull; } - RD::get_singleton()->draw_command_begin_label("Render Shadow"); - RenderListParameters render_list_params(render_list.elements, render_list.element_count, flip_cull, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); - _render_list_with_threads(&render_list_params, p_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : RD::INITIAL_ACTION_CONTINUE, p_end ? 
RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, Vector<Color>(), 1.0, 0, p_rect); - RD::get_singleton()->draw_command_end_label(); + + shadow_pass.element_from = render_list_from; + shadow_pass.element_count = render_list_size; + shadow_pass.flip_cull = flip_cull; + shadow_pass.pass_mode = pass_mode; + + shadow_pass.rp_uniform_set = RID(); //will be filled later when instance buffer is complete + shadow_pass.camera_plane = p_camera_plane; + shadow_pass.screen_lod_threshold = p_screen_lod_threshold; + shadow_pass.lod_distance_multiplier = p_lod_distance_multiplier; + + shadow_pass.framebuffer = p_framebuffer; + shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); + shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.rect = p_rect; + + scene_state.shadow_passes.push_back(shadow_pass); } } +void RendererSceneRenderForward::_render_shadow_process() { + _update_instance_data_buffer(RENDER_LIST_SECONDARY); + //render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time) + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + //render passes need to be configured after instance buffer is done, since they need the latest version + SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i]; + shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>(), false, i); + } + + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderForward::_render_shadow_end(uint32_t p_barrier) { + RD::get_singleton()->draw_command_begin_label("Shadow Render"); + + for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) { + SceneState::ShadowPass 
&shadow_pass = scene_state.shadow_passes[i]; + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, true, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.camera_plane, shadow_pass.lod_distance_multiplier, shadow_pass.screen_lod_threshold, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); + _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector<Color>(), 1.0, 0, shadow_pass.rect); + } + + if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { + RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier); + } + RD::get_singleton()->draw_command_end_label(); +} + void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray<GeometryInstance *> &p_instances) { RENDER_TIMESTAMP("Setup Render Collider Heightfield"); - _update_render_base_uniform_set(); - - render_pass++; + RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false); PassMode pass_mode = PASS_MODE_SHADOW; - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), 
RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Collider Heightfield"); - render_list.sort_by_key(false); - { //regular forward for now - RD::get_singleton()->draw_command_begin_label("Render Collider Heightfield"); - RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, true, rp_uniform_set); _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); - RD::get_singleton()->draw_command_end_label(); } + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering Material"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render Material"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = false; @@ -1934,16 +2103,16 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo _setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - _fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, 
p_cam_projection, p_cam_transform); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - { - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -1955,14 +2124,16 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) { RENDER_TIMESTAMP("Setup Rendering UV2"); - _update_render_base_uniform_set(); + RD::get_singleton()->draw_command_begin_label("Render UV2"); - render_pass++; + _update_render_base_uniform_set(); scene_state.ubo.dual_paraboloid_side = 0; scene_state.ubo.material_uv2_mode = true; @@ -1970,16 +2141,16 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance * _setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL; - 
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform()); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); - RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); + RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>()); RENDER_TIMESTAMP("Render Material"); - render_list.sort_by_key(false); - { - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, true); //regular forward for now Vector<Color> clear; clear.push_back(Color(0, 0, 0, 0)); @@ -2015,23 +2186,24 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance * RD::get_singleton()->draw_list_end(); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) { RENDER_TIMESTAMP("Render SDFGI"); + RD::get_singleton()->draw_command_begin_label("Render SDFGI Voxel"); + _update_render_base_uniform_set(); RenderBufferDataForward *render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffers); ERR_FAIL_COND(!render_buffer); - render_pass++; - PassMode pass_mode = PASS_MODE_SDF; - _fill_render_list(p_instances, pass_mode, 
CameraMatrix(), Transform()); - render_list.sort_by_key(false); - - RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + _fill_render_list(RENDER_LIST_SECONDARY, p_instances, pass_mode, CameraMatrix(), Transform()); + render_list[RENDER_LIST_SECONDARY].sort_by_key(); + _fill_instance_data(RENDER_LIST_SECONDARY); Vector3 half_extents = p_bounds.size * 0.5; Vector3 center = p_bounds.position + half_extents; @@ -2084,15 +2256,19 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto _setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0); + RID rp_uniform_set = _setup_sdfgi_render_pass_uniform_set(p_albedo_texture, p_emission_texture, p_emission_aniso_texture, p_geom_facing_texture); + Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size); if (!E) { RID fb = RD::get_singleton()->framebuffer_create_empty(fb_size); E = sdfgi_framebuffer_size_cache.insert(fb_size, fb); } - RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, true, rp_uniform_set, false); _render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs); } + + RD::get_singleton()->draw_command_end_label(); } void RendererSceneRenderForward::_base_uniforms_changed() { @@ -2144,21 +2320,13 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - 
u.ids.push_back(scene_state.uniform_buffer); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_omni_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 5; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_spot_light_buffer()); uniforms.push_back(u); @@ -2166,35 +2334,35 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { RD::Uniform u; - u.binding = 6; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_reflection_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 7; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(get_directional_light_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 8; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 9; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(scene_state.lightmap_capture_buffer); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 10; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture(); u.ids.push_back(decal_atlas); @@ -2202,7 +2370,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 11; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID decal_atlas = storage->decal_atlas_get_texture_srgb(); u.ids.push_back(decal_atlas); @@ -2210,7 +2378,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } { RD::Uniform u; - u.binding = 12; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.ids.push_back(get_decal_buffer()); uniforms.push_back(u); @@ -2219,7 +2387,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { { 
RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 13; + u.binding = 12; u.ids.push_back(storage->global_variables_get_storage_buffer()); uniforms.push_back(u); } @@ -2227,7 +2395,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { if (!low_end) { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 14; + u.binding = 13; u.ids.push_back(sdfgi_get_ubo()); uniforms.push_back(u); } @@ -2236,10 +2404,9 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() { } } -RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas) { - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); - } +RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas, int p_index) { + //there should always be enough uniform buffers for render passes, otherwise bugs + ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID()); RenderBufferDataForward *rb = nullptr; if (p_render_buffers.is_valid()) { @@ -2251,6 +2418,24 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[p_index]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + RID instance_buffer = 
scene_state.instance_buffer[p_render_list]; + if (instance_buffer == RID()) { + instance_buffer = default_vec4_xform_buffer; // any buffer will do since its not used + } + u.ids.push_back(instance_buffer); + uniforms.push_back(u); + } + { RID radiance_texture; if (p_radiance_texture.is_valid()) { radiance_texture = p_radiance_texture; @@ -2258,7 +2443,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); } RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2267,7 +2452,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RID ref_texture = p_reflection_atlas.is_valid() ? reflection_atlas_get_texture(p_reflection_atlas) : RID(); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (ref_texture.is_valid()) { u.ids.push_back(ref_texture); @@ -2279,7 +2464,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture; if (p_shadow_atlas.is_valid()) { @@ -2293,7 +2478,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 3; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (p_use_directional_shadow_atlas && directional_shadow_get_texture().is_valid()) { u.ids.push_back(directional_shadow_get_texture()); @@ -2304,7 +2489,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 4; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(scene_state.max_lightmaps); RID default_tex = 
storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); @@ -2323,7 +2508,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 5; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2344,7 +2529,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 6; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer; u.ids.push_back(cb); @@ -2353,7 +2538,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 7; + u.binding = 9; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2361,7 +2546,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 8; + u.binding = 10; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID(); RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2372,7 +2557,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff if (!low_end) { { RD::Uniform u; - u.binding = 9; + u.binding = 11; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = rb && rb->normal_roughness_buffer.is_valid() ? 
rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL); u.ids.push_back(texture); @@ -2381,7 +2566,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 10; + u.binding = 12; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID(); RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2391,7 +2576,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 11; + u.binding = 13; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID ambient_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_ambient_texture(p_render_buffers) : RID(); RID texture = ambient_buffer.is_valid() ? ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2401,7 +2586,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff { RD::Uniform u; - u.binding = 12; + u.binding = 14; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID reflection_buffer = p_render_buffers.is_valid() ? render_buffers_get_gi_reflection_texture(p_render_buffers) : RID(); RID texture = reflection_buffer.is_valid() ? 
reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK); @@ -2410,7 +2595,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 13; + u.binding = 15; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID t; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { @@ -2423,7 +2608,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 14; + u.binding = 16; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) { u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers)); @@ -2434,14 +2619,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } { RD::Uniform u; - u.binding = 15; + u.binding = 17; u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer()); uniforms.push_back(u); } { RD::Uniform u; - u.binding = 16; + u.binding = 18; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID vfog = RID(); if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) { @@ -2457,8 +2642,16 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff } } - render_pass_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); - return render_pass_uniform_set; + if (p_index >= (int)render_pass_uniform_sets.size()) { + render_pass_uniform_sets.resize(p_index + 1); + } + + if (render_pass_uniform_sets[p_index].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[p_index])) { + RD::get_singleton()->free(render_pass_uniform_sets[p_index]); + } + + render_pass_uniform_sets[p_index] = RD::get_singleton()->uniform_set_create(uniforms, default_shader_rd, RENDER_PASS_UNIFORM_SET); + return 
render_pass_uniform_sets[p_index]; } RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture) { @@ -2469,10 +2662,24 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed Vector<RD::Uniform> uniforms; { + RD::Uniform u; + u.binding = 0; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.ids.push_back(scene_state.uniform_buffers[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.ids.push_back(scene_state.instance_buffer[RENDER_LIST_SECONDARY]); + uniforms.push_back(u); + } + { // No radiance texture. RID radiance_texture = storage->texture_rd_get_default(is_using_radiance_cubemap_array() ? RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK : RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_BLACK); RD::Uniform u; - u.binding = 0; + u.binding = 2; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(radiance_texture); uniforms.push_back(u); @@ -2482,7 +2689,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed // No reflection atlas. RID ref_texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_CUBEMAP_ARRAY_BLACK); RD::Uniform u; - u.binding = 1; + u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.push_back(ref_texture); uniforms.push_back(u); @@ -2491,7 +2698,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No shadow atlas. RD::Uniform u; - u.binding = 2; + u.binding = 4; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2501,7 +2708,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No directional shadow atlas. 
RD::Uniform u; - u.binding = 3; + u.binding = 5; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; RID texture = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE); u.ids.push_back(texture); @@ -2511,7 +2718,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No Lightmaps RD::Uniform u; - u.binding = 4; + u.binding = 6; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(scene_state.max_lightmaps); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE); @@ -2525,7 +2732,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { // No GIProbes RD::Uniform u; - u.binding = 5; + u.binding = 7; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.ids.resize(MAX_GI_PROBES); RID default_tex = storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_3D_WHITE); @@ -2538,7 +2745,7 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { RD::Uniform u; - u.binding = 6; + u.binding = 8; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; RID cb = default_vec4_xform_buffer; u.ids.push_back(cb); @@ -2550,28 +2757,28 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; + u.binding = 9; u.ids.push_back(p_albedo_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; + u.binding = 10; u.ids.push_back(p_emission_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 9; + u.binding = 11; u.ids.push_back(p_emission_aniso_texture); uniforms.push_back(u); } { RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 10; + u.binding = 12; u.ids.push_back(p_geom_facing_texture); uniforms.push_back(u); } @@ -2709,10 +2916,11 @@ void 
RendererSceneRenderForward::_geometry_instance_add_surface_with_material(Ge sdcache->sort.sort_key1 = 0; sdcache->sort.sort_key2 = 0; - sdcache->sort.surface_type = ginstance->data->base_type; - sdcache->sort.material_id = p_material_id; + sdcache->sort.surface_index = p_surface; + sdcache->sort.material_id_low = p_material_id & 0x3FFF; + sdcache->sort.material_id_hi = p_material_id >> 14; sdcache->sort.shader_id = p_shader_id; - sdcache->sort.geometry_id = p_mesh.get_local_index(); + sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway sdcache->sort.uses_forward_gi = ginstance->can_sdfgi; sdcache->sort.priority = p_material->priority; } @@ -2842,11 +3050,6 @@ void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_g //Fill push constant - ginstance->push_constant.instance_uniforms_ofs = ginstance->data->shader_parameters_offset >= 0 ? ginstance->data->shader_parameters_offset : 0; - ginstance->push_constant.layer_mask = ginstance->data->layer_mask; - ginstance->push_constant.flags = 0; - ginstance->push_constant.gi_offset = 0xFFFFFFFF; //disabled - bool store_transform = true; if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) { @@ -2903,21 +3106,10 @@ void RendererSceneRenderForward::_geometry_instance_update(GeometryInstance *p_g } } - if (store_transform) { - RendererStorageRD::store_transform(ginstance->data->transform, ginstance->push_constant.transform); - } else { - RendererStorageRD::store_transform(Transform(), ginstance->push_constant.transform); - } - + ginstance->store_transform_cache = store_transform; ginstance->can_sdfgi = false; - if (lightmap_instance_is_valid(ginstance->lightmap_instance)) { - ginstance->push_constant.gi_offset = ginstance->data->lightmap_slice_index << 16; - ginstance->push_constant.lightmap_uv_scale[0] = ginstance->data->lightmap_uv_scale.position.x; - ginstance->push_constant.lightmap_uv_scale[1] = ginstance->data->lightmap_uv_scale.position.y; - 
ginstance->push_constant.lightmap_uv_scale[2] = ginstance->data->lightmap_uv_scale.size.width; - ginstance->push_constant.lightmap_uv_scale[3] = ginstance->data->lightmap_uv_scale.size.height; - } else if (!low_end) { + if (!lightmap_instance_is_valid(ginstance->lightmap_instance) && !low_end) { if (ginstance->gi_probes[0].is_null() && (ginstance->data->use_baked_light || ginstance->data->use_dynamic_gi)) { ginstance->can_sdfgi = true; } @@ -3007,8 +3199,7 @@ void RendererSceneRenderForward::geometry_instance_set_mesh_instance(GeometryIns void RendererSceneRenderForward::geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - RendererStorageRD::store_transform(p_transform, ginstance->push_constant.transform); - ginstance->data->transform = p_transform; + ginstance->transform = p_transform; ginstance->mirror = p_transform.basis.determinant() < 0; ginstance->data->aabb = p_aabb; ginstance->transformed_aabb = p_transformed_aabb; @@ -3043,8 +3234,8 @@ void RendererSceneRenderForward::geometry_instance_set_use_lightmap(GeometryInst GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); ginstance->lightmap_instance = p_lightmap_instance; - ginstance->data->lightmap_uv_scale = p_lightmap_uv_scale; - ginstance->data->lightmap_slice_index = p_lightmap_slice_index; + ginstance->lightmap_uv_scale = p_lightmap_uv_scale; + ginstance->lightmap_slice_index = p_lightmap_slice_index; _geometry_instance_mark_dirty(ginstance); } void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(GeometryInstance *p_geometry_instance, const Color *p_sh9) { @@ -3067,7 +3258,7 @@ void RendererSceneRenderForward::geometry_instance_set_lightmap_capture(Geometry void 
RendererSceneRenderForward::geometry_instance_set_instance_shader_parameters_offset(GeometryInstance *p_geometry_instance, int32_t p_offset) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - ginstance->data->shader_parameters_offset = p_offset; + ginstance->shader_parameters_offset = p_offset; _geometry_instance_mark_dirty(ginstance); } void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows(GeometryInstance *p_geometry_instance, bool p_enable) { @@ -3081,8 +3272,7 @@ void RendererSceneRenderForward::geometry_instance_set_cast_double_sided_shadows void RendererSceneRenderForward::geometry_instance_set_layer_mask(GeometryInstance *p_geometry_instance, uint32_t p_layer_mask) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_geometry_instance); ERR_FAIL_COND(!ginstance); - ginstance->data->layer_mask = p_layer_mask; - ginstance->push_constant.layer_mask = p_layer_mask; + ginstance->layer_mask = p_layer_mask; } void RendererSceneRenderForward::geometry_instance_free(GeometryInstance *p_geometry_instance) { @@ -3114,7 +3304,7 @@ void RendererSceneRenderForward::geometry_instance_pair_decal_instances(Geometry Transform RendererSceneRenderForward::geometry_instance_get_transform(GeometryInstance *p_instance) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); ERR_FAIL_COND_V(!ginstance, Transform()); - return ginstance->data->transform; + return ginstance->transform; } AABB RendererSceneRenderForward::geometry_instance_get_aabb(GeometryInstance *p_instance) { GeometryInstanceForward *ginstance = static_cast<GeometryInstanceForward *>(p_instance); @@ -3382,13 +3572,6 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor shader.compiler.initialize(actions); } - //render list - render_list.max_elements = GLOBAL_DEF_RST("rendering/limits/rendering/max_renderable_elements", 
(int)128000); - render_list.init(); - render_pass = 0; - - scene_state.uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); - { //default material and shader default_shader = storage->shader_create(); @@ -3442,8 +3625,10 @@ RendererSceneRenderForward::~RendererSceneRenderForward() { directional_shadow_atlas_set_size(0); //clear base uniform set if still valid - if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) { - RD::get_singleton()->free(render_pass_uniform_set); + for (uint32_t i = 0; i < render_pass_uniform_sets.size(); i++) { + if (render_pass_uniform_sets[i].is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_sets[i])) { + RD::get_singleton()->free(render_pass_uniform_sets[i]); + } } if (sdfgi_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(sdfgi_pass_uniform_set)) { @@ -3462,9 +3647,16 @@ RendererSceneRenderForward::~RendererSceneRenderForward() { storage->free(default_material); { - RD::get_singleton()->free(scene_state.uniform_buffer); + for (uint32_t i = 0; i < scene_state.uniform_buffers.size(); i++) { + RD::get_singleton()->free(scene_state.uniform_buffers[i]); + } RD::get_singleton()->free(scene_state.lightmap_buffer); RD::get_singleton()->free(scene_state.lightmap_capture_buffer); + for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) { + if (scene_state.instance_buffer[i] != RID()) { + RD::get_singleton()->free(scene_state.instance_buffer[i]); + } + } memdelete_arr(scene_state.lightmap_captures); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.h b/servers/rendering/renderer_rd/renderer_scene_render_forward.h index 0b57c7f76c..af78c50fda 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.h @@ -50,6 +50,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { MAX_GI_PROBES = 8, 
MAX_LIGHTMAPS = 8, MAX_GI_PROBES_PER_INSTANCE = 2, + INSTANCE_DATA_BUFFER_MIN_SIZE = 4096 + }; + + enum RenderListType { + RENDER_LIST_OPAQUE, //used for opaque objects + RENDER_LIST_ALPHA, //used for transparent objects + RENDER_LIST_SECONDARY, //used for shadows and other objects + RENDER_LIST_MAX + }; /* Scene Shader */ @@ -245,7 +254,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID shadow_sampler; RID render_base_uniform_set; - RID render_pass_uniform_set; + LocalVector<RID> render_pass_uniform_sets; RID sdfgi_pass_uniform_set; uint64_t lightmap_texture_array_version = 0xFFFFFFFF; @@ -257,7 +266,58 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void _update_render_base_uniform_set(); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); - RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false); + RID _setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false, int p_index = 0); + + enum PassMode { + PASS_MODE_COLOR, + PASS_MODE_COLOR_SPECULAR, + PASS_MODE_COLOR_TRANSPARENT, + PASS_MODE_SHADOW, + PASS_MODE_SHADOW_DP, + PASS_MODE_DEPTH, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS, + PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, + PASS_MODE_DEPTH_MATERIAL, + PASS_MODE_SDF, + }; + + struct GeometryInstanceSurfaceDataCache; + struct RenderElementInfo; + + struct RenderListParameters { + GeometryInstanceSurfaceDataCache **elements = nullptr; + RenderElementInfo *element_info = nullptr; + int element_count = 0; + 
bool reverse_cull = false; + PassMode pass_mode = PASS_MODE_COLOR; + bool no_gi = false; + RID render_pass_uniform_set; + bool force_wireframe = false; + Vector2 uv_offset; + Plane lod_plane; + float lod_distance_multiplier = 0.0; + float screen_lod_threshold = 0.0; + RD::FramebufferFormatID framebuffer_format = 0; + uint32_t element_offset = 0; + uint32_t barrier = RD::BARRIER_MASK_ALL; + + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL) { + elements = p_elements; + element_info = p_element_info; + element_count = p_element_count; + reverse_cull = p_reverse_cull; + pass_mode = p_pass_mode; + no_gi = p_no_gi; + render_pass_uniform_set = p_render_pass_uniform_set; + force_wireframe = p_force_wireframe; + uv_offset = p_uv_offset; + lod_plane = p_lod_plane; + lod_distance_multiplier = p_lod_distance_multiplier; + screen_lod_threshold = p_screen_lod_threshold; + element_offset = p_element_offset; + barrier = p_barrier; + } + }; struct LightmapData { float normal_xform[12]; @@ -367,9 +427,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t pancake_shadows; }; + struct PushConstant { + uint32_t base_index; // + uint32_t uv_offset; //packed + uint32_t pad[2]; + }; + + struct InstanceData { + float transform[16]; + uint32_t flags; + uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables + uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint32_t layer_mask; + float lightmap_uv_scale[4]; + }; + UBO ubo; - RID uniform_buffer; + LocalVector<RID> uniform_buffers; 
LightmapData lightmaps[MAX_LIGHTMAPS]; RID lightmap_ids[MAX_LIGHTMAPS]; @@ -378,6 +453,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t max_lightmaps; RID lightmap_buffer; + RID instance_buffer[RENDER_LIST_MAX]; + uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 }; + LocalVector<InstanceData> instance_data[RENDER_LIST_MAX]; + LightmapCaptureData *lightmap_captures; uint32_t max_lightmap_captures; RID lightmap_capture_buffer; @@ -390,10 +469,29 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { bool used_depth_texture = false; bool used_sss = false; + struct ShadowPass { + uint32_t element_from; + uint32_t element_count; + bool flip_cull; + PassMode pass_mode; + + RID rp_uniform_set; + Plane camera_plane; + float lod_distance_multiplier; + float screen_lod_threshold; + + RID framebuffer; + RD::InitialAction initial_depth_action; + RD::FinalAction final_depth_action; + Rect2i rect; + }; + + LocalVector<ShadowPass> shadow_passes; + } scene_state; static RendererSceneRenderForward *singleton; - uint64_t render_pass; + double time; RID default_shader; RID default_material; @@ -407,51 +505,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RID default_vec4_xform_buffer; RID default_vec4_xform_uniform_set; - enum PassMode { - PASS_MODE_COLOR, - PASS_MODE_COLOR_SPECULAR, - PASS_MODE_COLOR_TRANSPARENT, - PASS_MODE_SHADOW, - PASS_MODE_SHADOW_DP, - PASS_MODE_DEPTH, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS, - PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE, - PASS_MODE_DEPTH_MATERIAL, - PASS_MODE_SDF, - }; - - void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool 
p_pancake_shadows = false); + void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0); void _setup_giprobes(const PagedArray<RID> &p_giprobes); void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); - struct GeometryInstanceSurfaceDataCache; - - struct RenderListParameters { - GeometryInstanceSurfaceDataCache **elements = nullptr; - int element_count = 0; - bool reverse_cull = false; - PassMode pass_mode = PASS_MODE_COLOR; - bool no_gi = false; - RID render_pass_uniform_set; - bool force_wireframe = false; - Vector2 uv_offset; - Plane lod_plane; - float lod_distance_multiplier = 0.0; - float screen_lod_threshold = 0.0; - RD::FramebufferFormatID framebuffer_format = 0; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) { - elements = p_elements; - element_count = p_element_count; - reverse_cull = p_reverse_cull; - pass_mode = p_pass_mode; - no_gi = p_no_gi; - render_pass_uniform_set = p_render_pass_uniform_set; - force_wireframe = p_force_wireframe; - uv_offset = p_uv_offset; - lod_plane = p_lod_plane; - lod_distance_multiplier = p_lod_distance_multiplier; - screen_lod_threshold = p_screen_lod_threshold; - } + struct RenderElementInfo { + uint32_t repeat : 22; + uint32_t uses_lightmap : 1; + uint32_t uses_forward_gi : 1; + uint32_t 
lod_index : 8; }; template <PassMode p_pass_mode> @@ -465,7 +527,9 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { uint32_t render_list_thread_threshold = 500; - void _fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false); + void _update_instance_data_buffer(RenderListType p_render_list); + void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true); + void _fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, const Plane &p_lod_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, bool p_append = false); Map<Size2i, RID> sdfgi_framebuffer_size_cache; @@ -493,14 +557,17 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { union { struct { - uint32_t geometry_id; - uint32_t material_id; - uint32_t shader_id; - uint32_t surface_type : 4; - uint32_t uses_forward_gi : 1; //set during addition - uint32_t uses_lightmap : 1; //set during addition - uint32_t depth_layer : 4; //set during addition - uint32_t priority : 8; + uint64_t lod_index : 8; + uint64_t surface_index : 10; + uint64_t geometry_id : 32; + uint64_t material_id_low : 14; + + uint64_t material_id_hi : 18; + uint64_t shader_id : 32; + uint64_t uses_forward_gi : 1; + uint64_t uses_lightmap : 1; + uint64_t depth_layer : 4; + uint64_t priority : 8; }; struct { uint64_t sort_key1; @@ -532,20 +599,20 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { float lod_model_scale = 1.0; AABB transformed_aabb; //needed for LOD float depth = 0; - struct PushConstant { - float transform[16]; 
- uint32_t flags; - uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables - uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint32_t layer_mask; - float lightmap_uv_scale[4]; - } push_constant; + uint32_t gi_offset_cache = 0; + uint32_t flags_cache = 0; + bool store_transform_cache = true; + int32_t shader_parameters_offset = -1; + uint32_t lightmap_slice_index; + Rect2 lightmap_uv_scale; + uint32_t layer_mask = 1; RID transforms_uniform_set; uint32_t instance_count = 0; RID mesh_instance; bool can_sdfgi = false; //used during setup uint32_t base_flags = 0; + Transform transform; RID gi_probes[MAX_GI_PROBES_PER_INSTANCE]; RID lightmap_instance; GeometryInstanceLightmapSH *lightmap_sh = nullptr; @@ -558,21 +625,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { RS::InstanceType base_type; RID skeleton; - - uint32_t layer_mask = 1; - Vector<RID> surface_materials; RID material_override; - Transform transform; AABB aabb; - int32_t shader_parameters_offset = -1; bool use_dynamic_gi = false; bool use_baked_light = false; bool cast_double_sided_shaodows = false; bool mirror = false; - Rect2 lightmap_uv_scale; - uint32_t lightmap_slice_index = 0; bool dirty_dependencies = false; RendererStorage::DependencyTracker dependency_tracker; @@ -604,16 +664,12 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { /* Render List */ struct RenderList { - int max_elements; - - GeometryInstanceSurfaceDataCache **elements = nullptr; - - int element_count; - int alpha_element_count; + LocalVector<GeometryInstanceSurfaceDataCache *> elements; + LocalVector<RenderElementInfo> element_info; void clear() { - element_count = 0; - alpha_element_count = 0; + elements.clear(); + element_info.clear(); } //should eventually be replaced by radix @@ -624,13 +680,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { } }; - void sort_by_key(bool p_alpha) { + void sort_by_key() 
{ SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); + } + + void sort_by_key_range(uint32_t p_from, uint32_t p_size) { + SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; + sorter.sort(elements.ptr() + p_from, p_size); } struct SortByDepth { @@ -639,14 +696,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { } }; - void sort_by_depth(bool p_alpha) { //used for shadows + void sort_by_depth() { //used for shadows SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); } struct SortByReverseDepthAndPriority { @@ -658,50 +711,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD { void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter; - if (p_alpha) { - sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); - } else { - sorter.sort(elements, element_count); - } + sorter.sort(elements.ptr(), elements.size()); } _FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) { - if (element_count + alpha_element_count >= max_elements) { - return; - } - elements[element_count] = p_element; - element_count++; - } - - _FORCE_INLINE_ void add_alpha_element(GeometryInstanceSurfaceDataCache *p_element) { - if (element_count + alpha_element_count >= max_elements) { - return; - } - int idx = max_elements - alpha_element_count - 1; - elements[idx] = p_element; - alpha_element_count++; - } - - void init() { - element_count = 0; - alpha_element_count = 0; - elements = 
memnew_arr(GeometryInstanceSurfaceDataCache *, max_elements); - } - - RenderList() { - max_elements = 0; - } - - ~RenderList() { - memdelete_arr(elements); + elements.push_back(p_element); } }; - RenderList render_list; + RenderList render_list[RENDER_LIST_MAX]; protected: - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); - virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); + + virtual void _render_shadow_begin(); + virtual void _render_shadow_append(RID p_framebuffer, const 
PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true); + virtual void _render_shadow_process(); + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL); + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index d5c9ccd956..3035124cca 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1148,162 +1148,71 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { cascade_data[i].pad = 0; } - RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); } -void RendererSceneRenderRD::sdfgi_update_probes(RID 
p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { +void RendererSceneRenderRD::_sdfgi_update_light(RID p_render_buffers, RID p_environment) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); if (rb->sdfgi == nullptr) { return; } - Environment *env = environment_owner.getornull(p_environment); - - RENDER_TIMESTAMP(">SDFGI Update Probes"); - - /* Update Cascades UBO */ - _sdfgi_update_cascades(p_render_buffers); - /* Update Dynamic Lights Buffer */ - - RENDER_TIMESTAMP("Update Lights"); - /* Update dynamic lights */ + RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); - { - int32_t cascade_light_count[SDFGI::MAX_CASCADES]; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - - SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; - uint32_t idx = 0; - for (uint32_t j = 0; j < (uint32_t)p_directional_lights.size(); j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_directional_lights[j]); - ERR_CONTINUE(!li); + /* Update dynamic light */ - if (storage->light_directional_is_sky_only(li->light)) { - continue; - } - - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - dir.y *= rb->sdfgi->y_mult; - dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = RS::LIGHT_DIRECTIONAL; - lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - - idx++; - } - - AABB cascade_aabb; - 
cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; - cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; - - for (uint32_t j = 0; j < p_positional_light_count; j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - LightInstance *li = light_instance_owner.getornull(p_positional_light_instances[j]); - ERR_CONTINUE(!li); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); - if (i > max_sdfgi_cascade) { - continue; - } + SDGIShader::DirectLightPushConstant push_constant; - if (!cascade_aabb.intersects(li->aabb)) { - continue; - } + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.multibounce = rb->sdfgi->uses_multibounce; + push_constant.y_mult = rb->sdfgi->y_mult; - Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); - //faster to not do this here - //dir.y *= rb->sdfgi->y_mult; - //dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Vector3 pos = li->transform.origin; - pos.y *= rb->sdfgi->y_mult; - lights[idx].position[0] = pos.x; - lights[idx].position[1] = pos.y; - lights[idx].position[2] = pos.z; - Color color = storage->light_get_color(li->light); - color = color.to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = storage->light_get_type(li->light); - lights[idx].energy = 
storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); - lights[idx].has_shadow = storage->light_has_shadow(li->light); - lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); - lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); - lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + push_constant.light_count = rb->sdfgi->cascade_dynamic_light_count[i]; + push_constant.cascade = i; - idx++; - } + if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { + push_constant.process_offset = 0; + push_constant.process_increment = 1; + } else { + static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { + 1, 2, 4, 8, 16 + }; - if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); - } + uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; - cascade_light_count[i] = idx; + push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; + push_constant.process_increment = frames_to_update; } + rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.direct_light_pipeline[SDGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - - SDGIShader::DirectLightPushConstant push_constant; - - push_constant.grid_size[0] = rb->sdfgi->cascade_size; - push_constant.grid_size[1] = rb->sdfgi->cascade_size; - push_constant.grid_size[2] = rb->sdfgi->cascade_size; - push_constant.max_cascades = 
rb->sdfgi->cascades.size(); - push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; - push_constant.multibounce = rb->sdfgi->uses_multibounce; - push_constant.y_mult = rb->sdfgi->y_mult; - - for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; - push_constant.light_count = cascade_light_count[i]; - push_constant.cascade = i; - - if (rb->sdfgi->cascades[i].all_dynamic_lights_dirty || sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { - push_constant.process_offset = 0; - push_constant.process_increment = 1; - } else { - static uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { - 1, 2, 4, 8, 16 - }; - - uint32_t frames_to_update = frames_to_update_table[sdfgi_frames_to_update_light]; - - push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; - push_constant.process_increment = frames_to_update; - } - rb->sdfgi->cascades[i].all_dynamic_lights_dirty = false; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); + } + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_end_label(); +} - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); - } - RD::get_singleton()->compute_list_end(); +void RendererSceneRenderRD::_sdfgi_update_probes(RID p_render_buffers, RID p_environment) { + RenderBuffers *rb = 
render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - RENDER_TIMESTAMP("Raytrace"); + RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes"); + + Environment *env = environment_owner.getornull(p_environment); SDGIShader::IntegratePushConstant push_constant; push_constant.grid_size[1] = rb->sdfgi->cascade_size; @@ -1372,7 +1281,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi rb->sdfgi->render_pass++; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_PROCESS]); int32_t probe_divisor = rb->sdfgi->cascade_size / SDFGI::PROBE_DIVISOR; @@ -1386,14 +1295,47 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); + } + + //end later after raster to avoid barriering on layout changes + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); + + RD::get_singleton()->draw_command_end_label(); +} + +void RendererSceneRenderRD::_sdfgi_store_probes(RID p_render_buffers) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + if (rb->sdfgi == nullptr) { + return; } - 
RD::get_singleton()->compute_list_add_barrier(compute_list); //wait until done + RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); + + SDGIShader::IntegratePushConstant push_constant; + push_constant.grid_size[1] = rb->sdfgi->cascade_size; + push_constant.grid_size[2] = rb->sdfgi->cascade_size; + push_constant.grid_size[0] = rb->sdfgi->cascade_size; + push_constant.max_cascades = rb->sdfgi->cascades.size(); + push_constant.probe_axis_size = rb->sdfgi->probe_axis_count; + push_constant.history_index = rb->sdfgi->render_pass % rb->sdfgi->history_size; + push_constant.history_size = rb->sdfgi->history_size; + static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; + push_constant.ray_count = ray_count[sdfgi_ray_count]; + push_constant.ray_bias = rb->sdfgi->probe_bias; + push_constant.image_size[0] = rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count; + push_constant.image_size[1] = rb->sdfgi->probe_axis_count; + push_constant.store_ambient_texture = false; + + push_constant.sky_mode = 0; + push_constant.y_mult = rb->sdfgi->y_mult; // Then store values into the lightprobe texture. 
Separating these steps has a small performance hit, but it allows for multiple bounces RENDER_TIMESTAMP("Average Probes"); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.integrate_pipeline[SDGIShader::INTEGRATE_MODE_STORE]); //convert to octahedral to store @@ -1403,20 +1345,22 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { push_constant.cascade = i; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - RENDER_TIMESTAMP("<SDFGI Update Probes"); + RD::get_singleton()->draw_command_end_label(); } - void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used) { r_gi_probes_used = 0; RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); + RD::get_singleton()->draw_command_begin_label("GIProbes Setup"); + RID gi_probe_buffer = 
render_buffers_get_gi_probe_buffer(p_render_buffers); GI::GIProbeData gi_probe_data[RenderBuffers::MAX_GIPROBES]; @@ -1500,80 +1444,25 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor } if (p_gi_probes.size() > 0) { - RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data); + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, RD::BARRIER_MASK_COMPUTE); } -} -void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { - RENDER_TIMESTAMP("Render GI"); + RD::get_singleton()->draw_command_end_label(); +} +void RendererSceneRenderRD::_pre_process_gi(RID p_render_buffers, const Transform &p_transform) { + // Do the required buffer transfers and setup before the depth-pre pass, this way GI can + // run in parallel during depth-pre pass and shadow rendering. 
RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(rb == nullptr); - Environment *env = environment_owner.getornull(p_environment); - - if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { - if (rb->ambient_buffer.is_valid()) { - RD::get_singleton()->free(rb->ambient_buffer); - RD::get_singleton()->free(rb->reflection_buffer); - } - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.width = rb->width; - tf.height = rb->height; - if (gi.half_resolution) { - tf.width >>= 1; - tf.height >>= 1; - } - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->using_half_size_gi = gi.half_resolution; - - _render_buffers_uniform_set_changed(p_render_buffers); - } - - GI::PushConstant push_constant; - - push_constant.screen_size[0] = rb->width; - push_constant.screen_size[1] = rb->height; - push_constant.z_near = p_projection.get_z_near(); - push_constant.z_far = p_projection.get_z_far(); - push_constant.orthogonal = p_projection.is_orthogonal(); - push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); - push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); - push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); - push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; - - bool use_sdfgi = rb->sdfgi != nullptr; - bool use_giprobes = push_constant.max_giprobes > 0; + /* Update Cascades UBO */ - if (env) { - push_constant.ao_color[0] = env->ao_color.r; - push_constant.ao_color[1] = env->ao_color.g; 
- push_constant.ao_color[2] = env->ao_color.b; - } else { - push_constant.ao_color[0] = 0; - push_constant.ao_color[1] = 0; - push_constant.ao_color[2] = 0; - } + if (rb->sdfgi) { + /* Update general SDFGI Buffer */ - push_constant.cam_rotation[0] = p_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; + _sdfgi_update_cascades(p_render_buffers); - if (rb->sdfgi) { GI::SDFGIData sdfgi_data; sdfgi_data.grid_size[0] = rb->sdfgi->cascade_size; @@ -1640,9 +1529,172 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; } - RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data); + RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + + /* Update dynamic lights in SDFGI cascades */ + + for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { + SDFGI::Cascade &cascade = rb->sdfgi->cascades[i]; + + SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; + uint32_t idx = 0; + for (uint32_t j = 0; j < (uint32_t)render_state.sdfgi_update_data->directional_lights->size(); j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->directional_lights->get(j)); + ERR_CONTINUE(!li); + + if (storage->light_directional_is_sky_only(li->light)) { + continue; + } + + Vector3 dir 
= -li->transform.basis.get_axis(Vector3::AXIS_Z); + dir.y *= rb->sdfgi->y_mult; + dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RS::LIGHT_DIRECTIONAL; + lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + + idx++; + } + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(rb->sdfgi->cascade_size >> 1) + cascade.position)) * cascade.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * rb->sdfgi->cascade_size * cascade.cell_size; + + for (uint32_t j = 0; j < render_state.sdfgi_update_data->positional_light_count; j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + LightInstance *li = light_instance_owner.getornull(render_state.sdfgi_update_data->positional_light_instances[j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = storage->light_get_max_sdfgi_cascade(li->light); + if (i > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_axis(Vector3::AXIS_Z); + //faster to not do this here + //dir.y *= rb->sdfgi->y_mult; + //dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= rb->sdfgi->y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = storage->light_get_color(li->light); + color = color.to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = storage->light_get_type(li->light); + 
lights[idx].energy = storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY); + lights[idx].has_shadow = storage->light_has_shadow(li->light); + lights[idx].attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].spot_angle = Math::deg2rad(storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE)); + lights[idx].spot_attenuation = storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + } + + rb->sdfgi->cascade_dynamic_light_count[i] = idx; + } + } +} + +void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes) { + RD::get_singleton()->draw_command_begin_label("GI Render"); + + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + Environment *env = environment_owner.getornull(p_environment); + + if (rb->ambient_buffer.is_null() || rb->using_half_size_gi != gi.half_resolution) { + if (rb->ambient_buffer.is_valid()) { + RD::get_singleton()->free(rb->ambient_buffer); + RD::get_singleton()->free(rb->reflection_buffer); + } + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->width; + tf.height = rb->height; + if (gi.half_resolution) { + tf.width >>= 1; + tf.height >>= 1; + } + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->using_half_size_gi = gi.half_resolution; + + 
_render_buffers_uniform_set_changed(p_render_buffers); + } + + GI::PushConstant push_constant; + + push_constant.screen_size[0] = rb->width; + push_constant.screen_size[1] = rb->height; + push_constant.z_near = p_projection.get_z_near(); + push_constant.z_far = p_projection.get_z_far(); + push_constant.orthogonal = p_projection.is_orthogonal(); + push_constant.proj_info[0] = -2.0f / (rb->width * p_projection.matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (rb->height * p_projection.matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; + push_constant.max_giprobes = MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()); + push_constant.high_quality_vct = gi_probe_quality == RS::GI_PROBE_QUALITY_HIGH; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_giprobes = push_constant.max_giprobes > 0; + + if (env) { + push_constant.ao_color[0] = env->ao_color.r; + push_constant.ao_color[1] = env->ao_color.g; + push_constant.ao_color[2] = env->ao_color.b; + } else { + push_constant.ao_color[0] = 0; + push_constant.ao_color[1] = 0; + push_constant.ao_color[2] = 0; } + push_constant.cam_rotation[0] = p_transform.basis[0][0]; + push_constant.cam_rotation[1] = p_transform.basis[1][0]; + push_constant.cam_rotation[2] = p_transform.basis[2][0]; + push_constant.cam_rotation[3] = 0; + push_constant.cam_rotation[4] = p_transform.basis[0][1]; + push_constant.cam_rotation[5] = p_transform.basis[1][1]; + push_constant.cam_rotation[6] = p_transform.basis[2][1]; + push_constant.cam_rotation[7] = 0; + push_constant.cam_rotation[8] = p_transform.basis[0][2]; + push_constant.cam_rotation[9] = p_transform.basis[1][2]; + push_constant.cam_rotation[10] = p_transform.basis[2][2]; + push_constant.cam_rotation[11] = 0; + if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { 
Vector<RD::Uniform> uniforms; { @@ -1806,17 +1858,19 @@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough } else { mode = (use_sdfgi && use_giprobes) ? GI::MODE_COMBINED : (use_sdfgi ? GI::MODE_SDFGI : GI::MODE_GIPROBE); } - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi.pipelines[mode]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(GI::PushConstant)); if (rb->using_half_size_gi) { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width >> 1, rb->height >> 1, 1); } else { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); } - RD::get_singleton()->compute_list_end(); + //do barrier later to allow oeverlap + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->draw_command_end_label(); } RID RendererSceneRenderRD::sky_create() { @@ -4015,11 +4069,7 @@ void RendererSceneRenderRD::light_instance_set_shadow_transform(RID p_light_inst LightInstance *light_instance = light_instance_owner.getornull(p_light_instance); ERR_FAIL_COND(!light_instance); - if (storage->light_get_type(light_instance->light) != RS::LIGHT_DIRECTIONAL) { - p_pass = 0; - } - - ERR_FAIL_INDEX(p_pass, 4); + ERR_FAIL_INDEX(p_pass, 6); light_instance->shadow_transform[p_pass].camera = p_projection; light_instance->shadow_transform[p_pass].transform = p_transform; @@ 
-5150,9 +5200,6 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { RD::get_singleton()->free(rb->luminance.reduce[i]); } - for (int i = 0; i < rb->luminance.reduce.size(); i++) { - RD::get_singleton()->free(rb->luminance.reduce[i]); - } rb->luminance.reduce.clear(); if (rb->luminance.current.is_valid()) { @@ -5767,7 +5814,7 @@ void RendererSceneRenderRD::_sdfgi_debug_draw(RID p_render_buffers, const Camera RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::DebugPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->width, rb->height, 1); RD::get_singleton()->compute_list_end(); Size2 rtsize = storage->render_target_get_size(rb->render_target); @@ -6179,7 +6226,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti } if (cluster.reflection_count) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections); + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6571,16 +6618,17 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const r_positional_light_count++; } + //update without barriers if (cluster.omni_light_count) { - RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights); + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } if (cluster.spot_light_count) { - RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) 
* cluster.spot_light_count, cluster.spot_lights); + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } if (r_directional_light_count) { - RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights); + RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6741,7 +6789,7 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray<RID> &p_decals, const } if (cluster.decal_count > 0) { - RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals); + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } } @@ -6786,7 +6834,7 @@ void RendererSceneRenderRD::_allocate_shadow_shrink_stages(RID p_base, int p_bas s.size = base_texture_size; RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.format = RD::DATA_FORMAT_R16_UNORM; tf.width = base_texture_size; tf.height = base_texture_size; tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; @@ -6834,6 +6882,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e return; } + RENDER_TIMESTAMP(">Volumetric Fog"); + if (env && env->volumetric_fog_enabled && !rb->volumetric_fog) { //required volumetric fog but not existing, create rb->volumetric_fog = memnew(VolumetricFog); @@ -6870,7 +6920,11 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e //update directional shadow + RENDER_TIMESTAMP("Downsample Shadows"); + if (p_use_directional_shadows) { + 
RD::get_singleton()->draw_command_begin_label("Downsample Directional Shadows"); + if (directional_shadow.shrink_stages.is_empty()) { if (rb->volumetric_fog->uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->uniform_set)) { //invalidate uniform set, we will need a new one @@ -6897,6 +6951,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e } RD::get_singleton()->compute_list_end(); } + RD::get_singleton()->draw_command_end_label(); } ShadowAtlas *shadow_atlas = shadow_atlas_owner.getornull(p_shadow_atlas); @@ -6904,6 +6959,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (shadow_atlas) { //shrink shadows that need to be shrunk + RD::get_singleton()->draw_command_begin_label("Downsample Positional Shadows"); + bool force_shrink_shadows = false; if (shadow_atlas->shrink_stages.is_empty()) { @@ -7022,8 +7079,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e } } - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); } + + RD::get_singleton()->draw_command_end_label(); } //update volumetric fog @@ -7273,10 +7332,10 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e push_constant.directional_shadow_pixel_size[1] = 1.0 / dssize.y; */ - RENDER_TIMESTAMP(">Volumetric Fog"); + RD::get_singleton()->draw_command_begin_label("Render Volumetric Fog"); RENDER_TIMESTAMP("Render Fog"); - RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, RD::BARRIER_MASK_COMPUTE); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7289,17 +7348,20 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (using_sdfgi) {
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); + RD::get_singleton()->draw_command_end_label(); RD::get_singleton()->compute_list_add_barrier(compute_list); if (use_filter) { + RD::get_singleton()->draw_command_begin_label("Filter Fog"); + RENDER_TIMESTAMP("Filter Fog"); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); RD::get_singleton()->compute_list_end(); //need restart for buffer update @@ -7313,95 +7375,174 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e if (using_sdfgi) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->sdfgi_uniform_set, 1); } - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, rb->volumetric_fog->depth); RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); } RENDER_TIMESTAMP("Integrate Fog"); + 
RD::get_singleton()->draw_command_begin_label("Integrate Fog"); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FOG]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->volumetric_fog->uniform_set, 0); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->volumetric_fog->width, rb->volumetric_fog->height, 1); - RD::get_singleton()->compute_list_end(); + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER); RENDER_TIMESTAMP("<Volumetric Fog"); + RD::get_singleton()->draw_command_end_label(); } -void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { - Color clear_color; - if (p_render_buffers.is_valid()) { - RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); - ERR_FAIL_COND(!rb); - clear_color = storage->render_target_get_clear_request_color(rb->render_target); - } else { - clear_color = storage->get_default_clear_color(); +uint32_t RendererSceneRenderRD::_get_render_state_directional_light_count() const { + return render_state.directional_light_count; +} + +bool RendererSceneRenderRD::_needs_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + if (rb->sdfgi != nullptr) { + return true; + } } 
+ return false; +} - //assign render indices to giprobes - for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { - GIProbeInstance *giprobe_inst = gi_probe_instance_owner.getornull(p_gi_probes[i]); - if (giprobe_inst) { - giprobe_inst->render_index = i; +void RendererSceneRenderRD::_post_prepass_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + _sdfgi_update_probes(render_state.render_buffers, render_state.environment); } } +} - const PagedArray<RID> *lights = &p_lights; - const PagedArray<RID> *reflections = &p_reflection_probes; - const PagedArray<RID> *gi_probes = &p_gi_probes; +void RendererSceneRenderRD::_pre_resolve_render(bool p_use_gi) { + if (render_state.render_buffers.is_valid()) { + if (p_use_gi) { + RD::get_singleton()->compute_list_end(); + } + } +} - PagedArray<RID> empty; +void RendererSceneRenderRD::_pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer) { + // Render shadows while GI is rendering, due to how barriers are handled, this should happen at the same time - if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { - lights = &empty; - reflections = &empty; - gi_probes = &empty; + if (render_state.render_buffers.is_valid() && p_use_gi) { + _sdfgi_store_probes(render_state.render_buffers); } - if (render_buffers_owner.owns(p_render_buffers)) { - RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); - current_cluster_builder = rb->cluster_builder; - } else if (reflection_probe_instance_owner.owns(p_reflection_probe)) { - ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(p_reflection_probe); - ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); - if (!ra) { - ERR_PRINT("reflection probe has no reflection atlas!
Bug?"); - current_cluster_builder = nullptr; - } else { - current_cluster_builder = ra->cluster_builder; + render_state.cube_shadows.clear(); + render_state.shadows.clear(); + render_state.directional_shadows.clear(); + + Plane camera_plane(render_state.cam_transform.origin, -render_state.cam_transform.basis.get_axis(Vector3::AXIS_Z)); + float lod_distance_multiplier = render_state.cam_projection.get_lod_multiplier(); + + { + for (int i = 0; i < render_state.render_shadow_count; i++) { + LightInstance *li = light_instance_owner.getornull(render_state.render_shadows[i].light); + + if (storage->light_get_type(li->light) == RS::LIGHT_DIRECTIONAL) { + render_state.directional_shadows.push_back(i); + } else if (storage->light_get_type(li->light) == RS::LIGHT_OMNI && storage->light_omni_get_shadow_mode(li->light) == RS::LIGHT_OMNI_SHADOW_CUBE) { + render_state.cube_shadows.push_back(i); + } else { + render_state.shadows.push_back(i); + } } - } else { - ERR_PRINT("No cluster builder, bug"); //should never happen, will crash - current_cluster_builder = nullptr; + + //cube shadows are rendered in their own way + for (uint32_t i = 0; i < render_state.cube_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.cube_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.cube_shadows[i]].pass, render_state.render_shadows[render_state.cube_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, true, true, true); + } + + if (render_state.directional_shadows.size()) { + //open the pass for directional shadows + _update_directional_shadow_atlas(); + RD::get_singleton()->draw_list_begin(directional_shadow.fb, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_end(); + } + } + + // Render GI + + bool render_shadows = render_state.directional_shadows.size() || render_state.shadows.size(); + bool 
render_gi = render_state.render_buffers.is_valid() && p_use_gi; + + if (render_shadows && render_gi) { + RENDER_TIMESTAMP("Render GI + Render Shadows (parallel)"); + } else if (render_shadows) { + RENDER_TIMESTAMP("Render Shadows"); + } else if (render_gi) { + RENDER_TIMESTAMP("Render GI"); } - current_cluster_builder->begin(p_cam_transform, p_cam_projection, !p_reflection_probe.is_valid()); + //prepare shadow rendering + if (render_shadows) { + _render_shadow_begin(); + + //render directional shadows + for (uint32_t i = 0; i < render_state.directional_shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.directional_shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.directional_shadows[i]].pass, render_state.render_shadows[render_state.directional_shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, false, i == render_state.directional_shadows.size() - 1, false); + } + //render positional shadows + for (uint32_t i = 0; i < render_state.shadows.size(); i++) { + _render_shadow_pass(render_state.render_shadows[render_state.shadows[i]].light, render_state.shadow_atlas, render_state.render_shadows[render_state.shadows[i]].pass, render_state.render_shadows[render_state.shadows[i]].instances, camera_plane, lod_distance_multiplier, render_state.screen_lod_threshold, i == 0, i == render_state.shadows.size() - 1, true); + } + + _render_shadow_process(); + } + + //start GI + if (render_gi) { + _process_gi(render_state.render_buffers, p_normal_roughness_buffer, p_gi_probe_buffer, render_state.environment, render_state.cam_projection, render_state.cam_transform, *render_state.gi_probes); + } + + //Do shadow rendering (in parallel with GI) + if (render_shadows) { + _render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); + } + + if (render_gi) { + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier + } + + if 
(render_state.render_buffers.is_valid()) { + if (p_use_ssao) { + _process_ssao(render_state.render_buffers, render_state.environment, p_normal_roughness_buffer, render_state.cam_projection); + } + } + + //full barrier here, we need raster, transfer and compute and it depends from the previous work + RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL, RD::BARRIER_MASK_ALL); + + if (current_cluster_builder) { + current_cluster_builder->begin(render_state.cam_transform, render_state.cam_projection, !render_state.reflection_probe.is_valid()); + } bool using_shadows = true; - if (p_reflection_probe.is_valid()) { - if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(p_reflection_probe))) { + if (render_state.reflection_probe.is_valid()) { + if (!storage->reflection_probe_renders_shadows(reflection_probe_instance_get_probe(render_state.reflection_probe))) { using_shadows = false; } } else { //do not render reflections when rendering a reflection probe - _setup_reflections(*reflections, p_cam_transform.affine_inverse(), p_environment); + _setup_reflections(*render_state.reflection_probes, render_state.cam_transform.affine_inverse(), render_state.environment); } uint32_t directional_light_count = 0; uint32_t positional_light_count = 0; - _setup_lights(*lights, p_cam_transform, p_shadow_atlas, using_shadows, directional_light_count, positional_light_count); - _setup_decals(p_decals, p_cam_transform.affine_inverse()); + _setup_lights(*render_state.lights, render_state.cam_transform, render_state.shadow_atlas, using_shadows, directional_light_count, positional_light_count); + _setup_decals(*render_state.decals, render_state.cam_transform.affine_inverse()); - current_cluster_builder->bake_cluster(); + render_state.directional_light_count = directional_light_count; - uint32_t gi_probe_count = 0; - if (p_render_buffers.is_valid()) { - _setup_giprobes(p_render_buffers, p_cam_transform, *gi_probes, gi_probe_count); + if (current_cluster_builder) { + 
current_cluster_builder->bake_cluster(); } - if (p_render_buffers.is_valid()) { + if (render_state.render_buffers.is_valid()) { bool directional_shadows = false; for (uint32_t i = 0; i < directional_light_count; i++) { if (cluster.directional_lights[i].shadow_enabled) { @@ -7409,10 +7550,103 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & break; } } - _update_volumetric_fog(p_render_buffers, p_environment, p_cam_projection, p_cam_transform, p_shadow_atlas, directional_light_count, directional_shadows, positional_light_count, gi_probe_count); + _update_volumetric_fog(render_state.render_buffers, render_state.environment, render_state.cam_projection, render_state.cam_transform, render_state.shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.gi_probe_count); + } +} + +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data) { + //assign render data + { + render_state.render_buffers = p_render_buffers; + render_state.cam_transform = p_cam_transform; + render_state.cam_projection = p_cam_projection; + render_state.cam_ortogonal = p_cam_projection.is_orthogonal(); + render_state.instances = &p_instances; + render_state.lights = &p_lights; + render_state.reflection_probes = &p_reflection_probes; + 
render_state.gi_probes = &p_gi_probes; + render_state.decals = &p_decals; + render_state.lightmaps = &p_lightmaps; + render_state.environment = p_environment; + render_state.camera_effects = p_camera_effects; + render_state.shadow_atlas = p_shadow_atlas; + render_state.reflection_atlas = p_reflection_atlas; + render_state.reflection_probe = p_reflection_probe; + render_state.reflection_probe_pass = p_reflection_probe_pass; + render_state.screen_lod_threshold = p_screen_lod_threshold; + + render_state.render_shadows = p_render_shadows; + render_state.render_shadow_count = p_render_shadow_count; + render_state.render_sdfgi_regions = p_render_sdfgi_regions; + render_state.render_sdfgi_region_count = p_render_sdfgi_region_count; + render_state.sdfgi_update_data = p_sdfgi_update_data; + } + + PagedArray<RID> empty; + + if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { + render_state.lights = &empty; + render_state.reflection_probes = &empty; + render_state.gi_probes = &empty; + } + + //sdfgi first + if (p_render_buffers.is_valid()) { + for (int i = 0; i < render_state.render_sdfgi_region_count; i++) { + _render_sdfgi_region(p_render_buffers, render_state.render_sdfgi_regions[i].region, render_state.render_sdfgi_regions[i].instances); + } + if (render_state.sdfgi_update_data->update_static) { + _render_sdfgi_static_lights(p_render_buffers, render_state.sdfgi_update_data->static_cascade_count, p_sdfgi_update_data->static_cascade_indices, render_state.sdfgi_update_data->static_positional_lights); + } + } + + Color clear_color; + if (p_render_buffers.is_valid()) { + RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); + ERR_FAIL_COND(!rb); + clear_color = storage->render_target_get_clear_request_color(rb->render_target); + } else { + clear_color = storage->get_default_clear_color(); + } + + //assign render indices to giprobes + for (uint32_t i = 0; i < (uint32_t)p_gi_probes.size(); i++) { + GIProbeInstance *giprobe_inst =
gi_probe_instance_owner.getornull(p_gi_probes[i]); + if (giprobe_inst) { + giprobe_inst->render_index = i; + } + } + + if (render_buffers_owner.owns(render_state.render_buffers)) { + RenderBuffers *rb = render_buffers_owner.getornull(render_state.render_buffers); + current_cluster_builder = rb->cluster_builder; + } else if (reflection_probe_instance_owner.owns(render_state.reflection_probe)) { + ReflectionProbeInstance *rpi = reflection_probe_instance_owner.getornull(render_state.reflection_probe); + ReflectionAtlas *ra = reflection_atlas_owner.getornull(rpi->atlas); + if (!ra) { + ERR_PRINT("reflection probe has no reflection atlas! Bug?"); + current_cluster_builder = nullptr; + } else { + current_cluster_builder = ra->cluster_builder; + } + } else { + ERR_PRINT("No cluster builder, bug"); //should never happen, will crash + current_cluster_builder = nullptr; + } + + if (p_render_buffers.is_valid()) { + _pre_process_gi(p_render_buffers, p_cam_transform); + } + + render_state.gi_probe_count = 0; + if (render_state.render_buffers.is_valid()) { + _setup_giprobes(render_state.render_buffers, render_state.cam_transform, *render_state.gi_probes, render_state.gi_probe_count); + _sdfgi_update_light(render_state.render_buffers, render_state.environment); } - _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, directional_light_count, *gi_probes, p_lightmaps, p_environment, current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); + render_state.depth_prepass_used = false; + //calls _pre_opaque_render between depth pre-pass and opaque pass + _render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, *render_state.gi_probes, p_lightmaps, p_environment, 
current_cluster_builder->get_cluster_buffer(), current_cluster_builder->get_cluster_size(), current_cluster_builder->get_max_cluster_elements(), p_camera_effects, p_shadow_atlas, p_reflection_atlas, p_reflection_probe, p_reflection_probe_pass, clear_color, p_screen_lod_threshold); if (p_render_buffers.is_valid()) { if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS || debug_draw == RS::VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES) { @@ -7446,7 +7680,7 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const Transform & } } -void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) { +void RendererSceneRenderRD::_render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold, bool p_open_pass, bool p_close_pass, bool p_clear_region) { LightInstance *light_instance = light_instance_owner.getornull(p_light); ERR_FAIL_COND(!light_instance); @@ -7469,13 +7703,7 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p CameraMatrix light_projection; Transform light_transform; - bool clear_region = true; - bool begin_texture = true; - bool end_texture = true; - if (storage->light_get_type(light_instance->light) == RS::LIGHT_DIRECTIONAL) { - _update_directional_shadow_atlas(); - //set pssm stuff if (light_instance->last_scene_shadow_pass != scene_pass) { light_instance->directional_rect = _get_directional_shadow_rect(directional_shadow.size, directional_shadow.light_count, directional_shadow.current_light); @@ -7492,7 +7720,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID 
p_shadow_atlas, int p atlas_rect.size.width = light_instance->directional_rect.size.x; atlas_rect.size.height = light_instance->directional_rect.size.y; - int pass_count = 1; if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_4_SPLITS) { atlas_rect.size.width /= 2; atlas_rect.size.height /= 2; @@ -7505,7 +7732,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p atlas_rect.position.x += atlas_rect.size.width; atlas_rect.position.y += atlas_rect.size.height; } - pass_count = 4; } else if (storage->light_directional_get_shadow_mode(light_instance->light) == RS::LIGHT_DIRECTIONAL_SHADOW_PARALLEL_2_SPLITS) { atlas_rect.size.height /= 2; @@ -7513,7 +7739,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p } else { atlas_rect.position.y += atlas_rect.size.height; } - pass_count = 2; } light_instance->shadow_transform[p_pass].atlas_rect = atlas_rect; @@ -7527,10 +7752,6 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p render_texture = RID(); flip_y = true; - clear_region = false; - begin_texture = (directional_shadow.current_light == 1) && (p_pass == 0); //light is 1-index because it was incremented above - end_texture = (directional_shadow.current_light == directional_shadow.light_count) && (p_pass == pass_count - 1); - } else { //set from shadow atlas @@ -7568,14 +7789,17 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p render_fb = cubemap->side_fb[p_pass]; render_texture = cubemap->cubemap; - light_projection = light_instance->shadow_transform[0].camera; - light_transform = light_instance->shadow_transform[0].transform; + light_projection = light_instance->shadow_transform[p_pass].camera; + light_transform = light_instance->shadow_transform[p_pass].transform; render_cubemap = true; finalize_cubemap = p_pass == 5; atlas_fb = shadow_atlas->fb; atlas_size = shadow_atlas->size; - 
clear_region = false; + + if (p_pass == 0) { + _render_shadow_begin(); + } } else { light_projection = light_instance->shadow_transform[0].camera; @@ -7602,8 +7826,10 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p if (render_cubemap) { //rendering to cubemap - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold); + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, Rect2(), false, true, true, true); if (finalize_cubemap) { + _render_shadow_process(); + _render_shadow_end(); //reblit Rect2 atlas_rect_norm = atlas_rect; atlas_rect_norm.position.x /= float(atlas_size); @@ -7614,10 +7840,14 @@ void RendererSceneRenderRD::render_shadow(RID p_light, RID p_shadow_atlas, int p storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), false); atlas_rect_norm.position.y += atlas_rect_norm.size.height; storage->get_effects()->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, light_projection.get_z_near(), light_projection.get_z_far(), true); + + //restore transform so it can be properly used + light_instance_set_shadow_transform(p_light, CameraMatrix(), light_instance->transform, zfar, 0, 0, 0); } + } else { //render shadow - _render_shadow(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, clear_region, begin_texture, end_texture); + _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, using_dual_paraboloid, using_dual_paraboloid_flip, use_pancake, p_camera_plane, 
p_lod_distance_multiplier, p_screen_lod_threshold, atlas_rect, flip_y, p_clear_region, p_open_pass, p_close_pass); } } @@ -7625,7 +7855,7 @@ void RendererSceneRenderRD::render_material(const Transform &p_cam_transform, co _render_material(p_cam_transform, p_cam_projection, p_cam_ortogonal, p_instances, p_framebuffer, p_region); } -void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { +void RendererSceneRenderRD::_render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) { //print_line("rendering region " + itos(p_region)); RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); @@ -7651,6 +7881,8 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con _render_sdfgi(p_render_buffers, from, size, bounds, p_instances, rb->sdfgi->render_albedo, rb->sdfgi->render_emission, rb->sdfgi->render_emission_aniso, rb->sdfgi->render_geom_facing); if (cascade_next != cascade) { + RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); + RENDER_TIMESTAMP(">SDFGI Update SDF"); //done rendering! 
must update SDF //clear dispatch indirect data @@ -7700,7 +7932,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con groups.z = rb->sdfgi->cascade_size - ABS(dirty.z); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); //no barrier, continue together @@ -7742,7 +7974,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7750,7 +7982,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * 
rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count, rb->sdfgi->probe_axis_count, 1); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7766,7 +7998,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].integrate_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdfgi_shader.integrate_default_sky_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->probe_axis_count * rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, rb->sdfgi->probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } } @@ -7790,7 +8022,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_half_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); 
RD::get_singleton()->compute_list_add_barrier(compute_list); //must start with regular jumpflood @@ -7810,7 +8042,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; @@ -7828,7 +8060,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_half_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 
1 : 0; } @@ -7840,7 +8072,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_upscale_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); //run one pass of fullsize jumpflood to fix up half size arctifacts @@ -7850,7 +8082,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[rb->sdfgi->upscale_jfa_uniform_set_index], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); } else { @@ -7860,7 +8092,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con 
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->sdf_initialize_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7877,7 +8109,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 
1 : 0; @@ -7895,7 +8127,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con push_constant.step_size = s; RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->jump_flood_uniform_set[jf_us], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, optimized_jf_group_size, optimized_jf_group_size, optimized_jf_group_size); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_add_barrier(compute_list); jf_us = jf_us == 0 ? 1 : 0; } @@ -7942,7 +8174,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, sdfgi_shader.preprocess_pipeline[SDGIShader::PRE_PROCESS_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->sdfgi->cascades[cascade].sdf_store_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, 4, 4, 4); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size, rb->sdfgi->cascade_size); RD::get_singleton()->compute_list_end(); @@ -7979,6 +8211,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con #endif RENDER_TIMESTAMP("<SDFGI Update SDF"); + RD::get_singleton()->draw_command_end_label(); } } @@ -7999,11 +8232,13 @@ void RendererSceneRenderRD::render_particle_collider_heightfield(RID p_collider, 
_render_particle_collider_heightfield(fb, cam_xform, cm, p_instances); } -void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { +void RendererSceneRenderRD::_render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result) { RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers); ERR_FAIL_COND(!rb); ERR_FAIL_COND(!rb->sdfgi); + RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lighs"); + _sdfgi_update_cascades(p_render_buffers); //need cascades updated for this SDGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; @@ -8112,6 +8347,8 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin } RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->draw_command_end_label(); } bool RendererSceneRenderRD::free(RID p_rid) { @@ -8797,7 +9034,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { cluster.max_reflections = max_cluster_elements; cluster.reflections = memnew_arr(Cluster::ReflectionData, cluster.max_reflections); - cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_decals); + cluster.reflection_sort = memnew_arr(Cluster::InstanceSort<ReflectionProbeInstance>, cluster.max_reflections); cluster.reflection_buffer = RD::get_singleton()->storage_buffer_create(sizeof(Cluster::ReflectionData) * cluster.max_reflections); } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index af8cdb9b71..707c858ed9 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -109,8 +109,13 @@ protected: void _setup_reflections(const PagedArray<RID> 
&p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); - virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; - virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; + + virtual void _render_shadow_begin() = 0; + virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> 
&p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0; + virtual void _render_shadow_process() = 0; + virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL) = 0; + virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; @@ -132,8 +137,16 @@ protected: void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); + void _pre_process_gi(RID p_render_buffers, const Transform &p_transform); void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); + bool _needs_post_prepass_render(bool p_use_gi); + void 
_post_prepass_render(bool p_use_gi); + void _pre_resolve_render(bool p_use_gi); + + void _pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer); + uint32_t _get_render_state_directional_light_count() const; + // needed for a single argument calls (material and uv2) PagedArrayPool<GeometryInstance *> cull_argument_pool; PagedArray<GeometryInstance *> cull_argument; //need this to exist @@ -651,7 +664,7 @@ private: RS::LightType light_type = RS::LIGHT_DIRECTIONAL; - ShadowTransform shadow_transform[4]; + ShadowTransform shadow_transform[6]; AABB aabb; RID self; @@ -1031,8 +1044,14 @@ private: float y_mult = 1.0; uint32_t render_pass = 0; + + int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically }; + void _sdfgi_update_light(RID p_render_buffers, RID p_environment); + void _sdfgi_update_probes(RID p_render_buffers, RID p_environment); + void _sdfgi_store_probes(RID p_render_buffers); + RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; @@ -1460,6 +1479,41 @@ private: } cluster; + struct RenderState { + RID render_buffers; + Transform cam_transform; + CameraMatrix cam_projection; + bool cam_ortogonal = false; + const PagedArray<GeometryInstance *> *instances = nullptr; + const PagedArray<RID> *lights = nullptr; + const PagedArray<RID> *reflection_probes = nullptr; + const PagedArray<RID> *gi_probes = nullptr; + const PagedArray<RID> *decals = nullptr; + const PagedArray<RID> *lightmaps = nullptr; + RID environment; + RID camera_effects; + RID shadow_atlas; + RID reflection_atlas; + RID reflection_probe; + int reflection_probe_pass = 0; + float screen_lod_threshold = 0.0; + + const RenderShadowData *render_shadows = nullptr; + int render_shadow_count = 0; + const 
RenderSDFGIData *render_sdfgi_regions = nullptr; + int render_sdfgi_region_count = 0; + const RenderSDFGIUpdateData *sdfgi_update_data = nullptr; + + uint32_t directional_light_count = 0; + uint32_t gi_probe_count = 0; + + LocalVector<int> cube_shadows; + LocalVector<int> shadows; + LocalVector<int> directional_shadows; + + bool depth_prepass_used; + } render_state; + struct VolumetricFog { uint32_t width = 0; uint32_t height = 0; @@ -1547,6 +1601,10 @@ private: uint32_t max_cluster_elements = 512; bool low_end = false; + void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0, bool p_open_pass = true, bool p_close_pass = true, bool p_clear_region = true); + void _render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); + void _render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result); + public: virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0; virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; @@ -1594,7 +1652,6 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count); RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } /* SKY API */ @@ -1997,15 +2054,10 @@ public: float render_buffers_get_volumetric_fog_end(RID p_render_buffers); float render_buffers_get_volumetric_fog_detail_spread(RID 
p_render_buffers); - void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); - - void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0); + void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr); void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); - void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances); - void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> 
*p_positional_light_cull_result); - void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances); virtual void set_scene_pass(uint64_t p_pass) { diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 6d4343e183..f9c22dd4bf 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -3098,7 +3098,7 @@ void RendererStorageRD::update_mesh_instances() { RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); //dispatch without barrier, so all is done at the same time - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1); } mi->dirty = false; @@ -4555,7 +4555,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4609,7 +4609,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); 
RD::get_singleton()->compute_list_end(); @@ -4621,7 +4621,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 & RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -4728,7 +4728,7 @@ void RendererStorageRD::update_particles() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1); RD::get_singleton()->compute_list_end(); } @@ -6980,7 +6980,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0] RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); /* Process */ @@ -6996,7 +6996,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 
1 : 0], 0); push_constant.stride = stride; RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); stride /= 2; swap = !swap; RD::get_singleton()->compute_list_add_barrier(compute_list); @@ -7007,7 +7007,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) { RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1); RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index aa7195232a..48d43568c4 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ -1482,13 +1482,7 @@ public: return s->lod_count > 0; } - _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface) const { - Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); - - return s->index_array; - } - - _FORCE_INLINE_ RID mesh_surface_get_index_array_with_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { + _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void 
*p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const { Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); int32_t current_lod = -1; @@ -1500,9 +1494,19 @@ public: current_lod = i; } if (current_lod == -1) { + return 0; + } else { + return current_lod + 1; + } + } + + _FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface, uint32_t p_lod) const { + Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); + + if (p_lod == 0) { return s->index_array; } else { - return s->lods[current_lod].index_array; + return s->lods[p_lod - 1].index_array; } } diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 2ae22a8a38..e4a39ff813 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -301,6 +301,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { builder.append(compute_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); builder.append(general_defines.get_data()); builder.append(variant_defines[p_variant].get_data()); @@ -401,7 +402,6 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio builder.append(fragment_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline - builder.append(general_defines.get_data()); builder.append(variant_defines[i].get_data()); for (int j = 0; j < version->custom_defines.size(); j++) { @@ -440,6 +440,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio builder.append(compute_codev.get_data()); // version info (if exists) builder.append("\n"); //make sure defines begin at newline + builder.append(base_compute_defines.get_data()); builder.append(general_defines.get_data()); builder.append(variant_defines[i].get_data()); @@ -596,6 
+597,22 @@ bool ShaderRD::is_variant_enabled(int p_variant) const { return variants_enabled[p_variant]; } +ShaderRD::ShaderRD() { + // Do not feel forced to use this, in most cases it makes little to no difference. + bool use_32_threads = false; + if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { + use_32_threads = true; + } + String base_compute_define_text; + if (use_32_threads) { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 32\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 4\n"; + } else { + base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 64\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 8\n"; + } + + base_compute_defines = base_compute_define_text.ascii(); +} + void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) { ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(p_variant_defines.size() == 0); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index a3474c6f93..e0f4dcf2d0 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -99,8 +99,10 @@ class ShaderRD { const char *name; + CharString base_compute_defines; + protected: - ShaderRD() {} + ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); public: diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index ea203c8abe..83e2ae9cbd 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -89,12 +89,6 @@ MATERIAL_UNIFORMS } material; #endif -/* clang-format off */ - -VERTEX_SHADER_GLOBALS - -/* clang-format on */ - invariant gl_Position; #ifdef MODE_DUAL_PARABOLOID @@ -103,28 +97,43 @@ layout(location = 8) out float dp_clip; #endif +layout(location = 9) 
out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - mat4 world_matrix = draw_call.transform; + instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + mat4 world_matrix = instances.data[instance_index].transform; mat3 world_normal_matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { world_normal_matrix = inverse(mat3(world_matrix)); } else { world_normal_matrix = mat3(world_matrix); } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { + if (is_multimesh) { //multimesh, instances are for it - uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; mat4 matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); offset += 2; } else { @@ -132,14 +141,14 @@ void main() { offset += 3; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { #ifdef COLOR_USED color_interp *= transforms.data[offset]; #endif offset += 1; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { instance_custom = 
transforms.data[offset]; } @@ -161,7 +170,7 @@ void main() { #endif #if 0 - if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { //multimesh, instances are for it uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; @@ -304,7 +313,8 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -350,9 +360,11 @@ layout(location = 8) in float dp_clip; #endif +layout(location = 9) in flat uint instance_index; + //defines to keep compatibility with vertex -#define world_matrix draw_call.transform +#define world_matrix instances.data[instance_index].transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -1770,7 +1782,7 @@ vec4 fog_process(vec3 vertex) { } } - float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); if (abs(scene_data.fog_height_density) > 0.001) { float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; @@ -2083,7 +2095,7 @@ FRAGMENT_SHADER_CODE #endif uint decal_index = 32 * i + bit; - if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2210,8 +2222,8 @@ FRAGMENT_SHADER_CODE #ifdef USE_LIGHTMAP //lightmap - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture - uint index = draw_call.gi_offset; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = 
instances.data[instance_index].gi_offset; vec3 wnormal = mat3(scene_data.camera_matrix) * normal; const float c1 = 0.429043; @@ -2230,12 +2242,12 @@ FRAGMENT_SHADER_CODE 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); - } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap - bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = draw_call.gi_offset & 0xFFFF; + } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; vec3 uvw; - uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; - uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); + uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2244,7 +2256,7 @@ FRAGMENT_SHADER_CODE vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; - uint idx = draw_call.gi_offset >> 20; + uint idx = instances.data[instance_index].gi_offset >> 20; vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); ambient_light += lm_light_l0 * 0.282095f; @@ -2264,7 +2276,7 @@ FRAGMENT_SHADER_CODE } #elif defined(USE_FORWARD_GI) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture 
//make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; @@ -2336,9 +2348,9 @@ FRAGMENT_SHADER_CODE } } - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); @@ -2350,7 +2362,7 @@ FRAGMENT_SHADER_CODE vec4 spec_accum = vec4(0.0); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); - uint index2 = draw_call.gi_offset >> 16; + uint index2 = instances.data[instance_index].gi_offset >> 16; if (index2 != 0xFFFF) { gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); @@ -2369,7 +2381,7 @@ FRAGMENT_SHADER_CODE } #elif !defined(LOW_END_MODE) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers vec2 coord; @@ -2448,7 +2460,7 @@ FRAGMENT_SHADER_CODE #endif uint reflection_index = 32 * i + bit; - if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2519,7 +2531,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2838,7 +2850,7 @@ FRAGMENT_SHADER_CODE break; } - 
if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2968,7 +2980,7 @@ FRAGMENT_SHADER_CODE #endif uint light_index = 32 * i + bit; - if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3041,7 +3053,7 @@ FRAGMENT_SHADER_CODE uint light_index = 32 * i + bit; - if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3214,9 +3226,9 @@ FRAGMENT_SHADER_CODE normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; - uint index2 = draw_call.gi_offset >> 16; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; giprobe_buffer.x = index1 & 0xFF; giprobe_buffer.y = index2 & 0xFF; } else { @@ -3275,6 +3287,7 @@ FRAGMENT_SHADER_CODE // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. 
frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; #endif //MODE_MULTIPLE_RENDER_TARGETS diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index e9b79e1560..d78890fa9e 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -21,12 +21,10 @@ #endif layout(push_constant, binding = 0, std430) uniform DrawCall { - mat4 transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; + uint instance_index; + uint uv_offset; + uint pad0; + uint pad1; } draw_call; @@ -45,96 +43,13 @@ draw_call; #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 -layout(set = 0, binding = 1) uniform sampler material_samplers[12]; - -layout(set = 0, binding = 2) uniform sampler shadow_sampler; - #define SDFGI_MAX_CASCADES 8 -layout(set = 0, binding = 3, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - - mat4 camera_matrix; - mat4 inv_camera_matrix; - - vec2 viewport_size; - vec2 screen_pixel_size; - - uint cluster_shift; - uint cluster_width; - uint cluster_type_size; - uint max_cluster_element_count_div_32; - - //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted - vec4 directional_penumbra_shadow_kernel[32]; - vec4 directional_soft_shadow_kernel[32]; - vec4 penumbra_shadow_kernel[32]; - vec4 soft_shadow_kernel[32]; - - uint directional_penumbra_shadow_samples; - uint directional_soft_shadow_samples; - uint penumbra_shadow_samples; - uint soft_shadow_samples; - - vec4 ambient_light_color_energy; +/* Set 1: Base Pass (never changes) */ - float ambient_color_sky_mix; - bool use_ambient_light; - bool use_ambient_cubemap; - 
bool use_reflection_cubemap; - - mat3 radiance_inverse_xform; - - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; - - uint directional_light_count; - float dual_paraboloid_side; - float z_far; - float z_near; - - bool ssao_enabled; - float ssao_light_affect; - float ssao_ao_affect; - bool roughness_limiter_enabled; - - float roughness_limiter_amount; - float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; - - mat4 sdf_to_bounds; - - ivec3 sdf_offset; - bool material_uv2_mode; - - ivec3 sdf_size; - bool gi_upscale_for_msaa; - - bool volumetric_fog_enabled; - float volumetric_fog_inv_length; - float volumetric_fog_detail_spread; - uint volumetric_fog_pad; - - bool fog_enabled; - float fog_density; - float fog_height; - float fog_height_density; - - vec3 fog_light_color; - float fog_sun_scatter; - - float fog_aerial_perspective; - - float time; - float reflection_multiplier; // one normally, zero when rendering reflections - - bool pancake_shadows; -} +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -scene_data; +layout(set = 0, binding = 2) uniform sampler shadow_sampler; #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) #define INSTANCE_FLAGS_USE_SDFGI (1 << 7) @@ -153,22 +68,22 @@ scene_data; #define INSTANCE_FLAGS_SKELETON (1 << 19) #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { LightData data[]; } spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { 
+layout(set = 0, binding = 6, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -180,7 +95,7 @@ struct Lightmap { mat3 normal_xform; }; -layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps { +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { Lightmap data[]; } lightmaps; @@ -189,20 +104,20 @@ struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 10) uniform texture2D decal_atlas; -layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 12, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -216,7 +131,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 14, std140) uniform SDFGI { +layout(set = 0, binding = 13, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -246,47 +161,140 @@ sdfgi; #endif //LOW_END_MODE -// decal atlas +/* Set 2: Render Pass (changes per render pass) */ -/* Set 1, Radiance */ +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint 
max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; + + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY 
-layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependent) */ - -layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; -layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; -#ifndef LOW_END_MODE -layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif -layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer { +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { uint data[]; } cluster_buffer; -/* Set 3, Render Buffers */ - #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) 
uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -295,17 +303,17 @@ layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 7) uniform texture2D depth_buffer; -layout(set = 1, binding = 8) uniform texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 10) uniform texture2D ao_buffer; -layout(set = 1, binding = 11) uniform texture2D ambient_buffer; -layout(set = 1, binding = 12) uniform texture2D reflection_buffer; -layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -323,22 +331,22 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 15, std140) uniform GIProbes { +layout(set = 1, binding = 17, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE #endif -/* Set 4 Skeleton & Instancing (Multimesh) */ +/* Set 2 Skeleton & Instancing (can change per 
item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { vec4 data[]; } transforms; -/* Set 5 User Material */ +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl index 29443ae7db..a29b24e560 100644 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl @@ -6,8 +6,20 @@ VERSION_DEFINES #define BLOCK_SIZE 8 +#ifdef MODE_REDUCE_SUBGROUP + +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +//nvidia friendly, max 32 +layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in; + +#else + layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; +#endif + #ifdef MODE_REDUCE shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; @@ -16,8 +28,12 @@ const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); #endif -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; +#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP) +layout(set = 0, binding = 0) uniform sampler2D source_depth; +#else +layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth; +#endif +layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth; layout(push_constant, binding = 1, std430) uniform Params { ivec2 source_size; @@ -29,6 +45,48 @@ layout(push_constant, binding = 1, std430) uniform Params { params; void main() { +#ifdef MODE_REDUCE_SUBGROUP + + uvec2 local_pos = gl_LocalInvocationID.xy; + ivec2 image_offset = params.source_offset; + ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2)); + + float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; + depth += texelFetch(source_depth, 
min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r; + depth *= 0.5; + +#ifdef MODE_REDUCE_8 + //fast version, reduce all + float depth_average = subgroupAdd(depth) / 32.0; + if (local_pos == uvec2(0)) { + imageStore(dst_depth, image_pos / 8, vec4(depth_average)); + } +#else + //bit slower version, reduce by regions + uint group_size = (8 / params.min_size); + uvec2 group_id = local_pos / (8 / params.min_size); + + uvec4 mask; + float depth_average = 0; + + while (true) { + uvec2 first = subgroupBroadcastFirst(group_id); + mask = subgroupBallot(first == group_id); + if (first == group_id) { + depth_average = subgroupAdd(depth); + break; + } + } + + depth_average /= float(group_size * group_size); + + if (local_pos == group_id) { + imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average)); + } +#endif + +#endif + #ifdef MODE_REDUCE uvec2 pos = gl_LocalInvocationID.xy; @@ -36,7 +94,7 @@ void main() { ivec2 image_offset = params.source_offset; ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; + tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; ivec2 image_size = params.source_size; uint t = pos.y * BLOCK_SIZE + pos.x; diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index db601ba49c..8067f9574c 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -1906,6 +1906,9 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RS::LightOmniShadowMode shadow_mode = RSG::storage->light_omni_get_shadow_mode(p_instance->base); if (shadow_mode == RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID || !scene_render->light_instances_can_render_shadow_cube()) { + if (max_shadows_used + 2 > MAX_UPDATE_SHADOWS) { + return 
true; + } for (int i = 0; i < 2; i++) { //using this one ensures that raster deferred will have it RENDER_TIMESTAMP("Culling Shadow Paraboloid" + itos(i)); @@ -1922,7 +1925,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons planes.write[4] = light_transform.xform(Plane(Vector3(0, -1, z).normalized(), radius)); planes.write[5] = light_transform.xform(Plane(Vector3(0, 0, -z), 0)); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -1943,6 +1945,8 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Plane near_plane(light_transform.origin, light_transform.basis.get_axis(2) * z); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; + for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows) { @@ -1957,16 +1961,21 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, i, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render); + shadow_data.light = light->instance; + shadow_data.pass = i; } } else { //shadow cube + if (max_shadows_used + 6 > MAX_UPDATE_SHADOWS) { + return true; + } + real_t radius = RSG::storage->light_get_param(p_instance->base, 
RS::LIGHT_PARAM_RANGE); CameraMatrix cm; cm.set_perspective(90, 1, 0.01, radius); @@ -1996,7 +2005,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Vector<Plane> planes = cm.get_projection_planes(xform); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -2015,7 +2023,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex); - Plane near_plane(xform.origin, -xform.basis.get_axis(2)); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; @@ -2030,22 +2038,28 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, cm, xform, radius, 0, i, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render); + + shadow_data.light = light->instance; + shadow_data.pass = i; } //restore the regular DP matrix - scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0); + //scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0); } } break; case RS::LIGHT_SPOT: { RENDER_TIMESTAMP("Culling Spot Light"); + if (max_shadows_used + 1 > MAX_UPDATE_SHADOWS) { + return true; 
+ } + real_t radius = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_RANGE); real_t angle = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_SPOT_ANGLE); @@ -2054,7 +2068,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons Vector<Plane> planes = cm.get_projection_planes(light_transform); - geometry_instances_to_shadow_render.clear(); instance_shadow_cull_result.clear(); Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size()); @@ -2073,7 +2086,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex); - Plane near_plane(light_transform.origin, -light_transform.basis.get_axis(2)); + RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; @@ -2088,13 +2101,14 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RSG::storage->mesh_instance_check_for_update(instance->mesh_instance); } } - geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); + shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance); } RSG::storage->update_mesh_instances(); scene_render->light_instance_set_shadow_transform(light->instance, cm, light_transform, radius, 0, 0, 0); - scene_render->render_shadow(light->instance, p_shadow_atlas, 0, geometry_instances_to_shadow_render); + shadow_data.light = light->instance; + shadow_data.pass = 0; } break; } @@ -2147,14 +2161,13 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_ RID environment = _render_get_environment(p_camera, p_scenario); - 
_prepare_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), p_screen_lod_threshold); - _render_scene(p_render_buffers, camera->transform, camera_matrix, ortho, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); + _render_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); #endif } void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_interface, XRInterface::Eyes p_eye, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas) { // render for AR/VR interface - +#if 0 Camera *camera = camera_owner.getornull(p_camera); ERR_FAIL_COND(!camera); @@ -2234,6 +2247,7 @@ void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_ // And render our scene... 
_render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold); +#endif }; void RendererSceneCull::_frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data) { @@ -2452,7 +2466,7 @@ void RendererSceneCull::_frustum_cull(FrustumCullData &cull_data, FrustumCullRes } } -void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows) { +void RendererSceneCull::_render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows) { // Note, in stereo rendering: // - p_cam_transform will be a transform in the middle of our two eyes // - p_cam_projection is a wider frustrum that encompasses both eyes @@ -2466,6 +2480,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca scene_render->set_scene_pass(render_pass); if (p_render_buffers.is_valid()) { + //no rendering code here, this is only to set up what needs to be done, request regions, etc. 
scene_render->sdfgi_update(p_render_buffers, p_environment, p_cam_transform.origin); //update conditions for SDFGI (whether its used or not) } @@ -2596,62 +2611,28 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca //render shadows - for (uint32_t i = 0; i < cull.shadow_count; i++) { - for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) { - const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j]; - // print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size())); - scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale); - scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j], near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold); - } - } + max_shadows_used = 0; - //render SDFGI + if (p_using_shadows) { //setup shadow maps - { - if (cull.sdfgi.region_count > 0) { - //update regions - for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { - scene_render->render_sdfgi(p_render_buffers, i, frustum_cull_result.sdfgi_region_geometry_instances[i]); - } - //check if static lights were culled - bool static_lights_culled = false; - for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) { - if (frustum_cull_result.sdfgi_cascade_lights[i].size()) { - static_lights_culled = true; - break; - } - } + // Directional Shadows - if (static_lights_culled) { - scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, frustum_cull_result.sdfgi_cascade_lights); + for (uint32_t i = 0; i < cull.shadow_count; i++) { + for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) { + const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j]; + // print_line("shadow " + itos(i) + " 
cascade " + itos(j) + " elements: " + itos(c.cull_result.size())); + scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale); + if (max_shadows_used == MAX_UPDATE_SHADOWS) { + continue; + } + render_shadow_data[max_shadows_used].light = cull.shadows[i].light_instance; + render_shadow_data[max_shadows_used].pass = j; + render_shadow_data[max_shadows_used].instances.merge_unordered(frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j]); + max_shadows_used++; } } - if (p_render_buffers.is_valid()) { - scene_render->sdfgi_update_probes(p_render_buffers, p_environment, directional_lights, scenario->dynamic_lights.ptr(), scenario->dynamic_lights.size()); - } - } - - //light_samplers_culled=0; - - /* - print_line("OT: "+rtos( (OS::get_singleton()->get_ticks_usec()-t)/1000.0)); - print_line("OTO: "+itos(p_scenario->octree.get_octant_count())); - print_line("OTE: "+itos(p_scenario->octree.get_elem_count())); - print_line("OTP: "+itos(p_scenario->octree.get_pair_count())); - */ - - /* STEP 3 - PROCESS PORTALS, VALIDATE ROOMS */ - //removed, will replace with culling - - /* STEP 4 - REMOVE FURTHER CULLED OBJECTS, ADD LIGHTS */ - - /* STEP 5 - PROCESS POSITIONAL LIGHTS */ - - if (p_using_shadows) { //setup shadow maps - - //SortArray<Instance*,_InstanceLightsort> sorter; - //sorter.sort(light_cull_result,light_cull_count); + // Positional Shadowss for (uint32_t i = 0; i < (uint32_t)frustum_cull_result.lights.size(); i++) { Instance *ins = frustum_cull_result.lights[i]; @@ -2738,12 +2719,49 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca bool redraw = scene_render->shadow_atlas_update_light(p_shadow_atlas, light->instance, coverage, light->last_version); - if (redraw) { + if (redraw && max_shadows_used < MAX_UPDATE_SHADOWS) { //must redraw! 
RENDER_TIMESTAMP(">Rendering Light " + itos(i)); light->shadow_dirty = _light_instance_update_shadow(ins, p_cam_transform, p_cam_projection, p_cam_orthogonal, p_cam_vaspect, p_shadow_atlas, scenario, p_screen_lod_threshold); RENDER_TIMESTAMP("<Rendering Light " + itos(i)); + } else { + light->shadow_dirty = redraw; + } + } + } + + //render SDFGI + + { + sdfgi_update_data.update_static = false; + + if (cull.sdfgi.region_count > 0) { + //update regions + for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { + render_sdfgi_data[i].instances.merge_unordered(frustum_cull_result.sdfgi_region_geometry_instances[i]); + render_sdfgi_data[i].region = i; } + //check if static lights were culled + bool static_lights_culled = false; + for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) { + if (frustum_cull_result.sdfgi_cascade_lights[i].size()) { + static_lights_culled = true; + break; + } + } + + if (static_lights_culled) { + sdfgi_update_data.static_cascade_count = cull.sdfgi.cascade_light_count; + sdfgi_update_data.static_cascade_indices = cull.sdfgi.cascade_light_index; + sdfgi_update_data.static_positional_lights = frustum_cull_result.sdfgi_cascade_lights; + sdfgi_update_data.update_static = true; + } + } + + if (p_render_buffers.is_valid()) { + sdfgi_update_data.directional_lights = &directional_lights; + sdfgi_update_data.positional_light_instances = scenario->dynamic_lights.ptr(); + sdfgi_update_data.positional_light_count = scenario->dynamic_lights.size(); } } @@ -2751,6 +2769,28 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca for (int i = 0; i < directional_lights.size(); i++) { frustum_cull_result.light_instances.push_back(directional_lights[i]); } + + RID camera_effects; + if (p_force_camera_effects.is_valid()) { + camera_effects = p_force_camera_effects; + } else { + camera_effects = scenario->camera_effects; + } + /* PROCESS GEOMETRY AND DRAW SCENE */ + + RENDER_TIMESTAMP("Render Scene "); + 
scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold, render_shadow_data, max_shadows_used, render_sdfgi_data, cull.sdfgi.region_count, &sdfgi_update_data); + + for (uint32_t i = 0; i < max_shadows_used; i++) { + render_shadow_data[i].instances.clear(); + } + max_shadows_used = 0; + + for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) { + render_sdfgi_data[i].instances.clear(); + } + + // virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold,const RenderShadowData *p_render_shadows,int p_render_shadow_count,const RenderSDFGIData *p_render_sdfgi_regions,int p_render_sdfgi_region_count,const RenderSDFGIStaticLightData *p_render_sdfgi_static_lights=nullptr) = 0; } RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) { @@ -2774,21 +2814,6 @@ RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) { return RID(); } -void RendererSceneCull::_render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID 
p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) { - Scenario *scenario = scenario_owner.getornull(p_scenario); - - RID camera_effects; - if (p_force_camera_effects.is_valid()) { - camera_effects = p_force_camera_effects; - } else { - camera_effects = scenario->camera_effects; - } - /* PROCESS GEOMETRY AND DRAW SCENE */ - - RENDER_TIMESTAMP("Render Scene "); - scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold); -} - void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) { #ifndef _3D_DISABLED @@ -2801,7 +2826,7 @@ void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, environment = scenario->fallback_environment; } RENDER_TIMESTAMP("Render Empty Scene "); - scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0); + scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0, nullptr, 0, nullptr, 0, nullptr); #endif } @@ -2864,8 +2889,7 @@ bool RendererSceneCull::_render_reflection_probe_step(Instance *p_instance, int } 
RENDER_TIMESTAMP("Render Reflection Probe, Step " + itos(p_step)); - _prepare_scene(xform, cm, false, false, RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, lod_threshold, use_shadows); - _render_scene(RID(), xform, cm, false, RID(), RID(), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold); + _render_scene(xform, cm, false, false, RID(), RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold, use_shadows); } else { //do roughness postprocess step until it believes it's done @@ -3493,7 +3517,12 @@ RendererSceneCull::RendererSceneCull() { instance_cull_result.set_page_pool(&instance_cull_page_pool); instance_shadow_cull_result.set_page_pool(&instance_cull_page_pool); - geometry_instances_to_shadow_render.set_page_pool(&geometry_instance_cull_page_pool); + for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) { + render_shadow_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool); + } + for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + render_sdfgi_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool); + } frustum_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool); frustum_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count()); @@ -3510,7 +3539,12 @@ RendererSceneCull::~RendererSceneCull() { instance_cull_result.reset(); instance_shadow_cull_result.reset(); - geometry_instances_to_shadow_render.reset(); + for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) { + render_shadow_data[i].instances.reset(); + } + for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) { + render_sdfgi_data[i].instances.reset(); + } frustum_cull_result.reset(); for 
(uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 2ffaf48675..a04e336f10 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -54,7 +54,8 @@ public: enum { SDFGI_MAX_CASCADES = 8, SDFGI_MAX_REGIONS_PER_CASCADE = 3, - MAX_INSTANCE_PAIRS = 32 + MAX_INSTANCE_PAIRS = 32, + MAX_UPDATE_SHADOWS = 512 }; uint64_t render_pass; @@ -696,7 +697,6 @@ public: PagedArray<Instance *> instance_cull_result; PagedArray<Instance *> instance_shadow_cull_result; - PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances_to_shadow_render; struct FrustumCullResult { PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances; @@ -816,6 +816,12 @@ public: FrustumCullResult frustum_cull_result; LocalVector<FrustumCullResult> frustum_cull_result_threads; + RendererSceneRender::RenderShadowData render_shadow_data[MAX_UPDATE_SHADOWS]; + uint32_t max_shadows_used = 0; + + RendererSceneRender::RenderSDFGIData render_sdfgi_data[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; + RendererSceneRender::RenderSDFGIUpdateData sdfgi_update_data; + uint32_t thread_cull_threshold = 200; RID_PtrOwner<Instance> instance_owner; @@ -924,8 +930,7 @@ public: void _frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to); bool _render_reflection_probe_step(Instance *p_instance, int p_step); - void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true); - void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID 
p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); + void _render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows = true); void render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas); void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas); diff --git a/servers/rendering/renderer_scene_render.h b/servers/rendering/renderer_scene_render.h index ecec03db94..015327f9d9 100644 --- a/servers/rendering/renderer_scene_render.h +++ b/servers/rendering/renderer_scene_render.h @@ -87,7 +87,6 @@ public: virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const = 0; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const = 0; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const = 0; - virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0; /* SKY API */ @@ -195,12 +194,31 @@ public: virtual void gi_probe_set_quality(RS::GIProbeQuality) = 0; - virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID 
p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) = 0; + struct RenderShadowData { + RID light; + int pass = 0; + PagedArray<GeometryInstance *> instances; + }; + + struct RenderSDFGIData { + int region = 0; + PagedArray<GeometryInstance *> instances; + }; + + struct RenderSDFGIUpdateData { + bool update_static = false; + uint32_t static_cascade_count; + uint32_t *static_cascade_indices; + PagedArray<RID> *static_positional_lights; + + const Vector<RID> *directional_lights; + const RID *positional_light_instances; + uint32_t positional_light_count; + }; + + virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr) = 0; - virtual void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0) = 0; virtual void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; - virtual void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) = 0; - virtual void 
render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_lights) = 0; virtual void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances) = 0; virtual void set_scene_pass(uint64_t p_pass) = 0; diff --git a/servers/rendering/renderer_storage.h b/servers/rendering/renderer_storage.h index 7a80c2b0bf..f015b50eee 100644 --- a/servers/rendering/renderer_storage.h +++ b/servers/rendering/renderer_storage.h @@ -98,6 +98,7 @@ public: while (to_clean_up.size()) { to_clean_up.front()->get().first->instances.erase(to_clean_up.front()->get().second); + dependencies.erase(to_clean_up.front()->get().first); to_clean_up.pop_front(); } } diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 70497bcdb3..4b0eafe369 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -240,10 +240,6 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size); } -void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group) { - compute_list_dispatch(p_list, (p_x_threads - 1) / p_x_local_group + 1, (p_y_threads - 1) / p_y_local_group + 1, (p_z_threads - 1) / p_z_local_group + 1); -} - void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_create", "format", "view", "data"), &RenderingDevice::_texture_create, DEFVAL(Array())); ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared); @@ -319,7 +315,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), 
&RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL)); - ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); + ClassDB::bind_method(D_METHOD("compute_list_begin", "allow_draw_overlap"), &RenderingDevice::compute_list_begin, DEFVAL(false)); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant); ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set); @@ -352,10 +348,15 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label); ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label); + ClassDB::bind_method(D_METHOD("get_device_vendor_name"), &RenderingDevice::get_device_vendor_name); + ClassDB::bind_method(D_METHOD("get_device_name"), &RenderingDevice::get_device_name); + ClassDB::bind_method(D_METHOD("get_device_pipeline_cache_uuid"), &RenderingDevice::get_device_pipeline_cache_uuid); + BIND_CONSTANT(BARRIER_MASK_RASTER); BIND_CONSTANT(BARRIER_MASK_COMPUTE); BIND_CONSTANT(BARRIER_MASK_TRANSFER); BIND_CONSTANT(BARRIER_MASK_ALL); + BIND_CONSTANT(BARRIER_MASK_NO_BARRIER); BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4_UNORM_PACK8); BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4B4A4_UNORM_PACK16); @@ -760,6 +761,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR); //start rendering and clear the framebuffer (supply params) BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION); //start rendering and clear the framebuffer (supply params) + BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION_CONTINUE); //continue rendering 
and clear the framebuffer (supply params) BIND_ENUM_CONSTANT(INITIAL_ACTION_KEEP); //start rendering); but keep attached color texture contents (depth will be cleared) BIND_ENUM_CONSTANT(INITIAL_ACTION_DROP); //start rendering); ignore what is there); just write above it BIND_ENUM_CONSTANT(INITIAL_ACTION_CONTINUE); //continue rendering (framebuffer must have been left in "continue" state as final action previously) diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 47ef54cef7..9fbf58d131 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -343,6 +343,7 @@ public: BARRIER_MASK_RASTER = 1, BARRIER_MASK_COMPUTE = 2, BARRIER_MASK_TRANSFER = 4, + BARRIER_MASK_NO_BARRIER = 8, BARRIER_MASK_ALL = BARRIER_MASK_RASTER | BARRIER_MASK_COMPUTE | BARRIER_MASK_TRANSFER }; @@ -944,6 +945,7 @@ public: enum InitialAction { INITIAL_ACTION_CLEAR, //start rendering and clear the whole framebuffer (region or not) (supply params) INITIAL_ACTION_CLEAR_REGION, //start rendering and clear the framebuffer in the specified region (supply params) + INITIAL_ACTION_CLEAR_REGION_CONTINUE, //countinue rendering and clear the framebuffer in the specified region (supply params) INITIAL_ACTION_KEEP, //start rendering, but keep attached color texture contents (depth will be cleared) INITIAL_ACTION_DROP, //start rendering, ignore what is there, just write above it INITIAL_ACTION_CONTINUE, //continue rendering (framebuffer must have been left in "continue" state as final action previously) @@ -983,12 +985,12 @@ public: typedef int64_t ComputeListID; - virtual ComputeListID compute_list_begin() = 0; + virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false) = 0; virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0; virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0; virtual void 
compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) = 0; virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; - virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group); + virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) = 0; virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) = 0; virtual void compute_list_add_barrier(ComputeListID p_list) = 0; @@ -1078,6 +1080,10 @@ public: virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)) = 0; virtual void draw_command_end_label() = 0; + virtual String get_device_vendor_name() const = 0; + virtual String get_device_name() const = 0; + virtual String get_device_pipeline_cache_uuid() const = 0; + static RenderingDevice *get_singleton(); RenderingDevice(); |