From 280f334f81c439d391d9934f9cf1791f074c3773 Mon Sep 17 00:00:00 2001 From: reduz Date: Mon, 25 Jan 2021 21:52:58 -0300 Subject: Reorganize RenderingDevice barriers -Removed sync to draw, now everything syncs to draw by default. -Fixed many validation layer errors. -Added support for VkImageViewUsageCreateInfo to fix validation layer warnings. -Texture, buffer, raster and compute functions now all allow specifying which barriers will be used. --- drivers/vulkan/rendering_device_vulkan.cpp | 381 ++++++++++++++++----- drivers/vulkan/rendering_device_vulkan.h | 21 +- drivers/vulkan/vulkan_context.cpp | 19 + modules/lightmapper_rd/lightmapper_rd.cpp | 4 +- .../rendering/renderer_rd/cluster_builder_rd.cpp | 8 +- servers/rendering/renderer_rd/effects_rd.cpp | 10 +- .../renderer_rd/renderer_canvas_render_rd.cpp | 4 +- .../renderer_rd/renderer_scene_render_forward.cpp | 16 +- .../renderer_rd/renderer_scene_render_rd.cpp | 53 +-- .../rendering/renderer_rd/renderer_storage_rd.cpp | 28 +- .../renderer_rd/shaders/sdfgi_integrate.glsl | 9 +- servers/rendering/rendering_device.cpp | 30 +- servers/rendering/rendering_device.h | 32 +- 13 files changed, 443 insertions(+), 172 deletions(-) diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 0689b3f2dd..ef331ec4b6 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -41,28 +41,60 @@ //#define FORCE_FULL_BARRIER // Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values) -RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &stage_mask, VkAccessFlags &access_mask) { +RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) { Buffer *buffer = nullptr; if (vertex_buffer_owner.owns(p_buffer)) { - 
stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; buffer = vertex_buffer_owner.getornull(p_buffer); + + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + } } else if (index_buffer_owner.owns(p_buffer)) { - stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - access_mask |= VK_ACCESS_INDEX_READ_BIT; + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + r_access_mask |= VK_ACCESS_INDEX_READ_BIT; buffer = index_buffer_owner.getornull(p_buffer); } else if (uniform_buffer_owner.owns(p_buffer)) { - stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - access_mask |= VK_ACCESS_UNIFORM_READ_BIT; + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT; buffer = uniform_buffer_owner.getornull(p_buffer); } else if (texture_buffer_owner.owns(p_buffer)) { - stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - access_mask |= VK_ACCESS_SHADER_READ_BIT; + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (p_post_barrier & 
BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { - stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; buffer = storage_buffer_owner.getornull(p_buffer); + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + + if (buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) { + r_stage_mask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + r_access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } } return buffer; } @@ -2067,6 +2099,48 @@ RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } + VkImageViewUsageCreateInfo usage_info; + usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + usage_info.pNext = nullptr; + if (p_view.format_override != DATA_FORMAT_MAX) { + //need to validate usage with vulkan + + usage_info.usage = 0; + + if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_STORAGE_BIT)) { + usage_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + } + + if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + if 
(texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { + usage_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + } + + if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + + image_view_create_info.pNext = &usage_info; + } + VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view); ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + "."); @@ -2196,11 +2270,11 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p return id; } -Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating textures in 'sync to draw' mode is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(draw_list || compute_list, ERR_INVALID_PARAMETER, + "Updating textures in is forbidden during creation of a draw or compute list"); Texture *texture = texture_owner.getornull(p_texture); ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER); @@ -2241,7 +2315,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con const uint8_t *r = p_data.ptr(); - VkCommandBuffer command_buffer = p_sync_with_draw ? 
frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = p_post_barrier ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; //barrier to transfer { @@ -2266,6 +2340,10 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con } uint32_t mipmap_offset = 0; + + uint32_t logic_width = texture->width; + uint32_t logic_height = texture->height; + for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) { uint32_t depth; uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth); @@ -2282,12 +2360,15 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con uint32_t region_w = MIN(region_size, width - x); uint32_t region_h = MIN(region_size, height - y); + uint32_t region_logic_w = MIN(region_size, logic_width - x); + uint32_t region_logic_h = MIN(region_size, logic_height - y); + uint32_t pixel_size = get_image_format_pixel_size(texture->format); uint32_t to_allocate = region_w * region_h * pixel_size; to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format); uint32_t alloc_offset, alloc_size; - Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_sync_with_draw); + Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_post_barrier); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); uint8_t *write_ptr; @@ -2363,8 +2444,8 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con buffer_image_copy.imageOffset.y = y; buffer_image_copy.imageOffset.z = z; - buffer_image_copy.imageExtent.width = region_w; - buffer_image_copy.imageExtent.height = region_h; + buffer_image_copy.imageExtent.width = region_logic_w; + buffer_image_copy.imageExtent.height = region_logic_h; buffer_image_copy.imageExtent.depth = 1; 
vkCmdCopyBufferToImage(command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy); @@ -2375,15 +2456,32 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con } mipmap_offset = image_total; + logic_width = MAX(1, logic_width >> 1); + logic_height = MAX(1, logic_height >> 1); } //barrier to restore layout { + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = texture->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2395,7 +2493,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + 
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; @@ -2608,13 +2706,13 @@ bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) { return texture_owner.owns(p_texture); } -Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_from_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER, "Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved."); @@ -2635,7 +2733,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, Texture *dst_tex = texture_owner.getornull(p_to_texture); ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER, "Destination texture can't be copied while a render pass that uses it is being created. 
Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER, "Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved."); @@ -2656,7 +2754,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; { //PRE Copy the image @@ -2731,12 +2829,27 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, // RESTORE LAYOUT for SRC and DST + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2748,7 +2861,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2757,7 +2870,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = dst_tex->layout; @@ -2770,20 +2883,20 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } return OK; } -Error 
RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_from_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER, "Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved."); @@ -2794,7 +2907,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID Texture *dst_tex = texture_owner.getornull(p_to_texture); ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER, "Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER, "Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved."); @@ -2808,7 +2921,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - VkCommandBuffer command_buffer = p_sync_with_draw ? 
frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; { //PRE Copy the image @@ -2883,12 +2996,27 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID // RESTORE LAYOUT for SRC and DST + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2900,7 +3028,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, 
VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2909,7 +3037,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = dst_tex->layout; @@ -2922,20 +3050,20 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } return OK; } -Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be cleared while a 
render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V(p_layers == 0, ERR_INVALID_PARAMETER); @@ -2952,7 +3080,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER); ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; VkImageLayout clear_layout = (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; @@ -2999,11 +3127,27 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, vkCmdClearColorImage(command_buffer, src_tex->image, clear_layout, &clear_color, 1, &range); { // Barrier to post clear accesses (changing back the layout if needed) + + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = valid_texture_access; + 
image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = clear_layout; image_memory_barrier.newLayout = src_tex->layout; @@ -3016,7 +3160,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, valid_texture_stages, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; @@ -5038,19 +5182,22 @@ bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) { return uniform_set_owner.owns(p_uniform_set); } -Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list"); - ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list"); + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a compute list"); - // Protect subsequent updates... 
- VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; - VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; - - Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access); + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + // Protect subsequent updates... + dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } + Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier); if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } @@ -5058,35 +5205,41 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw); - Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_sync_with_draw); + // no barrier should be needed here + // _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, true); + + Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_post_barrier); if (err) { return err; } #ifdef FORCE_FULL_BARRIER - _full_barrier(p_sync_with_draw); + _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw); + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); #endif return err; } -Error 
RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, "Size must be a multiple of four"); - ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list"); - ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list"); + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Updating buffers in is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a compute list"); - // Protect subsequent updates... - VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; - VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + // Protect subsequent updates... 
+ dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } - Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access); + Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier); if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } @@ -5094,14 +5247,15 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3 ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw); + // should not be needed + // _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_post_barrier); - vkCmdFillBuffer(p_sync_with_draw ? 
frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, buffer->buffer, p_offset, p_size, 0); + vkCmdFillBuffer(frames[frame].draw_command_buffer, buffer->buffer, p_offset, p_size, 0); #ifdef FORCE_FULL_BARRIER - _full_barrier(p_sync_with_draw); + _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw); + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_post_barrier); #endif return OK; } @@ -5113,7 +5267,7 @@ Vector RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; // Get the vulkan buffer and the potential stage/access possible - Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask); + Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL); if (!buffer) { ERR_FAIL_V_MSG(Vector(), "Buffer is either invalid or this type of buffer can't be retrieved. 
Only Index and Vertex buffers allow retrieving."); } @@ -6476,7 +6630,7 @@ void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor); } -void RenderingDeviceVulkan::draw_list_end() { +void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -6512,6 +6666,21 @@ void RenderingDeviceVulkan::draw_list_end() { } } + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + draw_list_bound_textures.clear(); for (int i = 0; i < draw_list_storage_textures.size(); i++) { @@ -6521,7 +6690,7 @@ void RenderingDeviceVulkan::draw_list_end() { image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6534,7 +6703,7 @@ void RenderingDeviceVulkan::draw_list_end() { 
image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6548,7 +6717,7 @@ void RenderingDeviceVulkan::draw_list_end() { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, true); + _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, barrier_flags, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, access_flags, true); #endif } @@ -6871,14 +7040,30 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { #endif } -void RenderingDeviceVulkan::compute_list_end() { +void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { ERR_FAIL_COND(!compute_list); + + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= 
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + for (Set::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = E->get()->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6892,7 +7077,7 @@ void RenderingDeviceVulkan::compute_list_end() { image_memory_barrier.subresourceRange.layerCount = E->get()->layers; // TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 
1, &image_memory_barrier); E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6902,10 +7087,44 @@ void RenderingDeviceVulkan::compute_list_end() { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); + _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); #endif } +void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { + uint32_t src_barrier_flags = 0; + uint32_t src_access_flags = 0; + if (p_from & BARRIER_MASK_COMPUTE) { + src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_from & BARRIER_MASK_RASTER) { + src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + if (p_from & BARRIER_MASK_TRANSFER) { + src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + uint32_t dst_barrier_flags = 0; + uint32_t dst_access_flags = 0; + if (p_to & BARRIER_MASK_COMPUTE) { + dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_to & BARRIER_MASK_RASTER) { + 
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_to & BARRIER_MASK_TRANSFER) { + dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); +} + void RenderingDeviceVulkan::full_barrier() { #ifndef DEBUG_ENABLED ERR_PRINT("Full barrier is debug-only, should not be used in production"); @@ -7524,9 +7743,10 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) { } } -void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_to_draw) { +void RenderingDeviceVulkan::capture_timestamp(const String &p_name) { ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); + //this should be optional for profiling, else it will slow things down { VkMemoryBarrier memoryBarrier; @@ -7563,9 +7783,10 @@ void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_ VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; - vkCmdPipelineBarrier(p_sync_to_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); } - vkCmdWriteTimestamp(p_sync_to_draw ? 
frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count); + + vkCmdWriteTimestamp(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count); frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name; frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec(); frames[frame].timestamp_count++; diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index ba3e1b243c..4bea17e4a1 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -785,7 +785,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass); Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector &p_storage_textures); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); - Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access); + Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, uint32_t p_post_barrier); /**********************/ /**** COMPUTE LIST ****/ @@ -913,16 +913,16 @@ public: virtual 
RID texture_create_shared(const TextureView &p_view, RID p_with_texture); virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type = TEXTURE_SLICE_2D); - virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_sync_with_draw = false); + virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); virtual Vector texture_get_data(RID p_texture, uint32_t p_layer); virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const; virtual bool texture_is_shared(RID p_texture); virtual bool texture_is_valid(RID p_texture); - virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw = false); - virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw = false); - virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw = false); + virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier = BARRIER_MASK_ALL); /*********************/ /**** FRAMEBUFFER ****/ @@ -975,8 +975,8 @@ public: virtual RID 
uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set); virtual bool uniform_set_is_valid(RID p_uniform_set); - virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false); //works for any buffer - virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw = false); + virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); //works for any buffer + virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL); virtual Vector buffer_get_data(RID p_buffer); /*************************/ @@ -1022,7 +1022,7 @@ public: virtual void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); virtual void draw_list_disable_scissor(DrawListID p_list); - virtual void draw_list_end(); + virtual void draw_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); /***********************/ /**** COMPUTE LISTS ****/ @@ -1036,8 +1036,9 @@ public: virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); - virtual void compute_list_end(); + virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void barrier(uint32_t p_from = BARRIER_MASK_ALL, uint32_t p_to = BARRIER_MASK_ALL); virtual void full_barrier(); /**************/ @@ -1050,7 +1051,7 @@ public: /**** Timing ****/ /****************/ - virtual void capture_timestamp(const String &p_name, bool p_sync_to_draw); + virtual void capture_timestamp(const String &p_name); virtual uint32_t get_captured_timestamps_count() const; virtual uint64_t get_captured_timestamps_frame() const; virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const; diff --git 
a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index c644395b54..7ac860087e 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -54,11 +54,30 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_messenger_callback( strstr(pCallbackData->pMessage, "can result in undefined behavior if this memory is used by the device") != nullptr) { return VK_FALSE; } + // This needs to be ignored because Validator is wrong here. + if (strstr(pCallbackData->pMessage, "Invalid SPIR-V binary version 1.3") != nullptr) { + return VK_FALSE; + } + // This needs to be ignored because Validator is wrong here. + if (strstr(pCallbackData->pMessage, "Shader requires flag") != nullptr) { + return VK_FALSE; + } + // This needs to be ignored because Validator is wrong here. if (strstr(pCallbackData->pMessage, "SPIR-V module not valid: Pointer operand") != nullptr && strstr(pCallbackData->pMessage, "must be a memory object") != nullptr) { return VK_FALSE; } + /* + // This is a valid warning because its illegal in Vulkan, but in practice it should work according to VK_KHR_maintenance2 + if (strstr(pCallbackData->pMessage, "VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 with tiling VK_IMAGE_TILING_OPTIMAL does not support usage that includes VK_IMAGE_USAGE_STORAGE_BIT") != nullptr) { + return VK_FALSE; + } + + if (strstr(pCallbackData->pMessage, "VK_FORMAT_R4G4B4A4_UNORM_PACK16 with tiling VK_IMAGE_TILING_OPTIMAL does not support usage that includes VK_IMAGE_USAGE_STORAGE_BIT") != nullptr) { + return VK_FALSE; + } +*/ // Workaround for Vulkan-Loader usability bug: https://github.com/KhronosGroup/Vulkan-Loader/issues/262. 
if (strstr(pCallbackData->pMessage, "wrong ELF class: ELFCLASS32") != nullptr) { return VK_FALSE; diff --git a/modules/lightmapper_rd/lightmapper_rd.cpp b/modules/lightmapper_rd/lightmapper_rd.cpp index 3067e002d8..f31eb3f066 100644 --- a/modules/lightmapper_rd/lightmapper_rd.cpp +++ b/modules/lightmapper_rd/lightmapper_rd.cpp @@ -1436,7 +1436,7 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d dst[j + 3] = src[j + 3]; } } - rd->texture_update(light_accum_tex, i, ds, true); + rd->texture_update(light_accum_tex, i, ds); } } } @@ -1537,7 +1537,7 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d { //pre copy for (int i = 0; i < atlas_slices * (p_bake_sh ? 4 : 1); i++) { - rd->texture_copy(light_accum_tex, light_accum_tex2, Vector3(), Vector3(), Vector3(atlas_size.width, atlas_size.height, 1), 0, 0, i, i, true); + rd->texture_copy(light_accum_tex, light_accum_tex2, Vector3(), Vector3(), Vector3(atlas_size.width, atlas_size.height, 1), 0, 0, i, i); } Vector framebuffers; diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index 8d9cff0f43..c35e5e1730 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -401,11 +401,11 @@ void ClusterBuilderRD::bake_cluster() { RENDER_TIMESTAMP(">Bake Cluster"); //clear cluster buffer - RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, true); + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size); if (render_element_count > 0) { //clear render buffer - RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, true); + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size); { //fill state uniform @@ -420,12 +420,12 @@ void ClusterBuilderRD::bake_cluster() { state.cluster_depth_offset = (render_element_max / 32); 
state.cluster_data_size = state.cluster_depth_offset + render_element_max; - RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, true); + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state); } //update instances - RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, true); + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements); RENDER_TIMESTAMP("Render Elements"); diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index 5a6a4d2a55..f1bab19445 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -1324,7 +1324,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep RD::get_singleton()->compute_list_end(); int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); } void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { @@ -1787,7 +1787,7 @@ EffectsRD::EffectsRD() { } } - RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants, false); + RD::get_singleton()->buffer_update(ssao.gather_constants_buffer, 0, sizeof(SSAOGatherConstants), &gather_constants); } { Vector ssao_modes; @@ -1806,7 +1806,7 @@ EffectsRD::EffectsRD() { } ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, false); + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); 
RD::get_singleton()->set_resource_name(ssao.importance_map_load_counter, "Importance Map Load Counter"); Vector uniforms; @@ -1896,10 +1896,10 @@ EffectsRD::EffectsRD() { if (filter.use_high_quality) { filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(high_quality_coeffs)); - RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(high_quality_coeffs), &high_quality_coeffs[0], false); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(high_quality_coeffs), &high_quality_coeffs[0]); } else { filter.coefficient_buffer = RD::get_singleton()->storage_buffer_create(sizeof(low_quality_coeffs)); - RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(low_quality_coeffs), &low_quality_coeffs[0], false); + RD::get_singleton()->buffer_update(filter.coefficient_buffer, 0, sizeof(low_quality_coeffs), &low_quality_coeffs[0]); } Vector uniforms; diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index 792fcb0b59..c354ad8c1c 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -1367,7 +1367,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p } if (light_count > 0) { - RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0], true); + RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]); } { @@ -1421,7 +1421,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p //print_line("w: " + itos(ssize.width) + " s: " + rtos(canvas_scale)); state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5); - RD::get_singleton()->buffer_update(state.canvas_state_buffer, 0, sizeof(State::Buffer), &state_buffer, true); + 
RD::get_singleton()->buffer_update(state.canvas_state_buffer, 0, sizeof(State::Buffer), &state_buffer); } { //default filter/repeat diff --git a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp index eebf8debcd..a20a5073c3 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_forward.cpp @@ -1287,7 +1287,7 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); - RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo, true); + RD::get_singleton()->buffer_update(scene_state.uniform_buffer, 0, sizeof(SceneState::UBO), &scene_state.ubo); } void RendererSceneRenderForward::_fill_render_list(const PagedArray &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi, bool p_using_opaque_gi) { @@ -1444,7 +1444,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArraybuffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, true); + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures); } } @@ -1473,7 +1473,7 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray &p_light scene_state.lightmaps_used++; } if (scene_state.lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, true); + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * 
scene_state.lightmaps_used, scene_state.lightmaps); } } @@ -1707,7 +1707,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf static int texture_samples[RS::VIEWPORT_MSAA_MAX] = { 1, 2, 4, 8, 16 }; storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_giprobe ? render_buffer->giprobe_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_giprobe ? render_buffer->giprobe_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_samples[render_buffer->msaa]); } else if (finish_depth) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } } @@ -1805,14 +1805,14 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } if (render_buffer && !can_continue_color && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); if (using_separate_specular) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular); } } if (render_buffer && !can_continue_depth && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->depth_msaa, render_buffer->depth); } if (using_separate_specular) { @@ -1849,7 +1849,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf } if (render_buffer && render_buffer->msaa 
!= RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color, true); + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); } } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 2f35a6db23..1461be8088 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1148,7 +1148,7 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) { cascade_data[i].pad = 0; } - RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, true); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data); } void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) { @@ -1257,7 +1257,7 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi } if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); } cascade_light_count[i] = idx; @@ -1500,7 +1500,7 @@ void RendererSceneRenderRD::_setup_giprobes(RID p_render_buffers, const Transfor } if (p_gi_probes.size() > 0) { - RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data, true); + RD::get_singleton()->buffer_update(gi_probe_buffer, 0, sizeof(GI::GIProbeData) * MIN((uint64_t)RenderBuffers::MAX_GIPROBES, p_gi_probes.size()), gi_probe_data); } } @@ -1640,7 +1640,7 
@@ void RendererSceneRenderRD::_process_gi(RID p_render_buffers, RID p_normal_rough c.to_cell = 1.0 / rb->sdfgi->cascades[i].cell_size; } - RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data, true); + RD::get_singleton()->buffer_update(gi.sdfgi_ubo, 0, sizeof(GI::SDFGIData), &sdfgi_data); } if (rb->gi_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi_uniform_set)) { @@ -2334,7 +2334,7 @@ void RendererSceneRenderRD::_setup_sky(RID p_environment, RID p_render_buffers, } if (light_data_dirty) { - RD::get_singleton()->buffer_update(sky_scene_state.directional_light_buffer, 0, sizeof(SkyDirectionalLightData) * sky_scene_state.max_directional_lights, sky_scene_state.directional_lights, true); + RD::get_singleton()->buffer_update(sky_scene_state.directional_light_buffer, 0, sizeof(SkyDirectionalLightData) * sky_scene_state.max_directional_lights, sky_scene_state.directional_lights); RendererSceneRenderRD::SkyDirectionalLightData *temp = sky_scene_state.last_frame_directional_lights; sky_scene_state.last_frame_directional_lights = sky_scene_state.directional_lights; @@ -2386,7 +2386,7 @@ void RendererSceneRenderRD::_setup_sky(RID p_environment, RID p_render_buffers, sky_scene_state.ubo.fog_light_color[2] = fog_color.b * fog_energy; sky_scene_state.ubo.fog_sun_scatter = environment_get_fog_sun_scatter(p_environment); - RD::get_singleton()->buffer_update(sky_scene_state.uniform_buffer, 0, sizeof(SkySceneState::UBO), &sky_scene_state.ubo, true); + RD::get_singleton()->buffer_update(sky_scene_state.uniform_buffer, 0, sizeof(SkySceneState::UBO), &sky_scene_state.ubo); } void RendererSceneRenderRD::_update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform) { @@ -4165,7 +4165,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins gi_probe->texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - 
RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1, false); + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1); { int total_elements = 0; @@ -4477,7 +4477,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins if (gi_probe->has_dynamic_object_data) { //if it has dynamic object data, it needs to be cleared - RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1, true); + RD::get_singleton()->texture_clear(gi_probe->texture, Color(0, 0, 0, 0), 0, gi_probe->mipmaps.size(), 0, 1); } uint32_t light_count = 0; @@ -4528,7 +4528,7 @@ void RendererSceneRenderRD::gi_probe_update(RID p_probe, bool p_update_light_ins l.has_shadow = storage->light_has_shadow(light); } - RD::get_singleton()->buffer_update(gi_probe_lights_uniform, 0, sizeof(GIProbeLight) * light_count, gi_probe_lights, true); + RD::get_singleton()->buffer_update(gi_probe_lights_uniform, 0, sizeof(GIProbeLight) * light_count, gi_probe_lights); } } @@ -6179,7 +6179,7 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray &p_reflecti } if (cluster.reflection_count) { - RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections, true); + RD::get_singleton()->buffer_update(cluster.reflection_buffer, 0, cluster.reflection_count * sizeof(ReflectionData), cluster.reflections); } } @@ -6572,15 +6572,15 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray &p_lights, const } if (cluster.omni_light_count) { - RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights, true); + RD::get_singleton()->buffer_update(cluster.omni_light_buffer, 0, sizeof(Cluster::LightData) * cluster.omni_light_count, cluster.omni_lights); } if (cluster.spot_light_count) { - 
RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights, true); + RD::get_singleton()->buffer_update(cluster.spot_light_buffer, 0, sizeof(Cluster::LightData) * cluster.spot_light_count, cluster.spot_lights); } if (r_directional_light_count) { - RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights, true); + RD::get_singleton()->buffer_update(cluster.directional_light_buffer, 0, sizeof(Cluster::DirectionalLightData) * r_directional_light_count, cluster.directional_lights); } } @@ -6741,7 +6741,7 @@ void RendererSceneRenderRD::_setup_decals(const PagedArray &p_decals, const } if (cluster.decal_count > 0) { - RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals, true); + RD::get_singleton()->buffer_update(cluster.decal_buffer, 0, sizeof(Cluster::DecalData) * cluster.decal_count, cluster.decals); } } @@ -7276,7 +7276,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RENDER_TIMESTAMP(">Volumetric Fog"); RENDER_TIMESTAMP("Render Fog"); - RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, true); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7305,7 +7305,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e //need restart for buffer update params.filter_axis = 1; - RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, true); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params); compute_list = 
RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, volumetric_fog.pipelines[VOLUMETRIC_FOG_SHADER_FILTER]); @@ -7641,10 +7641,10 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con if (cascade_prev != cascade) { //initialize render - RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1, true); + RD::get_singleton()->texture_clear(rb->sdfgi->render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1); } //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(rb->sdfgi->cascades[cascade].cell_size)); @@ -7776,7 +7776,7 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int p_region, con //clear dispatch indirect data uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; - RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data, true); + RD::get_singleton()->buffer_update(rb->sdfgi->cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -7947,9 +7947,9 @@ void RendererSceneRenderRD::render_sdfgi(RID p_render_buffers, int 
p_region, con RD::get_singleton()->compute_list_end(); //clear these textures, as they will have previous garbage on next draw - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); - RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1, true); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(rb->sdfgi->cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); #if 0 Vector data = RD::get_singleton()->texture_get_data(rb->sdfgi->cascades[cascade].sdf, 0); @@ -8070,7 +8070,7 @@ void RendererSceneRenderRD::render_sdfgi_static_lights(RID p_render_buffers, uin } if (idx > 0) { - RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights, true); + RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDGIShader::Light), lights); } light_count[i] = idx; @@ -8693,6 +8693,9 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) { //calculate tables String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; + if (sky_use_cubemap_array) { + defines += "\n#define USE_CUBEMAP_ARRAY\n"; + } Vector integrate_modes; integrate_modes.push_back("\n#define MODE_PROCESS\n"); diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index 96dd5a6669..6d4343e183 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -873,7 +873,7 @@ 
void RendererStorageRD::_texture_2d_update(RID p_texture, const Ref &p_im TextureToRDFormat f; Ref validated = _validate_texture_format(p_image, f); - RD::get_singleton()->texture_update(tex->rd_texture, p_layer, validated->get_data(), !p_immediate); + RD::get_singleton()->texture_update(tex->rd_texture, p_layer, validated->get_data()); } void RendererStorageRD::texture_2d_update_immediate(RID p_texture, const Ref &p_image, int p_layer) { @@ -918,7 +918,7 @@ void RendererStorageRD::texture_3d_update(RID p_texture, const Vector } } - RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data, true); + RD::get_singleton()->texture_update(tex->rd_texture, 0, all_data); } void RendererStorageRD::texture_proxy_update(RID p_texture, RID p_proxy_to) { @@ -3044,7 +3044,7 @@ void RendererStorageRD::update_mesh_instances() { MeshInstance *mi = dirty_mesh_instance_weights.first()->self(); if (mi->blend_weights_buffer.is_valid()) { - RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr(), true); + RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr()); } dirty_mesh_instance_weights.remove(&mi->weight_update_list); mi->weights_dirty = false; @@ -3712,7 +3712,7 @@ void RendererStorageRD::multimesh_set_buffer(RID p_multimesh, const Vectorbuffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r, false); + RD::get_singleton()->buffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r); multimesh->buffer_set = true; } @@ -3811,14 +3811,14 @@ void RendererStorageRD::_update_dirty_multimeshes() { if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) { //if there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much - RD::get_singleton()->buffer_update(multimesh->buffer, 0, 
MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data, false); + RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data); } else { //not that many regions? update them all for (uint32_t i = 0; i < visible_region_count; i++) { if (multimesh->data_cache_dirty_regions[i]) { uint64_t offset = i * region_size; uint64_t size = multimesh->stride_cache * multimesh->instances * sizeof(float); - RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size], false); + RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size]); } } } @@ -4509,7 +4509,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta if (sub_emitter && sub_emitter->emission_storage_buffer.is_valid()) { // print_line("updating subemitter buffer"); int32_t zero[4] = { 0, sub_emitter->amount, 0, 0 }; - RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero, true); + RD::get_singleton()->buffer_update(sub_emitter->emission_storage_buffer, 0, sizeof(uint32_t) * 4, zero); push_constant.can_emit = true; if (sub_emitter->emitting) { @@ -4527,13 +4527,13 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta } if (p_particles->emission_buffer && p_particles->emission_buffer->particle_count) { - RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer, true); + RD::get_singleton()->buffer_update(p_particles->emission_storage_buffer, 0, sizeof(uint32_t) * 4 + sizeof(ParticleEmissionBuffer::Data) * p_particles->emission_buffer->particle_count, p_particles->emission_buffer); 
p_particles->emission_buffer->particle_count = 0; } p_particles->clear = false; - RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params, true); + RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params); ParticlesMaterialData *m = (ParticlesMaterialData *)material_get_data(p_particles->process_material, SHADER_TYPE_PARTICLES); if (!m) { @@ -5332,7 +5332,7 @@ void RendererStorageRD::_update_dirty_skeletons() { Skeleton *skeleton = skeleton_dirty_list; if (skeleton->size) { - RD::get_singleton()->buffer_update(skeleton->buffer, 0, skeleton->data.size() * sizeof(float), skeleton->data.ptr(), false); + RD::get_singleton()->buffer_update(skeleton->buffer, 0, skeleton->data.size() * sizeof(float), skeleton->data.ptr()); } skeleton_dirty_list = skeleton->dirty_list; @@ -7371,7 +7371,7 @@ void RendererStorageRD::_update_decal_atlas() { tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView()); - RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1, true); + RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1); { //create the framebuffer @@ -7426,7 +7426,7 @@ void RendererStorageRD::_update_decal_atlas() { prev_texture = mm.texture; } } else { - RD::get_singleton()->texture_clear(mm.texture, clear_color, 0, 1, 0, 1, false); + RD::get_singleton()->texture_clear(mm.texture, clear_color, 0, 1, 0, 1); } } } @@ -8297,11 +8297,11 @@ EffectsRD *RendererStorageRD::get_effects() { } void RendererStorageRD::capture_timestamps_begin() { - RD::get_singleton()->capture_timestamp("Frame Begin", false); + RD::get_singleton()->capture_timestamp("Frame Begin"); } void RendererStorageRD::capture_timestamp(const String &p_name) { - RD::get_singleton()->capture_timestamp(p_name, 
true); + RD::get_singleton()->capture_timestamp(p_name); } uint32_t RendererStorageRD::get_captured_timestamps_count() const { diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl index d122e7a38a..e4f6f4b7ea 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -39,8 +39,11 @@ layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_aver layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; +#ifdef USE_CUBEMAP_ARRAY +layout(set = 1, binding = 0) uniform textureCubeArray sky_irradiance; +#else layout(set = 1, binding = 0) uniform textureCube sky_irradiance; - +#endif layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; #define HISTORY_BITS 10 @@ -256,7 +259,11 @@ void main() { light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); light.a = 1.0; } else if (params.sky_mode == SKY_MODE_SKY) { +#ifdef USE_CUBEMAP_ARRAY + light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#else light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates +#endif light.rgb *= params.sky_energy; light.a = 0.0; diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 9a254c5a7a..67f9246b5b 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -174,8 +174,8 @@ RID RenderingDevice::_uniform_set_create(const Array &p_uniforms, RID p_shader, return uniform_set_create(uniforms, p_shader, p_shader_set); } 
-Error RenderingDevice::_buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, bool p_sync_with_draw) { - return buffer_update(p_buffer, p_offset, p_size, p_data.ptr(), p_sync_with_draw); +Error RenderingDevice::_buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, uint32_t p_post_barrier) { + return buffer_update(p_buffer, p_offset, p_size, p_data.ptr(), p_post_barrier); } RID RenderingDevice::_render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref &p_blend_state, int p_dynamic_state_flags) { @@ -249,7 +249,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared); ClassDB::bind_method(D_METHOD("texture_create_shared_from_slice", "view", "with_texture", "layer", "mipmap", "slice_type"), &RenderingDevice::_texture_create_shared_from_slice, DEFVAL(TEXTURE_SLICE_2D)); - ClassDB::bind_method(D_METHOD("texture_update", "texture", "layer", "data", "sync_with_draw"), &RenderingDevice::texture_update, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("texture_update", "texture", "layer", "data", "post_barrier"), &RenderingDevice::texture_update, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("texture_get_data", "texture", "layer"), &RenderingDevice::texture_get_data); ClassDB::bind_method(D_METHOD("texture_is_format_supported_for_usage", "format", "usage_flags"), &RenderingDevice::texture_is_format_supported_for_usage); @@ -257,9 +257,9 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_is_shared", "texture"), &RenderingDevice::texture_is_shared); ClassDB::bind_method(D_METHOD("texture_is_valid", "texture"), &RenderingDevice::texture_is_valid); - 
ClassDB::bind_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer", "sync_with_draw"), &RenderingDevice::texture_copy, DEFVAL(false)); - ClassDB::bind_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count", "sync_with_draw"), &RenderingDevice::texture_clear, DEFVAL(false)); - ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "sync_with_draw"), &RenderingDevice::texture_resolve_multisample, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer", "post_barrier"), &RenderingDevice::texture_copy, DEFVAL(BARRIER_MASK_ALL)); + ClassDB::bind_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count", "post_barrier"), &RenderingDevice::texture_clear, DEFVAL(BARRIER_MASK_ALL)); + ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "post_barrier"), &RenderingDevice::texture_resolve_multisample, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("framebuffer_format_create", "attachments"), &RenderingDevice::_framebuffer_format_create); ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "samples"), &RenderingDevice::framebuffer_format_create_empty, DEFVAL(TEXTURE_SAMPLES_1)); @@ -287,8 +287,8 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("uniform_set_create", "uniforms", "shader", "shader_set"), &RenderingDevice::_uniform_set_create); ClassDB::bind_method(D_METHOD("uniform_set_is_valid", "uniform_set"), &RenderingDevice::uniform_set_is_valid); - ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "sync_with_draw"), &RenderingDevice::_buffer_update, DEFVAL(true)); - 
ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", "sync_with_draw"), &RenderingDevice::_buffer_update, DEFVAL(true)); + ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "post_barrier"), &RenderingDevice::_buffer_update, DEFVAL(BARRIER_MASK_ALL)); + ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", "post_barrier"), &RenderingDevice::_buffer_update, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("buffer_get_data", "buffer"), &RenderingDevice::buffer_get_data); ClassDB::bind_method(D_METHOD("render_pipeline_create", "shader", "framebuffer_format", "vertex_format", "primitive", "rasterization_state", "multisample_state", "stencil_state", "color_blend_state", "dynamic_state_flags"), &RenderingDevice::_render_pipeline_create, DEFVAL(0)); @@ -317,7 +317,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_enable_scissor", "draw_list", "rect"), &RenderingDevice::draw_list_enable_scissor, DEFVAL(Rect2i())); ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor); - ClassDB::bind_method(D_METHOD("draw_list_end"), &RenderingDevice::draw_list_end); + ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); @@ -325,11 +325,11 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set); ClassDB::bind_method(D_METHOD("compute_list_dispatch", "compute_list", "x_groups", "y_groups", "z_groups"), 
&RenderingDevice::compute_list_dispatch); ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier); - ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end); + ClassDB::bind_method(D_METHOD("compute_list_end", "post_barrier"), &RenderingDevice::compute_list_end, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("free", "rid"), &RenderingDevice::free); - ClassDB::bind_method(D_METHOD("capture_timestamp", "name", "sync_to_draw"), &RenderingDevice::capture_timestamp); + ClassDB::bind_method(D_METHOD("capture_timestamp", "name"), &RenderingDevice::capture_timestamp); ClassDB::bind_method(D_METHOD("get_captured_timestamps_count"), &RenderingDevice::get_captured_timestamps_count); ClassDB::bind_method(D_METHOD("get_captured_timestamps_frame"), &RenderingDevice::get_captured_timestamps_frame); ClassDB::bind_method(D_METHOD("get_captured_timestamp_gpu_time", "index"), &RenderingDevice::get_captured_timestamp_gpu_time); @@ -341,6 +341,9 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit); ClassDB::bind_method(D_METHOD("sync"), &RenderingDevice::sync); + ClassDB::bind_method(D_METHOD("barrier", "from", "to"), &RenderingDevice::barrier, DEFVAL(BARRIER_MASK_ALL), DEFVAL(BARRIER_MASK_ALL)); + ClassDB::bind_method(D_METHOD("full_barrier"), &RenderingDevice::full_barrier); + ClassDB::bind_method(D_METHOD("create_local_device"), &RenderingDevice::create_local_device); ClassDB::bind_method(D_METHOD("set_resource_name"), &RenderingDevice::set_resource_name); @@ -349,6 +352,11 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label); ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label); + BIND_CONSTANT(BARRIER_MASK_RASTER); + BIND_CONSTANT(BARRIER_MASK_COMPUTE); + 
BIND_CONSTANT(BARRIER_MASK_TRANSFER); + BIND_CONSTANT(BARRIER_MASK_ALL); + BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4_UNORM_PACK8); BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4B4A4_UNORM_PACK16); BIND_ENUM_CONSTANT(DATA_FORMAT_B4G4R4A4_UNORM_PACK16); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 4e499b72d4..47ef54cef7 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -335,6 +335,17 @@ public: DATA_FORMAT_MAX }; + /*****************/ + /**** BARRIER ****/ + /*****************/ + + enum BarrierMask { + BARRIER_MASK_RASTER = 1, + BARRIER_MASK_COMPUTE = 2, + BARRIER_MASK_TRANSFER = 4, + BARRIER_MASK_ALL = BARRIER_MASK_RASTER | BARRIER_MASK_COMPUTE | BARRIER_MASK_TRANSFER + }; + /*****************/ /**** TEXTURE ****/ /*****************/ @@ -438,16 +449,16 @@ public: virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type = TEXTURE_SLICE_2D) = 0; - virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_sync_with_draw = false) = 0; //this function can be used from any thread and it takes effect at the beginning of the frame, unless sync with draw is used, which is used to mix updates with draw calls + virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; virtual Vector texture_get_data(RID p_texture, uint32_t p_layer) = 0; // CPU textures will return immediately, while GPU textures will most likely force a flush virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const = 0; virtual bool texture_is_shared(RID p_texture) = 0; virtual bool texture_is_valid(RID p_texture) = 0; - virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t 
p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw = false) = 0; - virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw = false) = 0; - virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw = false) = 0; + virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; + virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; + virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; /*********************/ /**** FRAMEBUFFER ****/ @@ -649,8 +660,8 @@ public: virtual RID uniform_set_create(const Vector &p_uniforms, RID p_shader, uint32_t p_shader_set) = 0; virtual bool uniform_set_is_valid(RID p_uniform_set) = 0; - virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false) = 0; //this function can be used from any thread and it takes effect at the beginning of the frame, unless sync with draw is used, which is used to mix updates with draw calls - virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw = false) = 0; + virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; + virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; virtual Vector buffer_get_data(RID p_buffer) = 0; //this 
causes stall, only use to retrieve large buffers for saving /*************************/ @@ -964,7 +975,7 @@ public: virtual void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) = 0; virtual void draw_list_disable_scissor(DrawListID p_list) = 0; - virtual void draw_list_end() = 0; + virtual void draw_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; /***********************/ /**** COMPUTE LISTS ****/ @@ -981,8 +992,9 @@ public: virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) = 0; virtual void compute_list_add_barrier(ComputeListID p_list) = 0; - virtual void compute_list_end() = 0; + virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL) = 0; + virtual void barrier(uint32_t p_from = BARRIER_MASK_ALL, uint32_t p_to = BARRIER_MASK_ALL) = 0; virtual void full_barrier() = 0; /***************/ @@ -995,7 +1007,7 @@ public: /**** Timing ****/ /****************/ - virtual void capture_timestamp(const String &p_name, bool p_sync_to_draw) = 0; + virtual void capture_timestamp(const String &p_name) = 0; virtual uint32_t get_captured_timestamps_count() const = 0; virtual uint64_t get_captured_timestamps_frame() const = 0; virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const = 0; @@ -1085,7 +1097,7 @@ protected: RID _uniform_set_create(const Array &p_uniforms, RID p_shader, uint32_t p_shader_set); - Error _buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, bool p_sync_with_draw = false); + Error _buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref &p_blend_state, int p_dynamic_state_flags = 0); -- 
cgit v1.2.3