Diffstat (limited to 'drivers/vulkan/rendering_device_vulkan.cpp')
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 757 |
1 file changed, 645 insertions(+), 112 deletions(-)
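The heart of this patch: every transfer-style function (texture_update, texture_copy, texture_resolve_multisample, texture_clear, buffer_update, buffer_clear, draw_list_end, compute_list_end, ...) trades its `bool p_sync_with_draw` parameter for a `uint32_t p_post_barrier` bitmask, so the caller states which pipeline stages must observe the result instead of getting a single worst-case barrier. A rough sketch of what call sites look like after the change (the `rd` pointer and RIDs are hypothetical; the `BARRIER_MASK_*` constants are the ones referenced throughout the diff):

```cpp
// Before: the only choice was whether to record on the setup or the draw command buffer.
rd->texture_update(texture, 0, data, true /* p_sync_with_draw */);

// After: name the consumers; the device emits one barrier covering exactly
// those stages, or none at all with BARRIER_MASK_NO_BARRIER.
rd->texture_update(texture, 0, data,
		RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);
```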
diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp
index 3dcdaef699..9584dd3f67 100644
--- a/drivers/vulkan/rendering_device_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_vulkan.cpp
@@ -41,28 +41,62 @@
 //#define FORCE_FULL_BARRIER
 
 // Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values)
-RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &stage_mask, VkAccessFlags &access_mask) {
+RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) {
 	Buffer *buffer = nullptr;
 	if (vertex_buffer_owner.owns(p_buffer)) {
-		stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-		access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
 		buffer = vertex_buffer_owner.getornull(p_buffer);
+
+		r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+		r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+		if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
+			if (p_post_barrier & BARRIER_MASK_RASTER) {
+				r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+				r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			}
+			if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+				r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+				r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			}
+		}
 	} else if (index_buffer_owner.owns(p_buffer)) {
-		stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-		access_mask |= VK_ACCESS_INDEX_READ_BIT;
+		r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+		r_access_mask |= VK_ACCESS_INDEX_READ_BIT;
 		buffer = index_buffer_owner.getornull(p_buffer);
 	} else if (uniform_buffer_owner.owns(p_buffer)) {
-		stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-		access_mask |= VK_ACCESS_UNIFORM_READ_BIT;
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+		}
+		r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT;
 		buffer = uniform_buffer_owner.getornull(p_buffer);
 	} else if (texture_buffer_owner.owns(p_buffer)) {
-		stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-		access_mask |= VK_ACCESS_SHADER_READ_BIT;
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			r_access_mask |= VK_ACCESS_SHADER_READ_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			r_access_mask |= VK_ACCESS_SHADER_READ_BIT;
+		}
+
 		buffer = &texture_buffer_owner.getornull(p_buffer)->buffer;
 	} else if (storage_buffer_owner.owns(p_buffer)) {
-		stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-		access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
 		buffer = storage_buffer_owner.getornull(p_buffer);
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+
+		if (buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) {
+			r_stage_mask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
+			r_access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+		}
 	}
 	return buffer;
 }
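Note the rename from `stage_mask`/`access_mask` to `r_stage_mask`/`r_access_mask`: these are in-out accumulators (hence the bitwise OR throughout), now narrowed by `p_post_barrier`. The calling convention, reduced to a sketch that mirrors the `buffer_update` call site later in this diff:

```cpp
// Caller seeds the masks (usually with zero), then the helper ORs in the
// stages/accesses implied by the buffer type and the requested post-barrier.
// Storage buffers additionally pick up indirect-draw bits when the underlying
// VkBuffer carries VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT.
VkPipelineStageFlags dst_stage_mask = 0;
VkAccessFlags dst_access = 0;
Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
```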
@@ -1595,6 +1629,9 @@ void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mas
 	mem_barrier.srcAccessMask = p_src_access;
 	mem_barrier.dstAccessMask = p_dst_sccess;
 
+	if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) {
+		return; //no barrier, since this is invalid
+	}
 	vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr);
 }
 
@@ -2067,6 +2104,48 @@ RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID
 		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
 	}
 
+	VkImageViewUsageCreateInfo usage_info;
+	usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
+	usage_info.pNext = nullptr;
+	if (p_view.format_override != DATA_FORMAT_MAX) {
+		//need to validate usage with vulkan
+
+		usage_info.usage = 0;
+
+		if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
+			usage_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+		}
+
+		if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
+			if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_STORAGE_BIT)) {
+				usage_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+			}
+		}
+
+		if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
+			if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
+				usage_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+			}
+		}
+
+		if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+			usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+		}
+
+		if (texture.usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT) {
+			usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+		}
+		if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
+			usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+		}
+
+		if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT) {
+			usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+		}
+
+		image_view_create_info.pNext = &usage_info;
+	}
+
 	VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
 
 	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + ".");
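When a shared view overrides the parent texture's format, not every usage of the parent image is legal for the override, so the block above restricts the view with VkImageViewUsageCreateInfo, which is core in Vulkan 1.1 (promoted from VK_KHR_maintenance2). A minimal standalone sketch of the same pNext chaining, with hypothetical locals:

```cpp
VkImageViewUsageCreateInfo usage_info = {};
usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
usage_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; // only usages valid for the override format

VkImageViewCreateInfo view_info = {};
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view_info.pNext = &usage_info; // restricts this view only, not the parent image
// ... fill image, viewType, format (the override), components, subresourceRange ...
```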
@@ -2196,11 +2275,11 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p
 	return id;
 }
 
-Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
-	ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
-			"Updating textures in 'sync to draw' mode is forbidden during creation of a draw list");
+	ERR_FAIL_COND_V_MSG(draw_list || compute_list, ERR_INVALID_PARAMETER,
+			"Updating textures is forbidden during creation of a draw or compute list");
 
 	Texture *texture = texture_owner.getornull(p_texture);
 	ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER);
@@ -2241,7 +2320,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 
 	const uint8_t *r = p_data.ptr();
 
-	VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer;
+	VkCommandBuffer command_buffer = p_post_barrier ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer;
 
 	//barrier to transfer
 	{
@@ -2266,6 +2345,10 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 	}
 
 	uint32_t mipmap_offset = 0;
+
+	uint32_t logic_width = texture->width;
+	uint32_t logic_height = texture->height;
+
 	for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) {
 		uint32_t depth;
 		uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth);
@@ -2282,12 +2365,15 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 				uint32_t region_w = MIN(region_size, width - x);
 				uint32_t region_h = MIN(region_size, height - y);
 
+				uint32_t region_logic_w = MIN(region_size, logic_width - x);
+				uint32_t region_logic_h = MIN(region_size, logic_height - y);
+
 				uint32_t pixel_size = get_image_format_pixel_size(texture->format);
 				uint32_t to_allocate = region_w * region_h * pixel_size;
 				to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format);
 
 				uint32_t alloc_offset, alloc_size;
-				Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_sync_with_draw);
+				Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_post_barrier);
 				ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
 
 				uint8_t *write_ptr;
@@ -2363,8 +2449,8 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 				buffer_image_copy.imageOffset.y = y;
 				buffer_image_copy.imageOffset.z = z;
 
-				buffer_image_copy.imageExtent.width = region_w;
-				buffer_image_copy.imageExtent.height = region_h;
+				buffer_image_copy.imageExtent.width = region_logic_w;
+				buffer_image_copy.imageExtent.height = region_logic_h;
 				buffer_image_copy.imageExtent.depth = 1;
 
 				vkCmdCopyBufferToImage(command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy);
@@ -2375,15 +2461,36 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 		}
 
 		mipmap_offset = image_total;
+		logic_width = MAX(1, logic_width >> 1);
+		logic_height = MAX(1, logic_height >> 1);
 	}
 
 	//barrier to restore layout
 	{
+		uint32_t barrier_flags = 0;
+		uint32_t access_flags = 0;
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+		}
+
+		if (barrier_flags == 0) {
+			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+		}
+
 		VkImageMemoryBarrier image_memory_barrier;
 		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 		image_memory_barrier.pNext = nullptr;
 		image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+		image_memory_barrier.dstAccessMask = access_flags;
 		image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 		image_memory_barrier.newLayout = texture->layout;
 		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2395,8 +2502,15 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
 		image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
 		image_memory_barrier.subresourceRange.layerCount = 1;
 
-		vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+		vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+	}
+
+	if (texture->used_in_frame != frames_drawn) {
+		texture->used_in_raster = false;
+		texture->used_in_compute = false;
+		texture->used_in_frame = frames_drawn;
 	}
+	texture->used_in_transfer = true;
 
 	return OK;
 }
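texture_update translates the mask into a destination stage/access pair the same way the copy, resolve, and clear functions below do. Factored out, the repeated pattern looks like this (a hypothetical helper; the patch deliberately inlines it at each call site):

```cpp
// Not a function in the patch; equivalent to the inlined blocks it repeats.
static void post_barrier_to_vk(uint32_t p_post_barrier,
		VkPipelineStageFlags &r_stages, VkAccessFlags &r_access) {
	r_stages = 0;
	r_access = 0;
	if (p_post_barrier & RD::BARRIER_MASK_COMPUTE) {
		r_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
		r_access |= VK_ACCESS_SHADER_READ_BIT;
	}
	if (p_post_barrier & RD::BARRIER_MASK_RASTER) {
		r_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		r_access |= VK_ACCESS_SHADER_READ_BIT;
	}
	if (p_post_barrier & RD::BARRIER_MASK_TRANSFER) {
		r_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		r_access |= VK_ACCESS_TRANSFER_WRITE_BIT;
	}
	if (r_stages == 0) {
		// A zero dstStageMask is invalid in Vulkan; bottom-of-pipe with no
		// access bits turns the layout transition into a de-facto no-op barrier.
		r_stages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	}
}
```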
@@ -2608,13 +2722,13 @@ bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) {
 	return texture_owner.owns(p_texture);
 }
 
-Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
 	Texture *src_tex = texture_owner.getornull(p_from_texture);
 	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
 
-	ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER,
+	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
 			"Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
 	ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
 			"Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved.");
@@ -2635,7 +2749,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 	Texture *dst_tex = texture_owner.getornull(p_to_texture);
 	ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);
 
-	ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER,
+	ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
 			"Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
 	ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
 			"Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved.");
@@ -2656,7 +2770,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 	ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
 			"Source and destination texture must be of the same type (color or depth).");
 
-	VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer;
+	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
 
 	{
 		//PRE Copy the image
@@ -2731,12 +2845,31 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 
 		// RESTORE LAYOUT for SRC and DST
 
+		uint32_t barrier_flags = 0;
+		uint32_t access_flags = 0;
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+		}
+
+		if (barrier_flags == 0) {
+			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+		}
+
 		{ //restore src
 			VkImageMemoryBarrier image_memory_barrier;
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = access_flags;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
 			image_memory_barrier.newLayout = src_tex->layout;
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2748,7 +2881,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
 		}
 
 		{ //make dst readable
@@ -2757,7 +2890,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = access_flags;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 			image_memory_barrier.newLayout = dst_tex->layout;
 
@@ -2770,20 +2903,20 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
 			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
 		}
 	}
 
 	return OK;
 }
 
-Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
 	Texture *src_tex = texture_owner.getornull(p_from_texture);
 	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
 
-	ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER,
+	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
 			"Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
 	ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
 			"Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved.");
@@ -2794,7 +2927,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 	Texture *dst_tex = texture_owner.getornull(p_to_texture);
 	ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);
 
-	ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER,
+	ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
 			"Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
 	ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
 			"Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved.");
@@ -2808,7 +2941,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 	ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
 			"Source and destination texture must be of the same type (color or depth).");
 
-	VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer;
+	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
 
 	{
 		//PRE Copy the image
@@ -2883,12 +3016,31 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 
 		// RESTORE LAYOUT for SRC and DST
 
+		uint32_t barrier_flags = 0;
+		uint32_t access_flags = 0;
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+		}
+
+		if (barrier_flags == 0) {
+			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+		}
+
 		{ //restore src
 			VkImageMemoryBarrier image_memory_barrier;
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = access_flags;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
 			image_memory_barrier.newLayout = src_tex->layout;
 			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -2900,7 +3052,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 			image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
 		}
 
 		{ //make dst readable
@@ -2909,7 +3061,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
 			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.dstAccessMask = access_flags;
 			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 			image_memory_barrier.newLayout = dst_tex->layout;
 
@@ -2922,20 +3074,20 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
 			image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer;
 			image_memory_barrier.subresourceRange.layerCount = 1;
 
-			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
 		}
 	}
 
 	return OK;
 }
 
-Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
 	Texture *src_tex = texture_owner.getornull(p_texture);
 	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);
 
-	ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER,
+	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
 			"Source texture can't be cleared while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture.");
 
 	ERR_FAIL_COND_V(p_layers == 0, ERR_INVALID_PARAMETER);
@@ -2952,7 +3104,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color,
 	ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER);
 	ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER);
 
-	VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer;
+	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
 
 	VkImageLayout clear_layout = (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
 
@@ -2999,11 +3151,31 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color,
 	vkCmdClearColorImage(command_buffer, src_tex->image, clear_layout, &clear_color, 1, &range);
 
 	{ // Barrier to post clear accesses (changing back the layout if needed)
+
+		uint32_t barrier_flags = 0;
+		uint32_t access_flags = 0;
+		if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_RASTER) {
+			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+		}
+		if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+		}
+
+		if (barrier_flags == 0) {
+			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+		}
+
 		VkImageMemoryBarrier image_memory_barrier;
 		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 		image_memory_barrier.pNext = nullptr;
 		image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-		image_memory_barrier.dstAccessMask = valid_texture_access;
+		image_memory_barrier.dstAccessMask = access_flags;
 		image_memory_barrier.oldLayout = clear_layout;
 		image_memory_barrier.newLayout = src_tex->layout;
 
@@ -3016,8 +3188,15 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color,
 		image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer;
 		image_memory_barrier.subresourceRange.layerCount = p_layers;
 
-		vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, valid_texture_stages, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+		vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
+	}
+
+	if (src_tex->used_in_frame != frames_drawn) {
+		src_tex->used_in_raster = false;
+		src_tex->used_in_compute = false;
+		src_tex->used_in_frame = frames_drawn;
 	}
+	src_tex->used_in_transfer = true;
 
 	return OK;
 }
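texture_update and texture_clear now also record how a texture was last touched, which compute_list_bind_uniform_set uses further down to build a precise srcStageMask. The reset is lazy, keyed on frames_drawn, so no per-frame sweep over all textures is needed. The recurring bookkeeping pattern, as a sketch:

```cpp
// First touch in a new frame clears the usage flags; afterwards each path
// just sets its own bit (transfer here, raster/compute in the list code).
if (texture->used_in_frame != frames_drawn) {
	texture->used_in_raster = false;
	texture->used_in_compute = false;
	texture->used_in_frame = frames_drawn;
}
texture->used_in_transfer = true;
```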
@@ -3104,6 +3283,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 		// the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that
 		// stage
 		switch (is_depth_stencil ? p_initial_depth_action : p_initial_color_action) {
+			case INITIAL_ACTION_CLEAR_REGION:
 			case INITIAL_ACTION_CLEAR: {
 				description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
 				description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
@@ -3116,9 +3296,9 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 				} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
-					description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
-					description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
-					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
 					dependency_from_external.srcStageMask |= reading_stages;
 				} else {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -3144,6 +3324,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 					dependency_from_external.srcStageMask |= reading_stages;
 				}
 			} break;
+			case INITIAL_ACTION_CLEAR_REGION_CONTINUE:
 			case INITIAL_ACTION_CONTINUE: {
 				if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -3151,7 +3332,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
 				} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-					description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there
+					description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
 				} else {
 					description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -3280,8 +3461,13 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 	render_pass_create_info.pAttachments = attachments.ptr();
 	render_pass_create_info.subpassCount = 1;
 	render_pass_create_info.pSubpasses = &subpass;
-	render_pass_create_info.dependencyCount = 2;
-	render_pass_create_info.pDependencies = dependencies;
+	// Commenting this out because it seems to just prevent raster and compute from working at the same time.
+	// Other barriers seem to be protecting the render pass fine.
+	// render_pass_create_info.dependencyCount = 2;
+	// render_pass_create_info.pDependencies = dependencies;
+
+	render_pass_create_info.dependencyCount = 0;
+	render_pass_create_info.pDependencies = nullptr;
 
 	VkRenderPass render_pass;
 	VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass);
@@ -3963,6 +4149,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 
 	bool is_compute = false;
 
+	uint32_t compute_local_size[3] = { 0, 0, 0 };
+
 	for (int i = 0; i < p_stages.size(); i++) {
 		if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) {
 			is_compute = true;
@@ -3979,6 +4167,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 		ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
 				"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader.");
 
+		if (is_compute) {
+			compute_local_size[0] = module.entry_points->local_size.x;
+			compute_local_size[1] = module.entry_points->local_size.y;
+			compute_local_size[2] = module.entry_points->local_size.z;
+		}
 		uint32_t binding_count = 0;
 		result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr);
 		ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
@@ -4183,6 +4376,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 				}
 			}
 		}
+
 		uint32_t pc_count = 0;
 		result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr);
 		ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
@@ -4231,6 +4425,9 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 	shader.fragment_outputs = fragment_outputs;
 	shader.push_constant = push_constant;
 	shader.is_compute = is_compute;
+	shader.compute_local_size[0] = compute_local_size[0];
+	shader.compute_local_size[1] = compute_local_size[1];
+	shader.compute_local_size[2] = compute_local_size[2];
 
 	String error_text;
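The local workgroup size captured here is what makes compute_list_dispatch_threads (added near the end of this diff) possible. For reference, the reflection step in isolation, using the SPIRV-Reflect API this file already depends on (error handling elided; `spirv_size`/`spirv_data` stand in for the stage's bytecode, and, like the patch, this assumes the first entry point is the one of interest):

```cpp
SpvReflectShaderModule module;
SpvReflectResult result = spvReflectCreateShaderModule(spirv_size, spirv_data, &module);
if (result == SPV_REFLECT_RESULT_SUCCESS && module.entry_point_count > 0) {
	// local_size mirrors layout(local_size_x/y/z) in the compute shader.
	uint32_t local_x = module.entry_points[0].local_size.x;
	uint32_t local_y = module.entry_points[0].local_size.y;
	uint32_t local_z = module.entry_points[0].local_size.z;
}
spvReflectDestroyShaderModule(&module);
```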
@@ -5037,19 +5234,22 @@ bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) {
 	return uniform_set_owner.owns(p_uniform_set);
 }
 
-Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
-	ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
-			"Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list");
-	ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
-			"Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list");
-
-	// Protect subsequent updates...
-	VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
-	VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+	ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
+			"Updating buffers is forbidden during creation of a draw list");
+	ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
+			"Updating buffers is forbidden during creation of a compute list");
 
-	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access);
+	VkPipelineStageFlags dst_stage_mask = 0;
+	VkAccessFlags dst_access = 0;
+	if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+		// Protect subsequent updates...
+		dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+		dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+	}
+	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
 	if (!buffer) {
 		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
 	}
@@ -5057,35 +5257,48 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint
 	ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER,
 			"Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end.");
 
-	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw);
-	Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_sync_with_draw);
+	// no barrier should be needed here
+	// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, true);
+
+	Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_post_barrier);
 	if (err) {
 		return err;
 	}
 
 #ifdef FORCE_FULL_BARRIER
-	_full_barrier(p_sync_with_draw);
+	_full_barrier(true);
 #else
-	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw);
+	if (dst_stage_mask == 0) {
+		dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+	}
+
+	if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) {
+		_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
+	}
+
 #endif
 	return err;
 }
 
-Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw) {
+Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) {
 	_THREAD_SAFE_METHOD_
 
 	ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER,
 			"Size must be a multiple of four");
-	ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
-			"Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list");
-	ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
-			"Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list");
+	ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
+			"Updating buffers is forbidden during creation of a draw list");
+	ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
+			"Updating buffers is forbidden during creation of a compute list");
 
-	// Protect subsequent updates...
-	VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
-	VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+	VkPipelineStageFlags dst_stage_mask = 0;
+	VkAccessFlags dst_access = 0;
+	if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+		// Protect subsequent updates...
+		dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+		dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+	}
 
-	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access);
+	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
 	if (!buffer) {
 		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
 	}
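BARRIER_MASK_NO_BARRIER turns the trailing barrier off entirely, which is the point of the new API: batch many updates, then synchronize once. A sketch of the intended usage (the `rd` pointer, RID, and loop bounds are hypothetical):

```cpp
// Record many small updates with no per-call synchronization...
for (uint32_t i = 0; i < instance_count; i++) {
	rd->buffer_update(instance_buffer, i * stride, stride, &instances[i],
			RD::BARRIER_MASK_NO_BARRIER);
}
// ...then one explicit barrier before compute reads the buffer, using the
// barrier(p_from, p_to) entry point added at the end of this diff.
rd->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_COMPUTE);
```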
@@ -5093,14 +5306,20 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3
 	ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER,
 			"Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end.");
 
-	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw);
+	// should not be needed
+	// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_post_barrier);
 
-	vkCmdFillBuffer(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, buffer->buffer, p_offset, p_size, 0);
+	vkCmdFillBuffer(frames[frame].draw_command_buffer, buffer->buffer, p_offset, p_size, 0);
 
 #ifdef FORCE_FULL_BARRIER
-	_full_barrier(p_sync_with_draw);
+	_full_barrier(true);
 #else
-	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw);
+	if (dst_stage_mask == 0) {
+		dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+	}
+
+	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask);
+
 #endif
 	return OK;
 }
@@ -5112,7 +5331,7 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) {
 	VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
 	VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
 	// Get the vulkan buffer and the potential stage/access possible
-	Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask);
+	Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL);
 	if (!buffer) {
 		ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving.");
 	}
@@ -5555,6 +5774,9 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) {
 	pipeline.pipeline_layout = shader->pipeline_layout;
 	pipeline.shader = p_shader;
 	pipeline.push_constant_size = shader->push_constant.push_constant_size;
+	pipeline.local_group_size[0] = shader->compute_local_size[0];
+	pipeline.local_group_size[1] = shader->compute_local_size[1];
+	pipeline.local_group_size[2] = shader->compute_local_size[2];
 
 	//create ID to associate with this pipeline
 	RID id = compute_pipeline_owner.make_rid(pipeline);
@@ -5726,11 +5948,18 @@ Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuff
 	render_pass_begin.pNext = nullptr;
 	render_pass_begin.renderPass = render_pass;
 	render_pass_begin.framebuffer = vkframebuffer;
-
+	/*
+	 * Given how the API works, it makes sense to always fully operate on the whole framebuffer.
+	 * This allows better continued operations for things like shadowmapping.
 	render_pass_begin.renderArea.extent.width = viewport_size.width;
 	render_pass_begin.renderArea.extent.height = viewport_size.height;
 	render_pass_begin.renderArea.offset.x = viewport_offset.x;
 	render_pass_begin.renderArea.offset.y = viewport_offset.y;
+	*/
+	render_pass_begin.renderArea.extent.width = framebuffer->size.width;
+	render_pass_begin.renderArea.extent.height = framebuffer->size.height;
+	render_pass_begin.renderArea.offset.x = 0;
+	render_pass_begin.renderArea.offset.y = 0;
 
 	Vector<VkClearValue> clear_values;
 	clear_values.resize(framebuffer->texture_ids.size());
@@ -5857,7 +6086,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
 	_THREAD_SAFE_METHOD_
 
 	ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
-	ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
+	ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time.");
 
 	Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer);
 	ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
@@ -5878,12 +6107,19 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
 
 		viewport_offset = regioni.position;
 		viewport_size = regioni.size;
-
-		if (p_initial_color_action == INITIAL_ACTION_CLEAR) {
+		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
+			needs_clear_color = true;
+			p_initial_color_action = INITIAL_ACTION_CONTINUE;
+		}
+		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
+			needs_clear_depth = true;
+			p_initial_depth_action = INITIAL_ACTION_CONTINUE;
+		}
+		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
 			needs_clear_color = true;
 			p_initial_color_action = INITIAL_ACTION_KEEP;
 		}
-		if (p_initial_depth_action == INITIAL_ACTION_CLEAR) {
+		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
 			needs_clear_depth = true;
 			p_initial_depth_action = INITIAL_ACTION_KEEP;
 		}
@@ -5969,11 +6205,11 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p
 
 		viewport_offset = regioni.position;
 		viewport_size = regioni.size;
 
-		if (p_initial_color_action == INITIAL_ACTION_CLEAR) {
+		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
 			needs_clear_color = true;
 			p_initial_color_action = INITIAL_ACTION_KEEP;
 		}
-		if (p_initial_depth_action == INITIAL_ACTION_CLEAR) {
+		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
 			needs_clear_depth = true;
 			p_initial_depth_action = INITIAL_ACTION_KEEP;
 		}
@@ -6226,6 +6462,19 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_
 	dl->state.sets[p_index].uniform_set_format = uniform_set->format;
 	dl->state.sets[p_index].uniform_set = p_uniform_set;
 
+	uint32_t mst_count = uniform_set->mutable_storage_textures.size();
+	if (mst_count) {
+		Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw();
+		for (uint32_t i = 0; i < mst_count; i++) {
+			if (mst_textures[i]->used_in_frame != frames_drawn) {
+				mst_textures[i]->used_in_frame = frames_drawn;
+				mst_textures[i]->used_in_transfer = false;
+				mst_textures[i]->used_in_compute = false;
+			}
+			mst_textures[i]->used_in_raster = true;
+		}
+	}
+
 #ifdef DEBUG_ENABLED
 	{ //validate that textures bound are not attached as framebuffer bindings
 		uint32_t attachable_count = uniform_set->attachable_textures.size();
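Since the render area is now always the full framebuffer, a region-limited clear can no longer be expressed through VK_ATTACHMENT_LOAD_OP_CLEAR; the new INITIAL_ACTION_CLEAR_REGION variants load the attachment and emit an explicit clear of just the region instead. A hypothetical call site (argument order follows the draw_list_begin overload this diff modifies; the RIDs and region are made up):

```cpp
// Clear only the 256x256 region being redrawn; the rest of the attachment is kept.
RD::DrawListID draw_list = rd->draw_list_begin(framebuffer,
		RD::INITIAL_ACTION_CLEAR_REGION, RD::FINAL_ACTION_READ,
		RD::INITIAL_ACTION_CLEAR_REGION, RD::FINAL_ACTION_READ,
		clear_colors, 1.0f, 0, Rect2(0, 0, 256, 256));
```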
RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor); } -void RenderingDeviceVulkan::draw_list_end() { +void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -6504,16 +6753,51 @@ void RenderingDeviceVulkan::draw_list_end() { } } + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + draw_list_bound_textures.clear(); - for (int i = 0; i < draw_list_storage_textures.size(); i++) { + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = draw_list_storage_textures.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size()); + } + + uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + if (image_barrier_count) { + src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + src_access |= VK_ACCESS_SHADER_WRITE_BIT; + } + + for (uint32_t i = 0; i < image_barrier_count; i++) { Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]); - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[i]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.srcAccessMask = src_access; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6526,8 +6810,6 @@ void RenderingDeviceVulkan::draw_list_end() { image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6540,7 +6822,17 @@ void RenderingDeviceVulkan::draw_list_end() { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - 
_memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, true); + + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = src_access; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif } @@ -6548,12 +6840,13 @@ void RenderingDeviceVulkan::draw_list_end() { /**** COMPUTE LISTS ****/ /***********************/ -RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { - ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); +RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) { + ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); compute_list = memnew(ComputeList); compute_list->command_buffer = frames[frame].draw_command_buffer; + compute_list->state.allow_draw_overlap = p_allow_draw_overlap; return ID_TYPE_COMPUTE_LIST; } @@ -6610,6 +6903,9 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l } cl->state.pipeline_shader = pipeline->shader; + cl->state.local_group_size[0] = pipeline->local_group_size[0]; + cl->state.local_group_size[1] = pipeline->local_group_size[1]; + cl->state.local_group_size[2] = pipeline->local_group_size[2]; } #ifdef DEBUG_ENABLED @@ -6647,11 +6943,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, cl->state.sets[p_index].uniform_set = p_uniform_set; uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size(); + uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); + Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw(); + VkImageMemoryBarrier *texture_barriers = nullptr; + + if (textures_to_sampled_count + textures_to_storage_count) { + texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count)); + } + uint32_t texture_barrier_count = 0; + + uint32_t src_stage_flags = 0; + for (uint32_t i = 0; i < textures_to_sampled_count; i++) { if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - VkImageMemoryBarrier image_memory_barrier; + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -6668,23 +6977,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, 
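draw_list_end above (and compute_list_end below) switch from one vkCmdPipelineBarrier per storage texture to a single call covering a stack-allocated array of image barriers plus one global VkMemoryBarrier. The shape of that pattern, stripped of the bookkeeping:

```cpp
// One submission-time barrier for N image transitions instead of N calls.
// alloca is acceptable here because the count is bounded by the textures in one list.
VkImageMemoryBarrier *image_barriers = nullptr;
if (image_barrier_count) {
	image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count);
}
for (uint32_t i = 0; i < image_barrier_count; i++) {
	VkImageMemoryBarrier &b = image_barriers[i];
	b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	// ... fill the per-texture layout transition ...
}
vkCmdPipelineBarrier(command_buffer, src_stage, dst_stage, 0,
		1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
```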
@@ -6647,11 +6943,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
 	cl->state.sets[p_index].uniform_set = p_uniform_set;
 
 	uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
+	uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
+
 	Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
 
+	VkImageMemoryBarrier *texture_barriers = nullptr;
+
+	if (textures_to_sampled_count + textures_to_storage_count) {
+		texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count));
+	}
+	uint32_t texture_barrier_count = 0;
+
+	uint32_t src_stage_flags = 0;
+
 	for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
 		if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
-			VkImageMemoryBarrier image_memory_barrier;
+			src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+
+			VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
 			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
@@ -6668,23 +6977,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
 			image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer;
 			image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
 
-			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
-
 			textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 
 			cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
 		}
+
+		if (textures_to_sampled[i]->used_in_frame != frames_drawn) {
+			textures_to_sampled[i]->used_in_frame = frames_drawn;
+			textures_to_sampled[i]->used_in_transfer = false;
+			textures_to_sampled[i]->used_in_raster = false;
+		}
+		textures_to_sampled[i]->used_in_compute = true;
 	}
 
-	uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
 	Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
 
 	for (uint32_t i = 0; i < textures_to_storage_count; i++) {
 		if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
-			VkImageMemoryBarrier image_memory_barrier;
+			uint32_t src_access_flags = 0;
+
+			if (textures_to_storage[i]->used_in_frame == frames_drawn) {
+				if (textures_to_storage[i]->used_in_compute) {
+					src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+					src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+				}
+				if (textures_to_storage[i]->used_in_raster) {
+					src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
+					src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+				}
+				if (textures_to_storage[i]->used_in_transfer) {
+					src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+					src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
+				}
+
+				textures_to_storage[i]->used_in_compute = false;
+				textures_to_storage[i]->used_in_raster = false;
+				textures_to_storage[i]->used_in_transfer = false;
+
+			} else {
+				src_access_flags = 0;
+				textures_to_storage[i]->used_in_compute = false;
+				textures_to_storage[i]->used_in_raster = false;
+				textures_to_storage[i]->used_in_transfer = false;
+				textures_to_storage[i]->used_in_frame = frames_drawn;
+			}
+
+			VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
 			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 			image_memory_barrier.pNext = nullptr;
-			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+			image_memory_barrier.srcAccessMask = src_access_flags;
 			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
 			image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
 			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
@@ -6698,14 +7039,20 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
 			image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer;
 			image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
 
-			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
-
 			textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
 
 			cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards
 		}
 	}
 
+	if (texture_barrier_count) {
+		if (src_stage_flags == 0) {
+			src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+		}
+
+		vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers);
+	}
+
 #if 0
 	{ //validate that textures bound are not attached as framebuffer bindings
 		uint32_t attachable_count = uniform_set->attachable_textures.size();
@@ -6799,6 +7146,27 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t
 	vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups);
 }
 
+void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) {
+	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+	ERR_FAIL_COND(!compute_list);
+
+	ComputeList *cl = compute_list;
+
+#ifdef DEBUG_ENABLED
+
+	ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
+
+	if (cl->validation.pipeline_push_constant_size > 0) {
+		//using push constants, check that they were supplied
+		ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+
+#endif
+
+	compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1);
+}
+
 void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) {
 	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
 	ERR_FAIL_COND(!compute_list);
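compute_list_dispatch_threads converts a thread count into workgroup counts with an integer ceiling division against the local size captured at pipeline bind, so callers no longer hand-roll the rounding. The arithmetic it relies on:

```cpp
// groups = ceil(threads / local_size), written without floating point.
// (Assumes a non-zero thread count; 0 would underflow the subtraction.)
uint32_t x_groups = (p_x_threads - 1) / local_size_x + 1;
// e.g. 1920 threads at local_size_x == 8  ->  (1919 / 8) + 1 = 240 groups,
// and a non-multiple like 1000 at 64      ->  (999 / 64) + 1 = 16 groups.
```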
@@ -6863,14 +7231,44 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) {
 #endif
 }
 
-void RenderingDeviceVulkan::compute_list_end() {
+void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) {
 	ERR_FAIL_COND(!compute_list);
+
+	uint32_t barrier_flags = 0;
+	uint32_t access_flags = 0;
+	if (p_post_barrier & BARRIER_MASK_COMPUTE) {
+		barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+		access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+	}
+	if (p_post_barrier & BARRIER_MASK_RASTER) {
+		barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
+		access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+	}
+	if (p_post_barrier & BARRIER_MASK_TRANSFER) {
+		barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+		access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
+	}
+
+	if (barrier_flags == 0) {
+		barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+	}
+
+	VkImageMemoryBarrier *image_barriers = nullptr;
+
+	uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size();
+
+	if (image_barrier_count) {
+		image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count);
+	}
+
+	uint32_t barrier_idx = 0;
+
 	for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
-		VkImageMemoryBarrier image_memory_barrier;
+		VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++];
 		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
 		image_memory_barrier.pNext = nullptr;
 		image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
-		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
+		image_memory_barrier.dstAccessMask = access_flags;
 		image_memory_barrier.oldLayout = E->get()->layout;
 		image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
@@ -6883,19 +7281,75 @@ void RenderingDeviceVulkan::compute_list_end() {
 		image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer;
 		image_memory_barrier.subresourceRange.layerCount = E->get()->layers;
 
-		// TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent
-		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
-
 		E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+		if (E->get()->used_in_frame != frames_drawn) {
+			E->get()->used_in_transfer = false;
+			E->get()->used_in_raster = false;
+			E->get()->used_in_compute = false;
+			E->get()->used_in_frame = frames_drawn;
+		}
 	}
 
-	memdelete(compute_list);
-	compute_list = nullptr;
 #ifdef FORCE_FULL_BARRIER
 	_full_barrier(true);
 #else
-	_memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true);
+	VkMemoryBarrier mem_barrier;
+	mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+	mem_barrier.pNext = nullptr;
+	mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+	mem_barrier.dstAccessMask = access_flags;
+
+	if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
+		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
+	}
+
 #endif
+
+	memdelete(compute_list);
+	compute_list = nullptr;
+}
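With the new p_post_barrier argument, a caller that knows the compute output is only consumed by one kind of work can request just that transition. A hedged usage sketch; the rd pointer and surrounding calls are illustrative, while the mask constants are the ones this patch introduces:

// Hypothetical caller: the results are read only by raster work, so no
// compute->compute or compute->transfer barrier is requested.
RenderingDevice::ComputeListID compute_list = rd->compute_list_begin();
// ... bind pipeline, bind uniform sets, dispatch ...
rd->compute_list_end(RenderingDevice::BARRIER_MASK_RASTER);

Passing BARRIER_MASK_NO_BARRIER (with no layout transitions pending) records no barrier at all, deferring synchronization to a later explicit barrier() call.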
+
+void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) {
+	uint32_t src_barrier_flags = 0;
+	uint32_t src_access_flags = 0;
+	if (p_from & BARRIER_MASK_COMPUTE) {
+		src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+		src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT;
+	}
+	if (p_from & BARRIER_MASK_RASTER) {
+		src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+		src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+	}
+	if (p_from & BARRIER_MASK_TRANSFER) {
+		src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+		src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+	}
+
+	if (p_from == 0) {
+		src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+	}
+
+	uint32_t dst_barrier_flags = 0;
+	uint32_t dst_access_flags = 0;
+	if (p_to & BARRIER_MASK_COMPUTE) {
+		dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+		dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+	}
+	if (p_to & BARRIER_MASK_RASTER) {
+		dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
+		dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+	}
+	if (p_to & BARRIER_MASK_TRANSFER) {
+		dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+		dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
+	}
+
+	if (p_to == 0) {
+		dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+	}
+
+	_memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true);
 }
 
 void RenderingDeviceVulkan::full_barrier() {
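The same masks drive the standalone barrier() above. Note the asymmetry: the source side carries only write access bits (only writes need to be made available), while the destination side carries reads and writes. A hedged caller-side sketch; the handles and data are illustrative, and the buffer_update post-barrier parameter is the one added elsewhere in this patch:

// Hypothetical: upload with the implicit barrier deferred, then fence the
// transfer against compute only, leaving raster work untouched.
rd->buffer_update(storage_buffer, 0, data.size(), data.ptr(), RenderingDevice::BARRIER_MASK_NO_BARRIER);
rd->barrier(RenderingDevice::BARRIER_MASK_TRANSFER, RenderingDevice::BARRIER_MASK_COMPUTE);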
@@ -7032,6 +7486,82 @@ void RenderingDeviceVulkan::free(RID p_id) {
 	_free_internal(p_id);
 }
 
+// The full list of resources that can be named is in the VkObjectType enum.
+// We just expose the resources that are owned and can be accessed easily.
+void RenderingDeviceVulkan::set_resource_name(RID p_id, const String p_name) {
+	if (texture_owner.owns(p_id)) {
+		Texture *texture = texture_owner.getornull(p_id);
+		if (texture->owner.is_null()) {
+			// Don't set the source texture's name when calling on a texture view.
+			context->set_object_name(VK_OBJECT_TYPE_IMAGE, uint64_t(texture->image), p_name);
+		}
+		context->set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, uint64_t(texture->view), p_name + " View");
+	} else if (framebuffer_owner.owns(p_id)) {
+		//Framebuffer *framebuffer = framebuffer_owner.getornull(p_id);
+		// Not implemented for now as the relationship between Framebuffer and RenderPass is very complex.
+	} else if (sampler_owner.owns(p_id)) {
+		VkSampler *sampler = sampler_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_SAMPLER, uint64_t(*sampler), p_name);
+	} else if (vertex_buffer_owner.owns(p_id)) {
+		Buffer *vertex_buffer = vertex_buffer_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(vertex_buffer->buffer), p_name);
+	} else if (index_buffer_owner.owns(p_id)) {
+		IndexBuffer *index_buffer = index_buffer_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(index_buffer->buffer), p_name);
+	} else if (shader_owner.owns(p_id)) {
+		Shader *shader = shader_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(shader->pipeline_layout), p_name + " Pipeline Layout");
+		for (int i = 0; i < shader->sets.size(); i++) {
+			context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, uint64_t(shader->sets[i].descriptor_set_layout), p_name);
+		}
+	} else if (uniform_buffer_owner.owns(p_id)) {
+		Buffer *uniform_buffer = uniform_buffer_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(uniform_buffer->buffer), p_name);
+	} else if (texture_buffer_owner.owns(p_id)) {
+		TextureBuffer *texture_buffer = texture_buffer_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(texture_buffer->buffer.buffer), p_name);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, uint64_t(texture_buffer->view), p_name + " View");
+	} else if (storage_buffer_owner.owns(p_id)) {
+		Buffer *storage_buffer = storage_buffer_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(storage_buffer->buffer), p_name);
+	} else if (uniform_set_owner.owns(p_id)) {
+		UniformSet *uniform_set = uniform_set_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, uint64_t(uniform_set->descriptor_set), p_name);
+	} else if (render_pipeline_owner.owns(p_id)) {
+		RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name);
+		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout");
+	} else if (compute_pipeline_owner.owns(p_id)) {
+		ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_id);
+		context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name);
+		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout");
+	} else {
+		ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id()));
+	}
+}
+
+void RenderingDeviceVulkan::draw_command_begin_label(String p_label_name, const Color p_color) {
+	context->command_begin_label(frames[frame].draw_command_buffer, p_label_name, p_color);
+}
+
+void RenderingDeviceVulkan::draw_command_insert_label(String p_label_name, const Color p_color) {
+	context->command_insert_label(frames[frame].draw_command_buffer, p_label_name, p_color);
+}
+
+void RenderingDeviceVulkan::draw_command_end_label() {
+	context->command_end_label(frames[frame].draw_command_buffer);
+}
+
+String RenderingDeviceVulkan::get_device_vendor_name() const {
+	return context->get_device_vendor_name();
+}
+String RenderingDeviceVulkan::get_device_name() const {
+	return context->get_device_name();
+}
+String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const {
+	return context->get_device_pipeline_cache_uuid();
+}
+
 void RenderingDeviceVulkan::_finalize_command_bufers() {
 	if (draw_list) {
 		ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
@@ -7084,6 +7614,7 @@ void RenderingDeviceVulkan::_begin_frame() {
 	if (frames[frame].timestamp_count) {
 		vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
+		vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count);
 		SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names);
 		SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values);
 	}
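The naming and label hooks pass straight through to the context's debug-utils wrappers, so captures and validation messages show human-readable names instead of raw handles. A hedged usage sketch; the RID, label text, and color are illustrative:

// Hypothetical renderer-side usage.
rd->set_resource_name(reflection_buffer, "Reflection Buffer");
rd->draw_command_begin_label("Reflection Pass", Color(0.2, 0.6, 1.0, 1.0));
// ... draw lists / compute lists for the pass ...
rd->draw_command_end_label();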
@@ -7450,9 +7981,10 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) {
 	}
 }
 
-void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_to_draw) {
+void RenderingDeviceVulkan::capture_timestamp(const String &p_name) {
 	ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);
 
+	//this should be optional for profiling, else it will slow things down
 	{
 		VkMemoryBarrier memoryBarrier;
 
@@ -7489,9 +8021,10 @@ void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_
 				VK_ACCESS_HOST_READ_BIT |
 				VK_ACCESS_HOST_WRITE_BIT;
 
-		vkCmdPipelineBarrier(p_sync_to_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
+		vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
 	}
 
-	vkCmdWriteTimestamp(p_sync_to_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count);
+
+	vkCmdWriteTimestamp(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count);
 	frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name;
 	frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec();
 	frames[frame].timestamp_count++;
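With the p_sync_to_draw flag gone, timestamps are always recorded on the draw command buffer, preceded by a full ALL_COMMANDS barrier so prior work is included in the measurement. A hedged sketch of the read-back side using the existing captured-timestamp getters; the index pairing is illustrative, and GPU values are raw ticks scaled by the device's timestamp period:

// Hypothetical profiling flow: record named timestamps around a pass ...
rd->capture_timestamp("shadows_begin");
// ... record the pass ...
rd->capture_timestamp("shadows_end");

// ... then, once that frame's queries have resolved, diff the GPU values.
uint64_t t0 = rd->get_captured_timestamp_gpu_time(0);
uint64_t t1 = rd->get_captured_timestamp_gpu_time(1);
print_line("shadows: " + itos(t1 - t0)); // Units follow the device timestamp period.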