From 3a1f14461a52e4a7c3c3b0472493856ba86892a9 Mon Sep 17 00:00:00 2001 From: John Zulauf Date: Wed, 29 Jul 2020 13:12:21 -0600 Subject: Additional synchronization fixes w/o FORCE_FULL_BARRIER Additional synchronization fixes from hazards arising from disabling FORCE_FULL_BARRIER. --- drivers/vulkan/rendering_device_vulkan.cpp | 94 ++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 2947376032..54ec2479f0 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -38,7 +38,7 @@ #include "thirdparty/spirv-reflect/spirv_reflect.h" -#define FORCE_FULL_BARRIER +//#define FORCE_FULL_BARRIER // Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values) RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &stage_mask, VkAccessFlags &access_mask) { @@ -67,6 +67,30 @@ RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID return buffer; } +static void update_external_dependency_for_store(VkSubpassDependency &dependency, bool is_sampled, bool is_storage, bool is_depth) { + // Transitioning from write to read, protect the shaders that may use this next + // Allow for copies/image layout transitions + dependency.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dependency.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; + + if (is_sampled) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; + } else if (is_storage) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } else { + dependency.dstStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (is_depth) { + // Depth resources have addtional stages that may be interested in them + dependency.dstStageMask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } +} + void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) { if (!dependency_map.has(p_depends_on)) { dependency_map[p_depends_on] = Set(); @@ -1959,7 +1983,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_memory_barrier.subresourceRange.baseArrayLayer = 0; image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers; - vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } RID id = texture_owner.make_rid(texture); @@ -2216,7 +2240,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } uint32_t mipmap_offset = 0; @@ -2349,7 +2373,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; @@ -2512,6 +2536,9 @@ Vector RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + if (tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + image_memory_barrier.dstAccessMask |= VK_ACCESS_SHADER_WRITE_BIT; + } image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2523,7 +2550,7 @@ Vector RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } _flush(true); @@ -2782,7 +2809,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //Dest VkImageMemoryBarrier image_memory_barrier; @@ -2963,7 +2990,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; @@ -3018,6 +3045,19 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector depth_stencil_references; Vector resolve_references; + // Set up a dependencies from/to external equivalent to the default (implicit) one, and then amend them + const VkPipelineStageFlags default_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // From Section 7.1 of Vulkan API Spec v1.1.148 + + VkPipelineStageFlags reading_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + VkSubpassDependency dependencies[2] = { { VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, default_access_mask, 0 }, + { 0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, default_access_mask, 0, 0 } }; + VkSubpassDependency &dependency_from_external = dependencies[0]; + VkSubpassDependency &dependency_to_external = dependencies[1]; + for (int i = 0; i < p_format.size(); i++) { ERR_FAIL_INDEX_V(p_format[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE); ERR_FAIL_INDEX_V(p_format[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE); @@ -3033,11 +3073,16 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vectorbase_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); texture->layout = VK_IMAGE_LAYOUT_GENERAL; @@ -6501,7 +6558,7 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6698,13 +6755,13 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { void RenderingDeviceVulkan::compute_list_end() { ERR_FAIL_COND(!compute_list); - + const VkPipelineStageFlags dest_stage_mask = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; for (Set::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; image_memory_barrier.oldLayout = E->get()->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6717,7 +6774,8 @@ void RenderingDeviceVulkan::compute_list_end() { image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer; image_memory_barrier.subresourceRange.layerCount = E->get()->layers; - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + // TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dest_stage_mask, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6727,7 +6785,7 @@ void RenderingDeviceVulkan::compute_list_end() { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); + _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dest_stage_mask, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); #endif } -- cgit v1.2.3