diff options
Diffstat (limited to 'drivers/vulkan')
-rw-r--r-- | drivers/vulkan/SCsub | 30 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 1100 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.h | 300 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.cpp | 495 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.h | 148 |
5 files changed, 1647 insertions, 426 deletions
diff --git a/drivers/vulkan/SCsub b/drivers/vulkan/SCsub index 61d91711da..14b9d63204 100644 --- a/drivers/vulkan/SCsub +++ b/drivers/vulkan/SCsub @@ -2,7 +2,7 @@ Import("env") -env.add_source_files(env.drivers_sources, "*.cpp") +thirdparty_obj = [] # FIXME: Refactor all this to reduce code duplication. if env["platform"] == "android": @@ -22,7 +22,8 @@ if env["platform"] == "android": thirdparty_dir = "#thirdparty/vulkan" vma_sources = [thirdparty_dir + "/android/vk_mem_alloc.cpp"] - env_thirdparty.add_source_files(env.drivers_sources, vma_sources) + env_thirdparty.add_source_files(thirdparty_obj, vma_sources) + elif env["platform"] == "iphone": # Use bundled Vulkan headers thirdparty_dir = "#thirdparty/vulkan" @@ -33,7 +34,8 @@ elif env["platform"] == "iphone": env_thirdparty.disable_warnings() vma_sources = [thirdparty_dir + "/vk_mem_alloc.cpp"] - env_thirdparty.add_source_files(env.drivers_sources, vma_sources) + env_thirdparty.add_source_files(thirdparty_obj, vma_sources) + elif env["builtin_vulkan"]: # Use bundled Vulkan headers thirdparty_dir = "#thirdparty/vulkan" @@ -59,7 +61,6 @@ elif env["builtin_vulkan"]: if env["platform"] == "windows": loader_sources.append("dirent_on_windows.c") - loader_sources.append("dxgi_loader.c") env_thirdparty.AppendUnique( CPPDEFINES=[ "VK_USE_PLATFORM_WIN32_KHR", @@ -99,8 +100,9 @@ elif env["builtin_vulkan"]: env_thirdparty.AppendUnique(CPPDEFINES=["HAVE_SECURE_GETENV"]) loader_sources = [thirdparty_dir + "/loader/" + file for file in loader_sources] - env_thirdparty.add_source_files(env.drivers_sources, loader_sources) - env_thirdparty.add_source_files(env.drivers_sources, vma_sources) + env_thirdparty.add_source_files(thirdparty_obj, loader_sources) + env_thirdparty.add_source_files(thirdparty_obj, vma_sources) + else: # Always build VMA. thirdparty_dir = "#thirdparty/vulkan" env.Prepend(CPPPATH=[thirdparty_dir]) @@ -110,4 +112,18 @@ else: # Always build VMA. env_thirdparty.disable_warnings() vma_sources = [thirdparty_dir + "/vk_mem_alloc.cpp"] - env_thirdparty.add_source_files(env.drivers_sources, vma_sources) + env_thirdparty.add_source_files(thirdparty_obj, vma_sources) + + +env.drivers_sources += thirdparty_obj + + +# Godot source files + +driver_obj = [] + +env.add_source_files(driver_obj, "*.cpp") +env.drivers_sources += driver_obj + +# Needed to force rebuilding the driver files when the thirdparty code is updated. +env.Depends(driver_obj, thirdparty_obj) diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 9fe3f7e6c0..09e2b4546a 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -30,15 +30,100 @@ #include "rendering_device_vulkan.h" -#include "core/hashfuncs.h" +#include "core/config/project_settings.h" #include "core/os/file_access.h" #include "core/os/os.h" -#include "core/project_settings.h" +#include "core/templates/hashfuncs.h" #include "drivers/vulkan/vulkan_context.h" #include "thirdparty/spirv-reflect/spirv_reflect.h" -#define FORCE_FULL_BARRIER +//#define FORCE_FULL_BARRIER + +// Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values) +RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, uint32_t p_post_barrier) { + Buffer *buffer = nullptr; + if (vertex_buffer_owner.owns(p_buffer)) { + buffer = vertex_buffer_owner.getornull(p_buffer); + + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + } + } else if (index_buffer_owner.owns(p_buffer)) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + r_access_mask |= VK_ACCESS_INDEX_READ_BIT; + buffer = index_buffer_owner.getornull(p_buffer); + } else if (uniform_buffer_owner.owns(p_buffer)) { + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT; + buffer = uniform_buffer_owner.getornull(p_buffer); + } else if (texture_buffer_owner.owns(p_buffer)) { + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT; + } + + buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; + } else if (storage_buffer_owner.owns(p_buffer)) { + buffer = storage_buffer_owner.getornull(p_buffer); + if (p_post_barrier & BARRIER_MASK_RASTER) { + r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + r_stage_mask |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + r_access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + + if (buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) { + r_stage_mask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + r_access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + } + return buffer; +} + +static void update_external_dependency_for_store(VkSubpassDependency &dependency, bool is_sampled, bool is_storage, bool is_depth) { + // Transitioning from write to read, protect the shaders that may use this next + // Allow for copies/image layout transitions + dependency.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dependency.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; + + if (is_sampled) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; + } else if (is_storage) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } else { + dependency.dstStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (is_depth) { + // Depth resources have additional stages that may be interested in them + dependency.dstStageMask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } +} void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) { if (!dependency_map.has(p_depends_on)) { @@ -564,6 +649,7 @@ int RenderingDeviceVulkan::get_format_vertex_size(DataFormat p_format) { case DATA_FORMAT_B8G8R8A8_SNORM: case DATA_FORMAT_B8G8R8A8_UINT: case DATA_FORMAT_B8G8R8A8_SINT: + case DATA_FORMAT_A2B10G10R10_UNORM_PACK32: return 4; case DATA_FORMAT_R16_UNORM: case DATA_FORMAT_R16_SNORM: @@ -1310,6 +1396,7 @@ Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, p_buffer->buffer_info.buffer = p_buffer->buffer; p_buffer->buffer_info.offset = 0; p_buffer->buffer_info.range = p_size; + p_buffer->usage = p_usage; return OK; } @@ -1399,7 +1486,7 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ // possible in a single frame if (staging_buffer_blocks[staging_buffer_current].frame_used == frames_drawn) { //guess we did.. ok, let's see if we can insert a new block.. - if (staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) { + if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) { //we can, so we are safe Error err = _insert_staging_block(); if (err) { @@ -1443,7 +1530,7 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ staging_buffer_blocks.write[staging_buffer_current].fill_amount = 0; } else if (staging_buffer_blocks[staging_buffer_current].frame_used > frames_drawn - frame_count) { //this block may still be in use, let's not touch it unless we have to, so.. can we create a new one? - if (staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) { + if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) { //we are still allowed to create a new block, so let's do that and insert it for current pos Error err = _insert_staging_block(); if (err) { @@ -1542,6 +1629,9 @@ void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mas mem_barrier.srcAccessMask = p_src_access; mem_barrier.dstAccessMask = p_dst_sccess; + if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) { + return; //no barrier, since this is invalid + } vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr); } @@ -1640,16 +1730,16 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T #endif } - if (p_format.type == TEXTURE_TYPE_CUBE || p_format.type == TEXTURE_TYPE_CUBE_ARRAY) { + if (p_format.texture_type == TEXTURE_TYPE_CUBE || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) { image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; } /*if (p_format.type == TEXTURE_TYPE_2D || p_format.type == TEXTURE_TYPE_2D_ARRAY) { image_create_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; }*/ - ERR_FAIL_INDEX_V(p_format.type, TEXTURE_TYPE_MAX, RID()); + ERR_FAIL_INDEX_V(p_format.texture_type, TEXTURE_TYPE_MAX, RID()); - image_create_info.imageType = vulkan_image_type[p_format.type]; + image_create_info.imageType = vulkan_image_type[p_format.texture_type]; ERR_FAIL_COND_V_MSG(p_format.width < 1, RID(), "Width must be equal or greater than 1 for all textures"); @@ -1674,10 +1764,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_create_info.mipLevels = p_format.mipmaps; - if (p_format.type == TEXTURE_TYPE_1D_ARRAY || p_format.type == TEXTURE_TYPE_2D_ARRAY || p_format.type == TEXTURE_TYPE_CUBE_ARRAY || p_format.type == TEXTURE_TYPE_CUBE) { + if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) { ERR_FAIL_COND_V_MSG(p_format.array_layers < 1, RID(), "Amount of layers must be equal or greater than 1 for arrays and cubemaps."); - ERR_FAIL_COND_V_MSG((p_format.type == TEXTURE_TYPE_CUBE_ARRAY || p_format.type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(), + ERR_FAIL_COND_V_MSG((p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(), "Cubemap and cubemap array textures must provide a layer number that is multiple of 6"); image_create_info.arrayLayers = p_format.array_layers; } else { @@ -1807,7 +1897,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T VkResult err = vmaCreateImage(allocator, &image_create_info, &allocInfo, &texture.image, &texture.allocation, &texture.allocation_info); ERR_FAIL_COND_V_MSG(err, RID(), "vmaCreateImage failed with error " + itos(err) + "."); - texture.type = p_format.type; + texture.type = p_format.texture_type; texture.format = p_format.format; texture.width = image_create_info.extent.width; texture.height = image_create_info.extent.height; @@ -1875,7 +1965,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, }; - image_view_create_info.viewType = view_types[p_format.type]; + image_view_create_info.viewType = view_types[p_format.texture_type]; if (p_view.format_override == DATA_FORMAT_MAX) { image_view_create_info.format = image_create_info.format; } else { @@ -1932,7 +2022,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_memory_barrier.subresourceRange.baseArrayLayer = 0; image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers; - vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } RID id = texture_owner.make_rid(texture); @@ -2014,6 +2104,48 @@ RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } + VkImageViewUsageCreateInfo usage_info; + usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + usage_info.pNext = nullptr; + if (p_view.format_override != DATA_FORMAT_MAX) { + //need to validate usage with vulkan + + usage_info.usage = 0; + + if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_STORAGE_BIT)) { + usage_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + } + + if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { + usage_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + } + + if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } + + if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + + image_view_create_info.pNext = &usage_info; + } + VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view); ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + "."); @@ -2042,15 +2174,26 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_3D && src_texture->type != TEXTURE_TYPE_3D, RID(), "Can only create a 3D slice from a 3D texture"); + ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_2D_ARRAY && (src_texture->type != TEXTURE_TYPE_2D_ARRAY), RID(), + "Can only create an array slice from a 2D array mipmap"); + //create view ERR_FAIL_UNSIGNED_INDEX_V(p_mipmap, src_texture->mipmaps, RID()); ERR_FAIL_UNSIGNED_INDEX_V(p_layer, src_texture->layers, RID()); + int slice_layers = 1; + if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) { + ERR_FAIL_COND_V_MSG(p_layer != 0, RID(), "layer must be 0 when obtaining a 2D array mipmap slice"); + slice_layers = src_texture->layers; + } else if (p_slice_type == TEXTURE_SLICE_CUBEMAP) { + slice_layers = 6; + } + Texture texture = *src_texture; get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height); texture.mipmaps = 1; - texture.layers = p_slice_type == TEXTURE_SLICE_CUBEMAP ? 6 : 1; + texture.layers = slice_layers; texture.base_mipmap = p_mipmap; texture.base_layer = p_layer; @@ -2070,7 +2213,16 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p VK_IMAGE_VIEW_TYPE_2D, }; - image_view_create_info.viewType = p_slice_type == TEXTURE_SLICE_CUBEMAP ? VK_IMAGE_VIEW_TYPE_CUBE : (p_slice_type == TEXTURE_SLICE_3D ? VK_IMAGE_VIEW_TYPE_3D : view_types[texture.type]); + image_view_create_info.viewType = view_types[texture.type]; + + if (p_slice_type == TEXTURE_SLICE_CUBEMAP) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + } else if (p_slice_type == TEXTURE_SLICE_3D) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + } else if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + } + if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { image_view_create_info.format = vulkan_formats[texture.format]; } else { @@ -2104,7 +2256,7 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p } image_view_create_info.subresourceRange.baseMipLevel = p_mipmap; image_view_create_info.subresourceRange.levelCount = 1; - image_view_create_info.subresourceRange.layerCount = p_slice_type == TEXTURE_SLICE_CUBEMAP ? 6 : 1; + image_view_create_info.subresourceRange.layerCount = slice_layers; image_view_create_info.subresourceRange.baseArrayLayer = p_layer; if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { @@ -2123,11 +2275,11 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p return id; } -Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating textures in 'sync to draw' mode is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(draw_list || compute_list, ERR_INVALID_PARAMETER, + "Updating textures in is forbidden during creation of a draw or compute list"); Texture *texture = texture_owner.getornull(p_texture); ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER); @@ -2168,7 +2320,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con const uint8_t *r = p_data.ptr(); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = p_post_barrier ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; //barrier to transfer { @@ -2189,10 +2341,14 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } uint32_t mipmap_offset = 0; + + uint32_t logic_width = texture->width; + uint32_t logic_height = texture->height; + for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) { uint32_t depth; uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth); @@ -2209,12 +2365,15 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con uint32_t region_w = MIN(region_size, width - x); uint32_t region_h = MIN(region_size, height - y); + uint32_t region_logic_w = MIN(region_size, logic_width - x); + uint32_t region_logic_h = MIN(region_size, logic_height - y); + uint32_t pixel_size = get_image_format_pixel_size(texture->format); uint32_t to_allocate = region_w * region_h * pixel_size; to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format); uint32_t alloc_offset, alloc_size; - Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_sync_with_draw); + Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_post_barrier); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); uint8_t *write_ptr; @@ -2290,8 +2449,8 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con buffer_image_copy.imageOffset.y = y; buffer_image_copy.imageOffset.z = z; - buffer_image_copy.imageExtent.width = region_w; - buffer_image_copy.imageExtent.height = region_h; + buffer_image_copy.imageExtent.width = region_logic_w; + buffer_image_copy.imageExtent.height = region_logic_h; buffer_image_copy.imageExtent.depth = 1; vkCmdCopyBufferToImage(command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy); @@ -2302,15 +2461,36 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con } mipmap_offset = image_total; + logic_width = MAX(1, logic_width >> 1); + logic_height = MAX(1, logic_height >> 1); } //barrier to restore layout { + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = texture->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2322,8 +2502,15 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + } + + if (texture->used_in_frame != frames_drawn) { + texture->used_in_raster = false; + texture->used_in_compute = false; + texture->used_in_frame = frames_drawn; } + texture->used_in_transfer = true; return OK; } @@ -2379,7 +2566,7 @@ Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex for (uint32_t y = 0; y < height; y++) { const uint8_t *rptr = slice_read_ptr + y * layout.rowPitch; uint8_t *wptr = write_ptr + y * pixel_size * width; - copymem(wptr, rptr, pixel_size * width); + copymem(wptr, rptr, (uint64_t)pixel_size * width); } } } @@ -2485,6 +2672,9 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + if (tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + image_memory_barrier.dstAccessMask |= VK_ACCESS_SHADER_WRITE_BIT; + } image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2496,7 +2686,7 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } _flush(true); @@ -2532,13 +2722,13 @@ bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) { return texture_owner.owns(p_texture); } -Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_from_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER, "Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved."); @@ -2559,7 +2749,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, Texture *dst_tex = texture_owner.getornull(p_to_texture); ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER, "Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER, "Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved."); @@ -2580,7 +2770,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; { //PRE Copy the image @@ -2655,12 +2845,31 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, // RESTORE LAYOUT for SRC and DST + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2672,7 +2881,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2681,7 +2890,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = dst_tex->layout; @@ -2694,20 +2903,20 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } return OK; } -Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_from_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER, "Source texture requires the TEXTURE_USAGE_CAN_COPY_FROM_BIT in order to be retrieved."); @@ -2718,7 +2927,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID Texture *dst_tex = texture_owner.getornull(p_to_texture); ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && dst_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER, "Destination texture can't be copied while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER, "Destination texture requires the TEXTURE_USAGE_CAN_COPY_TO_BIT in order to be retrieved."); @@ -2732,7 +2941,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; { //PRE Copy the image @@ -2755,7 +2964,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //Dest VkImageMemoryBarrier image_memory_barrier; @@ -2807,12 +3016,31 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID // RESTORE LAYOUT for SRC and DST + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + { //restore src VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2824,7 +3052,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2833,7 +3061,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.newLayout = dst_tex->layout; @@ -2846,20 +3074,20 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } return OK; } -Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_texture); ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V_MSG(p_sync_with_draw && src_tex->bound, ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER, "Source texture can't be cleared while a render pass that uses it is being created. Ensure render pass is finalized (and that it was created with RENDER_PASS_CONTENTS_FINISH) to unbind this texture."); ERR_FAIL_COND_V(p_layers == 0, ERR_INVALID_PARAMETER); @@ -2876,18 +3104,24 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER); ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER); - VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; + + VkImageLayout clear_layout = (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - VkImageLayout layout = src_tex->layout; + // NOTE: Perhaps the valid stages/accesses for a given owner should be a property of the owner. (Here and places like _get_buffer_from_owner) + const VkPipelineStageFlags valid_texture_stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + constexpr VkAccessFlags read_access = VK_ACCESS_SHADER_READ_BIT; + constexpr VkAccessFlags read_write_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + const VkAccessFlags valid_texture_access = (src_tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) ? read_write_access : read_access; - if (src_tex->layout != VK_IMAGE_LAYOUT_GENERAL) { //storage may be in general state + { // Barrier from previous access with optional layout change (see clear_layout logic above) VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = 0; + image_memory_barrier.srcAccessMask = valid_texture_access; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_memory_barrier.oldLayout = src_tex->layout; - image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier.newLayout = clear_layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2898,8 +3132,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, valid_texture_stages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } VkClearColorValue clear_color; @@ -2915,16 +3148,35 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, range.baseMipLevel = src_tex->base_mipmap + p_base_mipmap; range.levelCount = p_mipmaps; - vkCmdClearColorImage(command_buffer, src_tex->image, layout, &clear_color, 1, &range); + vkCmdClearColorImage(command_buffer, src_tex->image, clear_layout, &clear_color, 1, &range); + + { // Barrier to post clear accesses (changing back the layout if needed) - if (src_tex->layout != VK_IMAGE_LAYOUT_GENERAL) { //storage may be in general state + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier.dstAccessMask = access_flags; + image_memory_barrier.oldLayout = clear_layout; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2936,9 +3188,16 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } + if (src_tex->used_in_frame != frames_drawn) { + src_tex->used_in_raster = false; + src_tex->used_in_compute = false; + src_tex->used_in_frame = frames_drawn; + } + src_tex->used_in_transfer = true; + return OK; } @@ -2991,6 +3250,19 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF Vector<VkAttachmentReference> depth_stencil_references; Vector<VkAttachmentReference> resolve_references; + // Set up dependencies from/to external equivalent to the default (implicit) one, and then amend them + const VkPipelineStageFlags default_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // From Section 7.1 of Vulkan API Spec v1.1.148 + + VkPipelineStageFlags reading_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + VkSubpassDependency dependencies[2] = { { VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, default_access_mask, 0 }, + { 0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, default_access_mask, 0, 0 } }; + VkSubpassDependency &dependency_from_external = dependencies[0]; + VkSubpassDependency &dependency_to_external = dependencies[1]; + for (int i = 0; i < p_format.size(); i++) { ERR_FAIL_INDEX_V(p_format[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE); ERR_FAIL_INDEX_V(p_format[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE); @@ -3006,11 +3278,17 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF bool is_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT; bool is_storage = p_format[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT; + // For each UNDEFINED, assume the prior use was a *read*, as we'd be discarding the output of a write + // Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs. + // the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that + // stage switch (is_depth_stencil ? p_initial_depth_action : p_initial_color_action) { + case INITIAL_ACTION_CLEAR_REGION: case INITIAL_ACTION_CLEAR: { description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } break; case INITIAL_ACTION_KEEP: { if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { @@ -3018,13 +3296,15 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there - description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + dependency_from_external.srcStageMask |= reading_stages; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; case INITIAL_ACTION_DROP: { @@ -3036,12 +3316,15 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + dependency_from_external.srcStageMask |= reading_stages; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; + case INITIAL_ACTION_CLEAR_REGION_CONTINUE: case INITIAL_ACTION_CONTINUE: { if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -3049,12 +3332,13 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there + description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; default: { @@ -3068,14 +3352,17 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, false); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, true); } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + // TODO: What does this mean about the next usage (and thus appropriate dependency masks } } break; case FINAL_ACTION_DISCARD: { @@ -3128,9 +3415,24 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } else if (p_format[i].usage_flags & TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT) { reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; resolve_references.push_back(reference); + // if resolves are done, we need to ensure the copy is safe + dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dependency_to_external.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; } else { ERR_FAIL_V_MSG(VK_NULL_HANDLE, "Texture index " + itos(i) + " is neither color, depth stencil or resolve so it can't be used as attachment."); } + + // NOTE: Big Mallet Approach -- any layout transition causes a full barrier + if (reference.layout != description.initialLayout) { + // NOTE: this should be smarter based on the texture's knowledge of its previous role + dependency_from_external.srcStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dependency_from_external.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + } + if (reference.layout != description.finalLayout) { + // NOTE: this should be smarter based on the texture's knowledge of its subsequent role + dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dependency_to_external.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + } } ERR_FAIL_COND_V_MSG(depth_stencil_references.size() > 1, VK_NULL_HANDLE, @@ -3159,6 +3461,11 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF render_pass_create_info.pAttachments = attachments.ptr(); render_pass_create_info.subpassCount = 1; render_pass_create_info.pSubpasses = &subpass; + // Commenting this because it seems it just avoids raster and compute to work at the same time. + // Other barriers seem to be protecting the render pass fine. + // render_pass_create_info.dependencyCount = 2; + // render_pass_create_info.pDependencies = dependencies; + render_pass_create_info.dependencyCount = 0; render_pass_create_info.pDependencies = nullptr; @@ -3185,7 +3492,7 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c } int color_references; - VkRenderPass render_pass = _render_pass_create(p_format, INITIAL_ACTION_CLEAR, FINAL_ACTION_DISCARD, INITIAL_ACTION_CLEAR, FINAL_ACTION_DISCARD, &color_references); //actions don't matter for this use case + VkRenderPass render_pass = _render_pass_create(p_format, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, &color_references); //actions don't matter for this use case if (render_pass == VK_NULL_HANDLE) { //was likely invalid return INVALID_ID; @@ -3202,11 +3509,8 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c return id; } -RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(const Size2i &p_size) { - ERR_FAIL_COND_V(p_size.width <= 0 || p_size.height <= 0, INVALID_FORMAT_ID); - +RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(TextureSamples p_samples) { FramebufferFormatKey key; - key.empty_size = p_size; const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key); if (E) { @@ -3254,7 +3558,7 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c fb_format.E = E; fb_format.color_attachments = 0; fb_format.render_pass = render_pass; - fb_format.samples = TEXTURE_SAMPLES_1; + fb_format.samples = p_samples; framebuffer_formats[id] = fb_format; return id; } @@ -3270,10 +3574,10 @@ RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_te /**** RENDER TARGET ****/ /***********************/ -RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check) { +RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples, FramebufferFormatID p_format_check) { _THREAD_SAFE_METHOD_ Framebuffer framebuffer; - framebuffer.format_id = framebuffer_format_create_empty(p_size); + framebuffer.format_id = framebuffer_format_create_empty(p_samples); ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID()); framebuffer.size = p_size; @@ -3385,13 +3689,21 @@ RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) { /**** VERTEX ARRAY ****/ /**********************/ -RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) { +RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, bool p_use_as_storage) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + uint32_t usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (p_use_as_storage) { + usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + } Buffer buffer; - _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + _buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_GPU_ONLY); if (p_data.size()) { uint64_t data_size = p_data.size(); const uint8_t *r = p_data.ptr(); @@ -3425,7 +3737,7 @@ RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(cons ERR_FAIL_COND_V(used_locations.has(p_vertex_formats[i].location), INVALID_ID); ERR_FAIL_COND_V_MSG(get_format_vertex_size(p_vertex_formats[i].format) == 0, INVALID_ID, - "Data format for attachment (" + itos(i) + ") is not valid for a vertex array."); + "Data format for attachment (" + itos(i) + "), '" + named_formats[p_vertex_formats[i].format] + "', is not valid for a vertex array."); vdcache.bindings[i].binding = i; vdcache.bindings[i].stride = p_vertex_formats[i].stride; @@ -3512,6 +3824,10 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector<uint8_t> &p_data, bool p_use_restart_indices) { _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); ERR_FAIL_COND_V(p_index_count == 0, RID()); @@ -3626,13 +3942,11 @@ String RenderingDeviceVulkan::_shader_uniform_debug(RID p_shader, int p_set) { } #if 0 bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLayoutBinding> > &bindings, Vector<Vector<UniformInfo> > &uniform_infos, const glslang::TObjectReflection &reflection, RenderingDevice::ShaderStage p_stage, Shader::PushConstant &push_constant, String *r_error) { - VkDescriptorSetLayoutBinding layout_binding; UniformInfo info; switch (reflection.getType()->getBasicType()) { case glslang::EbtSampler: { - //print_line("DEBUG: IsSampler"); if (reflection.getType()->getSampler().dim == glslang::EsdBuffer) { //texture buffers @@ -3730,13 +4044,10 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa } break; /*case glslang::EbtReference: { - } break;*/ /*case glslang::EbtAtomicUint: { - } break;*/ default: { - if (reflection.getType()->getQualifier().hasOffset() || reflection.name.find(".") != std::string::npos) { //member of uniform block? return true; @@ -3838,6 +4149,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages bool is_compute = false; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + for (int i = 0; i < p_stages.size(); i++) { if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { is_compute = true; @@ -3854,6 +4167,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader."); + if (is_compute) { + compute_local_size[0] = module.entry_points->local_size.x; + compute_local_size[1] = module.entry_points->local_size.y; + compute_local_size[2] = module.entry_points->local_size.z; + } uint32_t binding_count = 0; result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -3933,6 +4251,10 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; info.type = UNIFORM_TYPE_INPUT_ATTACHMENT; } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + ERR_PRINT("Acceleration structure not supported."); + continue; + } break; } if (need_array_dimensions) { @@ -4054,6 +4376,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } } } + uint32_t pc_count = 0; result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4102,6 +4425,9 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader.fragment_outputs = fragment_outputs; shader.push_constant = push_constant; shader.is_compute = is_compute; + shader.compute_local_size[0] = compute_local_size[0]; + shader.compute_local_size[1] = compute_local_size[1]; + shader.compute_local_size[2] = compute_local_size[2]; String error_text; @@ -4205,8 +4531,10 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } pipeline_layout_create_info.pSetLayouts = layouts.ptr(); + // Needs to be declared in this outer scope, otherwise it may not outlive its assignment + // to pipeline_layout_create_info. + VkPushConstantRange push_constant_range; if (push_constant.push_constant_size) { - VkPushConstantRange push_constant_range; push_constant_range.stageFlags = push_constant.push_constants_vk_stage; push_constant_range.offset = 0; push_constant_range.size = push_constant.push_constant_size; @@ -4258,6 +4586,10 @@ RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Ve _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); Buffer buffer; Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); @@ -4273,6 +4605,10 @@ RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Ve RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, uint32_t p_usage) { _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); @@ -4296,6 +4632,10 @@ RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Ve RID RenderingDeviceVulkan::texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector<uint8_t> &p_data) { _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); uint32_t element_size = get_format_vertex_size(p_format); ERR_FAIL_COND_V_MSG(element_size == 0, RID(), "Format requested is not supported for texture buffers"); @@ -4444,7 +4784,7 @@ void RenderingDeviceVulkan::_descriptor_pool_free(const DescriptorPoolKey &p_key vkDestroyDescriptorPool(device, p_pool->pool, nullptr); descriptor_pools[p_key].erase(p_pool); memdelete(p_pool); - if (descriptor_pools[p_key].empty()) { + if (descriptor_pools[p_key].is_empty()) { descriptor_pools.erase(p_key); } } @@ -4478,7 +4818,7 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, List<Vector<VkBufferView>> buffer_views; List<Vector<VkDescriptorImageInfo>> image_infos; //used for verification to make sure a uniform set does not use a framebuffer bound texture - Vector<RID> attachable_textures; + LocalVector<UniformSet::AttachableTexture> attachable_textures; Vector<Texture *> mutable_sampled_textures; Vector<Texture *> mutable_storage_textures; @@ -4491,12 +4831,12 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, } } ERR_FAIL_COND_V_MSG(uniform_idx == -1, RID(), - "All the shader bindings for the given set must be covered by the uniforms provided. Binding (" + itos(set_uniform.binding) + ") was not provided."); + "All the shader bindings for the given set must be covered by the uniforms provided. Binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + ") was not provided."); const Uniform &uniform = uniforms[uniform_idx]; - ERR_FAIL_COND_V_MSG(uniform.type != set_uniform.type, RID(), - "Mismatch uniform type for binding (" + itos(set_uniform.binding) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.type] + "'."); + ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(), + "Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.uniform_type] + "'."); VkWriteDescriptorSet write; //common header write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -4511,7 +4851,7 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, write.pTexelBufferView = nullptr; uint32_t type_size = 1; - switch (uniform.type) { + switch (uniform.uniform_type) { case UNIFORM_TYPE_SAMPLER: { if (uniform.ids.size() != set_uniform.length) { if (set_uniform.length > 1) { @@ -4571,7 +4911,10 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.imageView = texture->view; if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { - attachable_textures.push_back(texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1]); + UniformSet::AttachableTexture attachable_texture; + attachable_texture.bind = set_uniform.binding; + attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1]; + attachable_textures.push_back(attachable_texture); } if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { @@ -4621,7 +4964,10 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.imageView = texture->view; if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { - attachable_textures.push_back(texture->owner.is_valid() ? texture->owner : uniform.ids[j]); + UniformSet::AttachableTexture attachable_texture; + attachable_texture.bind = set_uniform.binding; + attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j]; + attachable_textures.push_back(attachable_texture); } if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { @@ -4794,10 +5140,18 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, ERR_FAIL_COND_V_MSG(uniform.ids.size() != 1, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.ids.size()) + " provided)."); - Buffer *buffer = storage_buffer_owner.getornull(uniform.ids[0]); + Buffer *buffer = nullptr; + + if (storage_buffer_owner.owns(uniform.ids[0])) { + buffer = storage_buffer_owner.getornull(uniform.ids[0]); + } else if (vertex_buffer_owner.owns(uniform.ids[0])) { + buffer = vertex_buffer_owner.getornull(uniform.ids[0]); + + ERR_FAIL_COND_V_MSG(!(buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag."); + } ERR_FAIL_COND_V_MSG(!buffer, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") is invalid."); - //if 0, then its sized on link time + //if 0, then it's sized on link time ERR_FAIL_COND_V_MSG(set_uniform.length > 0 && buffer->size != (uint32_t)set_uniform.length, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ")."); @@ -4880,74 +5234,113 @@ bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) { return uniform_set_owner.owns(p_uniform_set); } -Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw) { +Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, - "Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a compute list"); - VkPipelineStageFlags dst_stage_mask; - VkAccessFlags dst_access; - - Buffer *buffer = nullptr; - if (vertex_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - dst_access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - buffer = vertex_buffer_owner.getornull(p_buffer); - } else if (index_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - dst_access = VK_ACCESS_INDEX_READ_BIT; - buffer = index_buffer_owner.getornull(p_buffer); - } else if (uniform_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_UNIFORM_READ_BIT; - buffer = uniform_buffer_owner.getornull(p_buffer); - } else if (texture_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_SHADER_READ_BIT; - buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; - } else if (storage_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - buffer = storage_buffer_owner.getornull(p_buffer); - } else { + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + // Protect subsequent updates... + dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } + Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier); + if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_sync_with_draw); + // no barrier should be needed here + // _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, true); + + Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_post_barrier); if (err) { return err; } - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw); #ifdef FORCE_FULL_BARRIER - _full_barrier(p_sync_with_draw); + _full_barrier(true); #else - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw); + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + } + #endif return err; } +Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier) { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, + "Size must be a multiple of four"); + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Updating buffers in is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a compute list"); + + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + // Protect subsequent updates... + dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } + + Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier); + if (!buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); + } + + ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, + "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); + + // should not be needed + // _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_post_barrier); + + vkCmdFillBuffer(frames[frame].draw_command_buffer, buffer->buffer, p_offset, p_size, 0); + +#ifdef FORCE_FULL_BARRIER + _full_barrier(true); +#else + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask); + +#endif + return OK; +} + Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { _THREAD_SAFE_METHOD_ - Buffer *buffer = nullptr; - if (vertex_buffer_owner.owns(p_buffer)) { - buffer = vertex_buffer_owner.getornull(p_buffer); - } else if (index_buffer_owner.owns(p_buffer)) { - buffer = index_buffer_owner.getornull(p_buffer); - } else if (texture_buffer_owner.owns(p_buffer)) { - buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; - } else if (storage_buffer_owner.owns(p_buffer)) { - buffer = storage_buffer_owner.getornull(p_buffer); - } else { + // It could be this buffer was just created + VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + // Get the vulkan buffer and the potential stage/access possible + Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL); + if (!buffer) { ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving."); } + // Make sure no one is using the buffer -- the "false" gets us to the same command buffer as below. + _buffer_memory_barrier(buffer->buffer, 0, buffer->size, src_stage_mask, src_access_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, false); + VkCommandBuffer command_buffer = frames[frame].setup_command_buffer; + Buffer tmp_buffer; _buffer_allocate(&tmp_buffer, buffer->size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY); VkBufferCopy region; @@ -5336,7 +5729,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma #endif //create ID to associate with this pipeline RID id = render_pipeline_owner.make_rid(pipeline); - //now add aall the dependencies + //now add all the dependencies _add_dependency(id, p_shader); return id; } @@ -5381,10 +5774,13 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { pipeline.pipeline_layout = shader->pipeline_layout; pipeline.shader = p_shader; pipeline.push_constant_size = shader->push_constant.push_constant_size; + pipeline.local_group_size[0] = shader->compute_local_size[0]; + pipeline.local_group_size[1] = shader->compute_local_size[1]; + pipeline.local_group_size[2] = shader->compute_local_size[2]; //create ID to associate with this pipeline RID id = compute_pipeline_owner.make_rid(pipeline); - //now add aall the dependencies + //now add all the dependencies _add_dependency(id, p_shader); return id; } @@ -5499,7 +5895,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(Di vkCmdSetScissor(command_buffer, 0, 1, &scissor); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass) { @@ -5552,11 +5948,18 @@ Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuff render_pass_begin.pNext = nullptr; render_pass_begin.renderPass = render_pass; render_pass_begin.framebuffer = vkframebuffer; - + /* + * Given how API works, it makes sense to always fully operate on the whole framebuffer. + * This allows better continue operations for operations like shadowmapping. render_pass_begin.renderArea.extent.width = viewport_size.width; render_pass_begin.renderArea.extent.height = viewport_size.height; render_pass_begin.renderArea.offset.x = viewport_offset.x; render_pass_begin.renderArea.offset.y = viewport_offset.y; + */ + render_pass_begin.renderArea.extent.width = framebuffer->size.width; + render_pass_begin.renderArea.extent.height = framebuffer->size.height; + render_pass_begin.renderArea.offset.x = 0; + render_pass_begin.renderArea.offset.y = 0; Vector<VkClearValue> clear_values; clear_values.resize(framebuffer->texture_ids.size()); @@ -5614,7 +6017,7 @@ Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuff image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); texture->layout = VK_IMAGE_LAYOUT_GENERAL; @@ -5683,7 +6086,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); - ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time."); Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer); ERR_FAIL_COND_V(!framebuffer, INVALID_ID); @@ -5704,12 +6107,19 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu viewport_offset = regioni.position; viewport_size = regioni.size; - - if (p_initial_color_action == INITIAL_ACTION_CLEAR) { + if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_color = true; + p_initial_color_action = INITIAL_ACTION_CONTINUE; + } + if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { + needs_clear_depth = true; + p_initial_depth_action = INITIAL_ACTION_CONTINUE; + } + if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_color = true; p_initial_color_action = INITIAL_ACTION_KEEP; } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR) { + if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_depth = true; p_initial_depth_action = INITIAL_ACTION_KEEP; } @@ -5766,7 +6176,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu vkCmdSetScissor(command_buffer, 0, 1, &scissor); draw_list->viewport = Rect2i(viewport_offset, viewport_size); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { @@ -5795,11 +6205,11 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p viewport_offset = regioni.position; viewport_size = regioni.size; - if (p_initial_color_action == INITIAL_ACTION_CLEAR) { + if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_color = true; p_initial_color_action = INITIAL_ACTION_KEEP; } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR) { + if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { needs_clear_depth = true; p_initial_depth_action = INITIAL_ACTION_KEEP; } @@ -5863,7 +6273,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p for (uint32_t i = 0; i < p_splits; i++) { //take a command buffer and initialize it - VkCommandBuffer command_buffer = split_draw_list_allocators[p_splits].command_buffers[frame]; + VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame]; VkCommandBufferInheritanceInfo inheritance_info; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; @@ -5921,7 +6331,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p scissor.extent.height = viewport_size.height; vkCmdSetScissor(command_buffer, 0, 1, &scissor); - r_split_ids[i] = (DrawListID(1) << DrawListID(ID_TYPE_SPLIT_DRAW_LIST)) + i; + r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i; draw_list[i].viewport = Rect2i(viewport_offset, viewport_size); } @@ -5936,7 +6346,7 @@ RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawL if (!draw_list) { return nullptr; - } else if (p_id == ID_TYPE_DRAW_LIST) { + } else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) { if (draw_list_split) { return nullptr; } @@ -6030,7 +6440,7 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) { #ifdef DEBUG_ENABLED - ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index > MAX_UNIFORM_SETS, + ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS, "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ")."); #endif DrawList *dl = _get_draw_list_ptr(p_list); @@ -6052,16 +6462,29 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_ dl->state.sets[p_index].uniform_set_format = uniform_set->format; dl->state.sets[p_index].uniform_set = p_uniform_set; + uint32_t mst_count = uniform_set->mutable_storage_textures.size(); + if (mst_count) { + Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw(); + for (uint32_t i = 0; i < mst_count; i++) { + if (mst_textures[i]->used_in_frame != frames_drawn) { + mst_textures[i]->used_in_frame = frames_drawn; + mst_textures[i]->used_in_transfer = false; + mst_textures[i]->used_in_compute = false; + } + mst_textures[i]->used_in_raster = true; + } + } + #ifdef DEBUG_ENABLED { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); - const RID *attachable_ptr = uniform_set->attachable_textures.ptr(); + const UniformSet::AttachableTexture *attachable_ptr = uniform_set->attachable_textures.ptr(); uint32_t bound_count = draw_list_bound_textures.size(); const RID *bound_ptr = draw_list_bound_textures.ptr(); for (uint32_t i = 0; i < attachable_count; i++) { for (uint32_t j = 0; j < bound_count; j++) { - ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j], - "Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed."); + ERR_FAIL_COND_MSG(attachable_ptr[i].texture == bound_ptr[j], + "Attempted to use the same texture in framebuffer attachment and a uniform (set: " + itos(p_index) + ", binding: " + itos(attachable_ptr[i].bind) + "), this is not allowed."); } } } @@ -6265,7 +6688,7 @@ void RenderingDeviceVulkan::draw_list_enable_scissor(DrawListID p_list, const Re Rect2i rect = p_rect; rect.position += dl->viewport.position; - rect = dl->viewport.clip(rect); + rect = dl->viewport.intersection(rect); if (rect.get_area() == 0) { return; @@ -6294,7 +6717,7 @@ void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor); } -void RenderingDeviceVulkan::draw_list_end() { +void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -6303,8 +6726,8 @@ void RenderingDeviceVulkan::draw_list_end() { //send all command buffers VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count); for (uint32_t i = 0; i < draw_list_count; i++) { - vkEndCommandBuffer(draw_list->command_buffer); - command_buffers[i] = draw_list->command_buffer; + vkEndCommandBuffer(draw_list[i].command_buffer); + command_buffers[i] = draw_list[i].command_buffer; } vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, command_buffers); @@ -6330,16 +6753,51 @@ void RenderingDeviceVulkan::draw_list_end() { } } + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + draw_list_bound_textures.clear(); - for (int i = 0; i < draw_list_storage_textures.size(); i++) { + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = draw_list_storage_textures.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size()); + } + + uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + if (image_barrier_count) { + src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + src_access |= VK_ACCESS_SHADER_WRITE_BIT; + } + + for (uint32_t i = 0; i < image_barrier_count; i++) { Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]); - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[i]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.srcAccessMask = src_access; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = texture->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6352,8 +6810,6 @@ void RenderingDeviceVulkan::draw_list_end() { image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; image_memory_barrier.subresourceRange.layerCount = texture->layers; - vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6361,12 +6817,22 @@ void RenderingDeviceVulkan::draw_list_end() { // To ensure proper synchronization, we must make sure rendering is done before: // * Some buffer is copied - // * Another render pass happens (since we may be done + // * Another render pass happens (since we may be done) #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, true); + + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = src_access; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif } @@ -6374,12 +6840,13 @@ void RenderingDeviceVulkan::draw_list_end() { /**** COMPUTE LISTS ****/ /***********************/ -RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { - ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); +RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) { + ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); compute_list = memnew(ComputeList); compute_list->command_buffer = frames[frame].draw_command_buffer; + compute_list->state.allow_draw_overlap = p_allow_draw_overlap; return ID_TYPE_COMPUTE_LIST; } @@ -6436,6 +6903,9 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l } cl->state.pipeline_shader = pipeline->shader; + cl->state.local_group_size[0] = pipeline->local_group_size[0]; + cl->state.local_group_size[1] = pipeline->local_group_size[1]; + cl->state.local_group_size[2] = pipeline->local_group_size[2]; } #ifdef DEBUG_ENABLED @@ -6452,7 +6922,7 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, ComputeList *cl = compute_list; #ifdef DEBUG_ENABLED - ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index > MAX_UNIFORM_SETS, + ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS, "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ")."); #endif @@ -6473,11 +6943,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, cl->state.sets[p_index].uniform_set = p_uniform_set; uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size(); + uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); + Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw(); + VkImageMemoryBarrier *texture_barriers = nullptr; + + if (textures_to_sampled_count + textures_to_storage_count) { + texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count)); + } + uint32_t texture_barrier_count = 0; + + uint32_t src_stage_flags = 0; + for (uint32_t i = 0; i < textures_to_sampled_count; i++) { if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - VkImageMemoryBarrier image_memory_barrier; + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -6494,23 +6977,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]); } + + if (textures_to_sampled[i]->used_in_frame != frames_drawn) { + textures_to_sampled[i]->used_in_frame = frames_drawn; + textures_to_sampled[i]->used_in_transfer = false; + textures_to_sampled[i]->used_in_raster = false; + } + textures_to_sampled[i]->used_in_compute = true; } - uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw(); for (uint32_t i = 0; i < textures_to_storage_count; i++) { if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) { - VkImageMemoryBarrier image_memory_barrier; + uint32_t src_access_flags = 0; + + if (textures_to_storage[i]->used_in_frame == frames_drawn) { + if (textures_to_storage[i]->used_in_compute) { + src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_raster) { + src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (textures_to_storage[i]->used_in_transfer) { + src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + + } else { + src_access_flags = 0; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_raster = false; + textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_frame = frames_drawn; + } + + VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.srcAccessMask = src_access_flags; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; image_memory_barrier.oldLayout = textures_to_storage[i]->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; @@ -6524,14 +7039,20 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL; cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards } } + if (texture_barrier_count) { + if (src_stage_flags == 0) { + src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + + vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers); + } + #if 0 { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -6625,6 +7146,27 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups); } +void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) { + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_COND(!compute_list); + + ComputeList *cl = compute_list; + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw."); + + if (cl->validation.pipeline_push_constant_size > 0) { + //using push constants, check that they were supplied + ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + + compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1); +} + void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_COND(!compute_list); @@ -6689,15 +7231,44 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { #endif } -void RenderingDeviceVulkan::compute_list_end() { +void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) { ERR_FAIL_COND(!compute_list); + uint32_t barrier_flags = 0; + uint32_t access_flags = 0; + if (p_post_barrier & BARRIER_MASK_COMPUTE) { + barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_post_barrier & BARRIER_MASK_RASTER) { + barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_post_barrier & BARRIER_MASK_TRANSFER) { + barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + if (barrier_flags == 0) { + barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + VkImageMemoryBarrier *image_barriers = nullptr; + + uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size(); + + if (image_barrier_count) { + image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count); + } + + uint32_t barrier_idx = 0; + for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { - VkImageMemoryBarrier image_memory_barrier; + VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++]; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = access_flags; image_memory_barrier.oldLayout = E->get()->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6710,18 +7281,75 @@ void RenderingDeviceVulkan::compute_list_end() { image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer; image_memory_barrier.subresourceRange.layerCount = E->get()->layers; - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); - E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + if (E->get()->used_in_frame != frames_drawn) { + E->get()->used_in_transfer = false; + E->get()->used_in_raster = false; + E->get()->used_in_compute = false; + E->get()->used_in_frame = frames_drawn; + } } - memdelete(compute_list); - compute_list = nullptr; #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); + VkMemoryBarrier mem_barrier; + mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + mem_barrier.pNext = nullptr; + mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + mem_barrier.dstAccessMask = access_flags; + + if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers); + } + #endif + + memdelete(compute_list); + compute_list = nullptr; +} + +void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) { + uint32_t src_barrier_flags = 0; + uint32_t src_access_flags = 0; + if (p_from & BARRIER_MASK_COMPUTE) { + src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_from & BARRIER_MASK_RASTER) { + src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + if (p_from & BARRIER_MASK_TRANSFER) { + src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT; + } + + if (p_from == 0) { + src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + + uint32_t dst_barrier_flags = 0; + uint32_t dst_access_flags = 0; + if (p_to & BARRIER_MASK_COMPUTE) { + dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + if (p_to & BARRIER_MASK_RASTER) { + dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + if (p_to & BARRIER_MASK_TRANSFER) { + dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + } + + if (p_to == 0) { + dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + _memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true); } void RenderingDeviceVulkan::full_barrier() { @@ -6733,7 +7361,6 @@ void RenderingDeviceVulkan::full_barrier() { #if 0 void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) { - VkCommandBuffer frame_cmdbuf = frames[frame].frame_buffer; ERR_FAIL_COND(!frame_cmdbuf); @@ -6762,7 +7389,6 @@ void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_frame ID screen_format = screen_get_framebuffer_format(); { - VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * p_draw_list_count); uint32_t command_buffer_count = 0; @@ -6786,7 +7412,6 @@ void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_frame } vkCmdEndRenderPass(frame_cmdbuf); - } #endif @@ -6861,6 +7486,82 @@ void RenderingDeviceVulkan::free(RID p_id) { _free_internal(p_id); } +// The full list of resources that can be named is in the VkObjectType enum +// We just expose the resources that are owned and can be accessed easily. +void RenderingDeviceVulkan::set_resource_name(RID p_id, const String p_name) { + if (texture_owner.owns(p_id)) { + Texture *texture = texture_owner.getornull(p_id); + if (texture->owner.is_null()) { + // Don't set the source texture's name when calling on a texture view + context->set_object_name(VK_OBJECT_TYPE_IMAGE, uint64_t(texture->image), p_name); + } + context->set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, uint64_t(texture->view), p_name + " View"); + } else if (framebuffer_owner.owns(p_id)) { + //Framebuffer *framebuffer = framebuffer_owner.getornull(p_id); + // Not implemented for now as the relationship between Framebuffer and RenderPass is very complex + } else if (sampler_owner.owns(p_id)) { + VkSampler *sampler = sampler_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_SAMPLER, uint64_t(*sampler), p_name); + } else if (vertex_buffer_owner.owns(p_id)) { + Buffer *vertex_buffer = vertex_buffer_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(vertex_buffer->buffer), p_name); + } else if (index_buffer_owner.owns(p_id)) { + IndexBuffer *index_buffer = index_buffer_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(index_buffer->buffer), p_name); + } else if (shader_owner.owns(p_id)) { + Shader *shader = shader_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(shader->pipeline_layout), p_name + " Pipeline Layout"); + for (int i = 0; i < shader->sets.size(); i++) { + context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, uint64_t(shader->sets[i].descriptor_set_layout), p_name); + } + } else if (uniform_buffer_owner.owns(p_id)) { + Buffer *uniform_buffer = uniform_buffer_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(uniform_buffer->buffer), p_name); + } else if (texture_buffer_owner.owns(p_id)) { + TextureBuffer *texture_buffer = texture_buffer_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(texture_buffer->buffer.buffer), p_name); + context->set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, uint64_t(texture_buffer->view), p_name + " View"); + } else if (storage_buffer_owner.owns(p_id)) { + Buffer *storage_buffer = storage_buffer_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(storage_buffer->buffer), p_name); + } else if (uniform_set_owner.owns(p_id)) { + UniformSet *uniform_set = uniform_set_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, uint64_t(uniform_set->descriptor_set), p_name); + } else if (render_pipeline_owner.owns(p_id)) { + RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name); + context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout"); + } else if (compute_pipeline_owner.owns(p_id)) { + ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_id); + context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(pipeline->pipeline), p_name); + context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(pipeline->pipeline_layout), p_name + " Layout"); + } else { + ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id())); + } +} + +void RenderingDeviceVulkan::draw_command_begin_label(String p_label_name, const Color p_color) { + context->command_begin_label(frames[frame].draw_command_buffer, p_label_name, p_color); +} + +void RenderingDeviceVulkan::draw_command_insert_label(String p_label_name, const Color p_color) { + context->command_insert_label(frames[frame].draw_command_buffer, p_label_name, p_color); +} + +void RenderingDeviceVulkan::draw_command_end_label() { + context->command_end_label(frames[frame].draw_command_buffer); +} + +String RenderingDeviceVulkan::get_device_vendor_name() const { + return context->get_device_vendor_name(); +} +String RenderingDeviceVulkan::get_device_name() const { + return context->get_device_name(); +} +String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const { + return context->get_device_pipeline_cache_uuid(); +} + void RenderingDeviceVulkan::_finalize_command_bufers() { if (draw_list) { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); @@ -6913,6 +7614,7 @@ void RenderingDeviceVulkan::_begin_frame() { if (frames[frame].timestamp_count) { vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); + vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count); SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); } @@ -7078,7 +7780,7 @@ uint64_t RenderingDeviceVulkan::get_memory_usage() const { void RenderingDeviceVulkan::_flush(bool p_current_frame) { if (local_device.is_valid() && !p_current_frame) { - return; //flushign previous frames has no effect with local device + return; //flushing previous frames has no effect with local device } //not doing this crashes RADV (undefined behavior) if (p_current_frame) { @@ -7132,6 +7834,18 @@ void RenderingDeviceVulkan::_flush(bool p_current_frame) { } void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_device) { + // get our device capabilities + { + device_capabilities.version_major = p_context->get_vulkan_major(); + device_capabilities.version_minor = p_context->get_vulkan_minor(); + + // get info about subgroups + VulkanContext::SubgroupCapabilities subgroup_capabilities = p_context->get_subgroup_capabilities(); + device_capabilities.subgroup_size = subgroup_capabilities.size; + device_capabilities.subgroup_in_shaders = subgroup_capabilities.supported_stages_flags_rd(); + device_capabilities.subgroup_operations = subgroup_capabilities.supported_operations_flags_rd(); + } + context = p_context; device = p_context->get_device(); if (p_local_device) { @@ -7279,9 +7993,10 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) { } } -void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_to_draw) { +void RenderingDeviceVulkan::capture_timestamp(const String &p_name) { ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); + //this should be optional for profiling, else it will slow things down { VkMemoryBarrier memoryBarrier; @@ -7318,9 +8033,10 @@ void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_ VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; - vkCmdPipelineBarrier(p_sync_to_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); } - vkCmdWriteTimestamp(p_sync_to_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count); + + vkCmdWriteTimestamp(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count); frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name; frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec(); frames[frame].timestamp_count++; @@ -7549,7 +8265,7 @@ RenderingDevice *RenderingDeviceVulkan::create_local_device() { } RenderingDeviceVulkan::RenderingDeviceVulkan() { - screen_prepared = false; + device_capabilities.device_family = DEVICE_VULKAN; } RenderingDeviceVulkan::~RenderingDeviceVulkan() { diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 6f8bbc9c64..a2527d5c33 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -31,9 +31,10 @@ #ifndef RENDERING_DEVICE_VULKAN_H #define RENDERING_DEVICE_VULKAN_H -#include "core/oa_hash_map.h" #include "core/os/thread_safe.h" -#include "core/rid_owner.h" +#include "core/templates/local_vector.h" +#include "core/templates/oa_hash_map.h" +#include "core/templates/rid_owner.h" #include "servers/rendering/rendering_device.h" #ifdef DEBUG_ENABLED @@ -45,11 +46,6 @@ #include <vulkan/vulkan.h> -//todo: -//compute -//push constants -//views of texture slices - class VulkanContext; class RenderingDeviceVulkan : public RenderingDevice { @@ -99,7 +95,7 @@ class RenderingDeviceVulkan : public RenderingDevice { ID_BASE_SHIFT = 58 //5 bits for ID types }; - VkDevice device; + VkDevice device = VK_NULL_HANDLE; Map<RID, Set<RID>> dependency_map; //IDs to IDs that depend on it Map<RID, Set<RID>> reverse_dependency_map; //same as above, but in reverse @@ -124,35 +120,40 @@ class RenderingDeviceVulkan : public RenderingDevice { // for a framebuffer to render into it. struct Texture { - VkImage image; - VmaAllocation allocation; + VkImage image = VK_NULL_HANDLE; + VmaAllocation allocation = nullptr; VmaAllocationInfo allocation_info; - VkImageView view; + VkImageView view = VK_NULL_HANDLE; TextureType type; DataFormat format; TextureSamples samples; - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t layers; - uint32_t mipmaps; - uint32_t usage_flags; - uint32_t base_mipmap; - uint32_t base_layer; + uint32_t width = 0; + uint32_t height = 0; + uint32_t depth = 0; + uint32_t layers = 0; + uint32_t mipmaps = 0; + uint32_t usage_flags = 0; + uint32_t base_mipmap = 0; + uint32_t base_layer = 0; Vector<DataFormat> allowed_shared_formats; VkImageLayout layout; - uint32_t read_aspect_mask; - uint32_t barrier_aspect_mask; - bool bound; //bound to framebffer + uint64_t used_in_frame = 0; + bool used_in_transfer = false; + bool used_in_raster = false; + bool used_in_compute = false; + + uint32_t read_aspect_mask = 0; + uint32_t barrier_aspect_mask = 0; + bool bound = false; //bound to framebffer RID owner; }; RID_Owner<Texture, true> texture_owner; - uint32_t texture_upload_region_size_px; + uint32_t texture_upload_region_size_px = 0; Vector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d = false); @@ -188,32 +189,28 @@ class RenderingDeviceVulkan : public RenderingDevice { // See the comments in the code to understand better how it works. struct StagingBufferBlock { - VkBuffer buffer; - VmaAllocation allocation; - uint64_t frame_used; - uint32_t fill_amount; + VkBuffer buffer = VK_NULL_HANDLE; + VmaAllocation allocation = nullptr; + uint64_t frame_used = 0; + uint32_t fill_amount = 0; }; Vector<StagingBufferBlock> staging_buffer_blocks; - int staging_buffer_current; - uint32_t staging_buffer_block_size; - uint64_t staging_buffer_max_size; - bool staging_buffer_used; + int staging_buffer_current = 0; + uint32_t staging_buffer_block_size = 0; + uint64_t staging_buffer_max_size = 0; + bool staging_buffer_used = false; Error _staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment = true, bool p_on_draw_command_buffer = false); Error _insert_staging_block(); struct Buffer { - uint32_t size; - uint32_t usage; - VkBuffer buffer; - VmaAllocation allocation; + uint32_t size = 0; + uint32_t usage = 0; + VkBuffer buffer = VK_NULL_HANDLE; + VmaAllocation allocation = nullptr; VkDescriptorBufferInfo buffer_info; //used for binding Buffer() { - size = 0; - usage = 0; - buffer = VK_NULL_HANDLE; - allocation = nullptr; } }; @@ -236,13 +233,8 @@ class RenderingDeviceVulkan : public RenderingDevice { // used for the render pipelines. struct FramebufferFormatKey { - Size2i empty_size; Vector<AttachmentFormat> attachments; bool operator<(const FramebufferFormatKey &p_key) const { - if (empty_size != p_key.empty_size) { - return empty_size < p_key.empty_size; - } - int as = attachments.size(); int bs = p_key.attachments.size(); if (as != bs) { @@ -276,15 +268,15 @@ class RenderingDeviceVulkan : public RenderingDevice { Map<FramebufferFormatKey, FramebufferFormatID> framebuffer_format_cache; struct FramebufferFormat { const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E; - VkRenderPass render_pass; //here for constructing shaders, never used, see section (7.2. Render Pass Compatibility from Vulkan spec) - int color_attachments; //used for pipeline validation + VkRenderPass render_pass = VK_NULL_HANDLE; //here for constructing shaders, never used, see section (7.2. Render Pass Compatibility from Vulkan spec) + int color_attachments = 0; //used for pipeline validation TextureSamples samples; }; Map<FramebufferFormatID, FramebufferFormat> framebuffer_formats; struct Framebuffer { - FramebufferFormatID format_id; + FramebufferFormatID format_id = 0; struct VersionKey { InitialAction initial_color_action; FinalAction final_color_action; @@ -307,12 +299,12 @@ class RenderingDeviceVulkan : public RenderingDevice { } }; - uint32_t storage_mask; + uint32_t storage_mask = 0; Vector<RID> texture_ids; struct Version { - VkFramebuffer framebuffer; - VkRenderPass render_pass; //this one is owned + VkFramebuffer framebuffer = VK_NULL_HANDLE; + VkRenderPass render_pass = VK_NULL_HANDLE; //this one is owned }; Map<VersionKey, Version> framebuffers; @@ -399,8 +391,8 @@ class RenderingDeviceVulkan : public RenderingDevice { struct VertexDescriptionCache { Vector<VertexAttribute> vertex_formats; - VkVertexInputBindingDescription *bindings; - VkVertexInputAttributeDescription *attributes; + VkVertexInputBindingDescription *bindings = nullptr; + VkVertexInputAttributeDescription *attributes = nullptr; VkPipelineVertexInputStateCreateInfo create_info; }; @@ -408,9 +400,9 @@ class RenderingDeviceVulkan : public RenderingDevice { struct VertexArray { RID buffer; - VertexFormatID description; - int vertex_count; - uint32_t max_instances_allowed; + VertexFormatID description = 0; + int vertex_count = 0; + uint32_t max_instances_allowed = 0; Vector<VkBuffer> buffers; //not owned, just referenced Vector<VkDeviceSize> offsets; @@ -419,21 +411,21 @@ class RenderingDeviceVulkan : public RenderingDevice { RID_Owner<VertexArray, true> vertex_array_owner; struct IndexBuffer : public Buffer { - uint32_t max_index; //used for validation - uint32_t index_count; - VkIndexType index_type; - bool supports_restart_indices; + uint32_t max_index = 0; //used for validation + uint32_t index_count = 0; + VkIndexType index_type = VK_INDEX_TYPE_NONE_NV; + bool supports_restart_indices = false; }; RID_Owner<IndexBuffer, true> index_buffer_owner; struct IndexArray { - uint32_t max_index; //remember the maximum index here too, for validation + uint32_t max_index = 0; //remember the maximum index here too, for validation VkBuffer buffer; //not owned, inherited from index buffer - uint32_t offset; - uint32_t indices; - VkIndexType index_type; - bool supports_restart_indices; + uint32_t offset = 0; + uint32_t indices = 0; + VkIndexType index_type = VK_INDEX_TYPE_NONE_NV; + bool supports_restart_indices = false; }; RID_Owner<IndexArray, true> index_array_owner; @@ -459,10 +451,10 @@ class RenderingDeviceVulkan : public RenderingDevice { }; struct UniformInfo { - UniformType type; - int binding; - uint32_t stages; - int length; //size of arrays (in total elements), or ubos (in bytes * total elements) + UniformType type = UniformType::UNIFORM_TYPE_MAX; + int binding = 0; + uint32_t stages = 0; + int length = 0; //size of arrays (in total elements), or ubos (in bytes * total elements) bool operator!=(const UniformInfo &p_info) const { return (binding != p_info.binding || type != p_info.type || stages != p_info.stages || length != p_info.length); @@ -528,25 +520,27 @@ class RenderingDeviceVulkan : public RenderingDevice { struct Shader { struct Set { Vector<UniformInfo> uniform_info; - VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSetLayout descriptor_set_layout = VK_NULL_HANDLE; }; - uint32_t vertex_input_mask; //inputs used, this is mostly for validation - int fragment_outputs; + uint32_t vertex_input_mask = 0; //inputs used, this is mostly for validation + int fragment_outputs = 0; struct PushConstant { - uint32_t push_constant_size; - uint32_t push_constants_vk_stage; + uint32_t push_constant_size = 0; + uint32_t push_constants_vk_stage = 0; }; PushConstant push_constant; + uint32_t compute_local_size[3] = { 0, 0, 0 }; + bool is_compute = false; - int max_output; + int max_output = 0; Vector<Set> sets; Vector<uint32_t> set_formats; Vector<VkPipelineShaderStageCreateInfo> pipeline_stages; - VkPipelineLayout pipeline_layout; + VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; }; String _shader_uniform_debug(RID p_shader, int p_set = -1); @@ -610,7 +604,7 @@ class RenderingDeviceVulkan : public RenderingDevice { }; Map<DescriptorPoolKey, Set<DescriptorPool *>> descriptor_pools; - uint32_t max_descriptors_per_pool; + uint32_t max_descriptors_per_pool = 0; DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key); void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool); @@ -621,7 +615,7 @@ class RenderingDeviceVulkan : public RenderingDevice { //texture buffer needs a view struct TextureBuffer { Buffer buffer; - VkBufferView view; + VkBufferView view = VK_NULL_HANDLE; }; RID_Owner<TextureBuffer, true> texture_buffer_owner; @@ -635,14 +629,19 @@ class RenderingDeviceVulkan : public RenderingDevice { // the above restriction is not too serious. struct UniformSet { - uint32_t format; + uint32_t format = 0; RID shader_id; - uint32_t shader_set; - DescriptorPool *pool; + uint32_t shader_set = 0; + DescriptorPool *pool = nullptr; DescriptorPoolKey pool_key; - VkDescriptorSet descriptor_set; + VkDescriptorSet descriptor_set = VK_NULL_HANDLE; //VkPipelineLayout pipeline_layout; //not owned, inherited from shader - Vector<RID> attachable_textures; //used for validation + struct AttachableTexture { + uint32_t bind; + RID texture; + }; + + LocalVector<AttachableTexture> attachable_textures; //used for validation Vector<Texture *> mutable_sampled_textures; //used for layout change Vector<Texture *> mutable_storage_textures; //used for layout change }; @@ -668,21 +667,21 @@ class RenderingDeviceVulkan : public RenderingDevice { //Cached values for validation #ifdef DEBUG_ENABLED struct Validation { - FramebufferFormatID framebuffer_format; - uint32_t dynamic_state; - VertexFormatID vertex_format; - bool uses_restart_indices; - uint32_t primitive_minimum; - uint32_t primitive_divisor; + FramebufferFormatID framebuffer_format = 0; + uint32_t dynamic_state = 0; + VertexFormatID vertex_format = 0; + bool uses_restart_indices = false; + uint32_t primitive_minimum = 0; + uint32_t primitive_divisor = 0; } validation; #endif //Actual pipeline RID shader; Vector<uint32_t> set_formats; - VkPipelineLayout pipeline_layout; // not owned, needed for push constants - VkPipeline pipeline; - uint32_t push_constant_size; - uint32_t push_constant_stages; + VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; // not owned, needed for push constants + VkPipeline pipeline = VK_NULL_HANDLE; + uint32_t push_constant_size = 0; + uint32_t push_constant_stages = 0; }; RID_Owner<RenderPipeline, true> render_pipeline_owner; @@ -690,10 +689,11 @@ class RenderingDeviceVulkan : public RenderingDevice { struct ComputePipeline { RID shader; Vector<uint32_t> set_formats; - VkPipelineLayout pipeline_layout; // not owned, needed for push constants - VkPipeline pipeline; - uint32_t push_constant_size; - uint32_t push_constant_stages; + VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; // not owned, needed for push constants + VkPipeline pipeline = VK_NULL_HANDLE; + uint32_t push_constant_size = 0; + uint32_t push_constant_stages = 0; + uint32_t local_group_size[3] = { 0, 0, 0 }; }; RID_Owner<ComputePipeline, true> compute_pipeline_owner; @@ -714,14 +714,14 @@ class RenderingDeviceVulkan : public RenderingDevice { // each needs it's own command pool. struct SplitDrawListAllocator { - VkCommandPool command_pool; + VkCommandPool command_pool = VK_NULL_HANDLE; Vector<VkCommandBuffer> command_buffers; //one for each frame }; Vector<SplitDrawListAllocator> split_draw_list_allocators; struct DrawList { - VkCommandBuffer command_buffer; // If persistent, this is owned, otherwise it's shared with the ringbuffer. + VkCommandBuffer command_buffer = VK_NULL_HANDLE; // If persistent, this is owned, otherwise it's shared with the ringbuffer. Rect2i viewport; struct SetState { @@ -755,7 +755,7 @@ class RenderingDeviceVulkan : public RenderingDevice { bool index_buffer_uses_restart_indices = false; uint32_t index_array_size = 0; uint32_t index_array_max_index = 0; - uint32_t index_array_offset; + uint32_t index_array_offset = 0; Vector<uint32_t> set_formats; Vector<bool> set_bound; Vector<RID> set_rids; @@ -766,8 +766,8 @@ class RenderingDeviceVulkan : public RenderingDevice { RID pipeline_shader; uint32_t invalid_set_from = 0; bool pipeline_uses_restart_indices = false; - uint32_t pipeline_primitive_divisor; - uint32_t pipeline_primitive_minimum; + uint32_t pipeline_primitive_divisor = 0; + uint32_t pipeline_primitive_minimum = 0; Vector<uint32_t> pipeline_set_formats; uint32_t pipeline_push_constant_size = 0; bool pipeline_push_constant_supplied = false; @@ -781,25 +781,26 @@ class RenderingDeviceVulkan : public RenderingDevice { #endif }; - DrawList *draw_list; // One for regular draw lists, multiple for split. - uint32_t draw_list_count; - bool draw_list_split; + DrawList *draw_list = nullptr; // One for regular draw lists, multiple for split. + uint32_t draw_list_count = 0; + bool draw_list_split = false; Vector<RID> draw_list_bound_textures; Vector<RID> draw_list_storage_textures; - bool draw_list_unbind_color_textures; - bool draw_list_unbind_depth_textures; + bool draw_list_unbind_color_textures = false; + bool draw_list_unbind_depth_textures = false; void _draw_list_insert_clear_region(DrawList *draw_list, Framebuffer *framebuffer, Point2i viewport_offset, Point2i viewport_size, bool p_clear_color, const Vector<Color> &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil); Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass); Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); + Buffer *_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &dst_stage_mask, VkAccessFlags &dst_access, uint32_t p_post_barrier); /**********************/ /**** COMPUTE LIST ****/ /**********************/ struct ComputeList { - VkCommandBuffer command_buffer; // If persistent, this is owned, otherwise it's shared with the ringbuffer. + VkCommandBuffer command_buffer = VK_NULL_HANDLE; // If persistent, this is owned, otherwise it's shared with the ringbuffer. struct SetState { uint32_t pipeline_expected_format = 0; @@ -815,8 +816,10 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t set_count = 0; RID pipeline; RID pipeline_shader; + uint32_t local_group_size[3] = { 0, 0, 0 }; VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; uint32_t pipeline_push_constant_stages = 0; + bool allow_draw_overlap; } state; #ifdef DEBUG_ENABLED @@ -836,7 +839,7 @@ class RenderingDeviceVulkan : public RenderingDevice { #endif }; - ComputeList *compute_list; + ComputeList *compute_list = nullptr; /**************************/ /**** FRAME MANAGEMENT ****/ @@ -868,46 +871,46 @@ class RenderingDeviceVulkan : public RenderingDevice { List<RenderPipeline> render_pipelines_to_dispose_of; List<ComputePipeline> compute_pipelines_to_dispose_of; - VkCommandPool command_pool; - VkCommandBuffer setup_command_buffer; //used at the beginning of every frame for set-up - VkCommandBuffer draw_command_buffer; //used at the beginning of every frame for set-up + VkCommandPool command_pool = VK_NULL_HANDLE; + VkCommandBuffer setup_command_buffer = VK_NULL_HANDLE; //used at the beginning of every frame for set-up + VkCommandBuffer draw_command_buffer = VK_NULL_HANDLE; //used at the beginning of every frame for set-up struct Timestamp { String description; - uint64_t value; + uint64_t value = 0; }; VkQueryPool timestamp_pool; - String *timestamp_names; - uint64_t *timestamp_cpu_values; - uint32_t timestamp_count; - String *timestamp_result_names; - uint64_t *timestamp_cpu_result_values; - uint64_t *timestamp_result_values; - uint32_t timestamp_result_count; - uint64_t index; + String *timestamp_names = nullptr; + uint64_t *timestamp_cpu_values = nullptr; + uint32_t timestamp_count = 0; + String *timestamp_result_names = nullptr; + uint64_t *timestamp_cpu_result_values = nullptr; + uint64_t *timestamp_result_values = nullptr; + uint32_t timestamp_result_count = 0; + uint64_t index = 0; }; - uint32_t max_timestamp_query_elements; + uint32_t max_timestamp_query_elements = 0; - Frame *frames; //frames available, for main device they are cycled (usually 3), for local devices only 1 - int frame; //current frame - int frame_count; //total amount of frames - uint64_t frames_drawn; + Frame *frames = nullptr; //frames available, for main device they are cycled (usually 3), for local devices only 1 + int frame = 0; //current frame + int frame_count = 0; //total amount of frames + uint64_t frames_drawn = 0; RID local_device; bool local_device_processing = false; void _free_pending_resources(int p_frame); - VmaAllocator allocator; + VmaAllocator allocator = nullptr; - VulkanContext *context; + VulkanContext *context = nullptr; void _free_internal(RID p_id); void _flush(bool p_current_frame); - bool screen_prepared; + bool screen_prepared = false; template <class T> void _free_rids(T &p_owner, const char *p_type); @@ -920,27 +923,27 @@ public: virtual RID texture_create_shared(const TextureView &p_view, RID p_with_texture); virtual RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type = TEXTURE_SLICE_2D); - virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, bool p_sync_with_draw = false); + virtual Error texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); virtual Vector<uint8_t> texture_get_data(RID p_texture, uint32_t p_layer); virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const; virtual bool texture_is_shared(RID p_texture); virtual bool texture_is_valid(RID p_texture); - virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw = false); - virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw = false); - virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw = false); + virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, uint32_t p_post_barrier = BARRIER_MASK_ALL); /*********************/ /**** FRAMEBUFFER ****/ /*********************/ virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format); - virtual FramebufferFormatID framebuffer_format_create_empty(const Size2i &p_size); + virtual FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1); virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format); virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID); - virtual RID framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check = INVALID_ID); + virtual RID framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples = TEXTURE_SAMPLES_1, FramebufferFormatID p_format_check = INVALID_ID); virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer); @@ -954,7 +957,7 @@ public: /**** VERTEX ARRAY ****/ /**********************/ - virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>()); + virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_as_storage = false); // Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated virtual VertexFormatID vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats); @@ -982,7 +985,8 @@ public: virtual RID uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set); virtual bool uniform_set_is_valid(RID p_uniform_set); - virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false); //works for any buffer + virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); //works for any buffer + virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL); virtual Vector<uint8_t> buffer_get_data(RID p_buffer); /*************************/ @@ -1028,22 +1032,24 @@ public: virtual void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); virtual void draw_list_disable_scissor(DrawListID p_list); - virtual void draw_list_end(); + virtual void draw_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); /***********************/ /**** COMPUTE LISTS ****/ /***********************/ - virtual ComputeListID compute_list_begin(); + virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false); virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); virtual void compute_list_add_barrier(ComputeListID p_list); virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); - virtual void compute_list_end(); + virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL); + virtual void barrier(uint32_t p_from = BARRIER_MASK_ALL, uint32_t p_to = BARRIER_MASK_ALL); virtual void full_barrier(); /**************/ @@ -1056,7 +1062,7 @@ public: /**** Timing ****/ /****************/ - virtual void capture_timestamp(const String &p_name, bool p_sync_to_draw); + virtual void capture_timestamp(const String &p_name); virtual uint32_t get_captured_timestamps_count() const; virtual uint64_t get_captured_timestamps_frame() const; virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const; @@ -1084,6 +1090,16 @@ public: virtual uint64_t get_memory_usage() const; + virtual void set_resource_name(RID p_id, const String p_name); + + virtual void draw_command_begin_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); + virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)); + virtual void draw_command_end_label(); + + virtual String get_device_vendor_name() const; + virtual String get_device_name() const; + virtual String get_device_pipeline_cache_uuid() const; + RenderingDeviceVulkan(); ~RenderingDeviceVulkan(); }; diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 997ed3935f..36db7c56f0 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -30,10 +30,11 @@ #include "vulkan_context.h" -#include "core/engine.h" -#include "core/project_settings.h" -#include "core/ustring.h" +#include "core/config/engine.h" +#include "core/config/project_settings.h" +#include "core/string/ustring.h" #include "core/version.h" +#include "servers/rendering/rendering_device.h" #include "vk_enum_string_helper.h" @@ -55,10 +56,29 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_messenger_callback( return VK_FALSE; } // This needs to be ignored because Validator is wrong here. + if (strstr(pCallbackData->pMessage, "Invalid SPIR-V binary version 1.3") != nullptr) { + return VK_FALSE; + } + // This needs to be ignored because Validator is wrong here. + if (strstr(pCallbackData->pMessage, "Shader requires flag") != nullptr) { + return VK_FALSE; + } + + // This needs to be ignored because Validator is wrong here. if (strstr(pCallbackData->pMessage, "SPIR-V module not valid: Pointer operand") != nullptr && strstr(pCallbackData->pMessage, "must be a memory object") != nullptr) { return VK_FALSE; } + /* + // This is a valid warning because its illegal in Vulkan, but in practice it should work according to VK_KHR_maintenance2 + if (strstr(pCallbackData->pMessage, "VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 with tiling VK_IMAGE_TILING_OPTIMAL does not support usage that includes VK_IMAGE_USAGE_STORAGE_BIT") != nullptr) { + return VK_FALSE; + } + + if (strstr(pCallbackData->pMessage, "VK_FORMAT_R4G4B4A4_UNORM_PACK16 with tiling VK_IMAGE_TILING_OPTIMAL does not support usage that includes VK_IMAGE_USAGE_STORAGE_BIT") != nullptr) { + return VK_FALSE; + } +*/ // Workaround for Vulkan-Loader usability bug: https://github.com/KhronosGroup/Vulkan-Loader/issues/262. if (strstr(pCallbackData->pMessage, "wrong ELF class: ELFCLASS32") != nullptr) { return VK_FALSE; @@ -144,6 +164,35 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_messenger_callback( return VK_FALSE; } +VKAPI_ATTR VkBool32 VKAPI_CALL VulkanContext::_debug_report_callback( + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char *pLayerPrefix, + const char *pMessage, + void *pUserData) { + String debugMessage = String("Vulkan Debug Report: object - ") + + String::num_int64(object) + "\n" + pMessage; + + switch (flags) { + case VK_DEBUG_REPORT_DEBUG_BIT_EXT: + case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: + print_line(debugMessage); + break; + case VK_DEBUG_REPORT_WARNING_BIT_EXT: + case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: + WARN_PRINT(debugMessage); + break; + case VK_DEBUG_REPORT_ERROR_BIT_EXT: + ERR_PRINT(debugMessage); + break; + } + + return VK_FALSE; +} + VkBool32 VulkanContext::_check_layers(uint32_t check_count, const char **check_names, uint32_t layer_count, VkLayerProperties *layers) { for (uint32_t i = 0; i < check_count; i++) { VkBool32 found = 0; @@ -154,7 +203,7 @@ VkBool32 VulkanContext::_check_layers(uint32_t check_count, const char **check_n } } if (!found) { - ERR_PRINT("Can't find layer: " + String(check_names[i])); + WARN_PRINT("Can't find layer: " + String(check_names[i])); return 0; } } @@ -215,24 +264,58 @@ Error VulkanContext::_create_validation_layers() { return OK; } +typedef VkResult(VKAPI_PTR *_vkEnumerateInstanceVersion)(uint32_t *); + +Error VulkanContext::_obtain_vulkan_version() { + // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html#_description + // for Vulkan 1.0 vkEnumerateInstanceVersion is not available, including not in the loader we compile against on Android. + _vkEnumerateInstanceVersion func = (_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceVersion"); + if (func != nullptr) { + uint32_t api_version; + VkResult res = func(&api_version); + if (res == VK_SUCCESS) { + vulkan_major = VK_VERSION_MAJOR(api_version); + vulkan_minor = VK_VERSION_MINOR(api_version); + uint32_t vulkan_patch = VK_VERSION_PATCH(api_version); + + print_line("Vulkan API " + itos(vulkan_major) + "." + itos(vulkan_minor) + "." + itos(vulkan_patch)); + } else { + // according to the documentation this shouldn't fail with anything except a memory allocation error + // in which case we're in deep trouble anyway + ERR_FAIL_V(ERR_CANT_CREATE); + } + } else { + print_line("vkEnumerateInstanceVersion not available, assuming Vulkan 1.0"); + } + + // we don't go above 1.2 + if ((vulkan_major > 1) || (vulkan_major == 1 && vulkan_minor > 2)) { + vulkan_major = 1; + vulkan_minor = 2; + } + + return OK; +} + Error VulkanContext::_initialize_extensions() { - VkResult err; uint32_t instance_extension_count = 0; enabled_extension_count = 0; enabled_layer_count = 0; + enabled_debug_utils = false; + enabled_debug_report = false; /* Look for instance extensions */ VkBool32 surfaceExtFound = 0; VkBool32 platformSurfaceExtFound = 0; memset(extension_names, 0, sizeof(extension_names)); - err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + VkResult err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, nullptr); + ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, ERR_CANT_CREATE); if (instance_extension_count > 0) { VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count); err = vkEnumerateInstanceExtensionProperties(nullptr, &instance_extension_count, instance_extensions); - if (err) { + if (err != VK_SUCCESS && err != VK_INCOMPLETE) { free(instance_extensions); ERR_FAIL_V(ERR_CANT_CREATE); } @@ -249,12 +332,12 @@ Error VulkanContext::_initialize_extensions() { if (!strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, instance_extensions[i].extensionName)) { if (use_validation_layers) { extension_names[enabled_extension_count++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; + enabled_debug_report = true; } } if (!strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, instance_extensions[i].extensionName)) { - if (use_validation_layers) { - extension_names[enabled_extension_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; - } + extension_names[enabled_extension_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; + enabled_debug_utils = true; } if (enabled_extension_count >= MAX_EXTENSIONS) { free(instance_extensions); @@ -271,12 +354,200 @@ Error VulkanContext::_initialize_extensions() { return OK; } +typedef void(VKAPI_PTR *_vkGetPhysicalDeviceProperties2)(VkPhysicalDevice, VkPhysicalDeviceProperties2 *); + +uint32_t VulkanContext::SubgroupCapabilities::supported_stages_flags_rd() const { + uint32_t flags = 0; + + if (supportedStages & VK_SHADER_STAGE_VERTEX_BIT) { + flags += RenderingDevice::ShaderStage::SHADER_STAGE_VERTEX_BIT; + } + if (supportedStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { + flags += RenderingDevice::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT; + } + if (supportedStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { + flags += RenderingDevice::ShaderStage::SHADER_STAGE_TESSELATION_EVALUATION_BIT; + } + // if (supportedStages & VK_SHADER_STAGE_GEOMETRY_BIT) { + // flags += RenderingDevice::ShaderStage::SHADER_STAGE_GEOMETRY_BIT; + // } + if (supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) { + flags += RenderingDevice::ShaderStage::SHADER_STAGE_FRAGMENT_BIT; + } + if (supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) { + flags += RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE_BIT; + } + + return flags; +} + +String VulkanContext::SubgroupCapabilities::supported_stages_desc() const { + String res; + + if (supportedStages & VK_SHADER_STAGE_VERTEX_BIT) { + res += ", STAGE_VERTEX"; + } + if (supportedStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { + res += ", STAGE_TESSELLATION_CONTROL"; + } + if (supportedStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) { + res += ", STAGE_TESSELLATION_EVALUATION"; + } + if (supportedStages & VK_SHADER_STAGE_GEOMETRY_BIT) { + res += ", STAGE_GEOMETRY"; + } + if (supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) { + res += ", STAGE_FRAGMENT"; + } + if (supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) { + res += ", STAGE_COMPUTE"; + } + + /* these are not defined on Android GRMBL */ + if (supportedStages & 0x00000100 /* VK_SHADER_STAGE_RAYGEN_BIT_KHR */) { + res += ", STAGE_RAYGEN_KHR"; + } + if (supportedStages & 0x00000200 /* VK_SHADER_STAGE_ANY_HIT_BIT_KHR */) { + res += ", STAGE_ANY_HIT_KHR"; + } + if (supportedStages & 0x00000400 /* VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR */) { + res += ", STAGE_CLOSEST_HIT_KHR"; + } + if (supportedStages & 0x00000800 /* VK_SHADER_STAGE_MISS_BIT_KHR */) { + res += ", STAGE_MISS_KHR"; + } + if (supportedStages & 0x00001000 /* VK_SHADER_STAGE_INTERSECTION_BIT_KHR */) { + res += ", STAGE_INTERSECTION_KHR"; + } + if (supportedStages & 0x00002000 /* VK_SHADER_STAGE_CALLABLE_BIT_KHR */) { + res += ", STAGE_CALLABLE_KHR"; + } + if (supportedStages & 0x00000040 /* VK_SHADER_STAGE_TASK_BIT_NV */) { + res += ", STAGE_TASK_NV"; + } + if (supportedStages & 0x00000080 /* VK_SHADER_STAGE_MESH_BIT_NV */) { + res += ", STAGE_MESH_NV"; + } + + return res.substr(2); // remove first ", " +} + +uint32_t VulkanContext::SubgroupCapabilities::supported_operations_flags_rd() const { + uint32_t flags = 0; + + if (supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_BASIC_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_VOTE_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_BALLOT_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_CLUSTERED_BIT; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) { + flags += RenderingDevice::SubgroupOperations::SUBGROUP_QUAD_BIT; + } + + return flags; +} + +String VulkanContext::SubgroupCapabilities::supported_operations_desc() const { + String res; + + if (supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) { + res += ", FEATURE_BASIC"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) { + res += ", FEATURE_VOTE"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) { + res += ", FEATURE_ARITHMETIC"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) { + res += ", FEATURE_BALLOT"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) { + res += ", FEATURE_SHUFFLE"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) { + res += ", FEATURE_SHUFFLE_RELATIVE"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) { + res += ", FEATURE_CLUSTERED"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) { + res += ", FEATURE_QUAD"; + } + if (supportedOperations & VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV) { + res += ", FEATURE_PARTITIONED_NV"; + } + + return res.substr(2); // remove first ", " +} + +Error VulkanContext::_check_capabilities() { + // check subgroups + // https://www.khronos.org/blog/vulkan-subgroup-tutorial + // for Vulkan 1.0 vkGetPhysicalDeviceProperties2 is not available, including not in the loader we compile against on Android. + _vkGetPhysicalDeviceProperties2 func = (_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceProperties2"); + if (func != nullptr) { + VkPhysicalDeviceSubgroupProperties subgroupProperties; + subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + subgroupProperties.pNext = NULL; + + VkPhysicalDeviceProperties2 physicalDeviceProperties; + physicalDeviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physicalDeviceProperties.pNext = &subgroupProperties; + + func(gpu, &physicalDeviceProperties); + + subgroup_capabilities.size = subgroupProperties.subgroupSize; + subgroup_capabilities.supportedStages = subgroupProperties.supportedStages; + subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations; + // Note: quadOperationsInAllStages will be true if: + // - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT + // - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT + subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages; + + // only output this when debugging? + print_line("- Vulkan subgroup size " + itos(subgroup_capabilities.size)); + print_line("- Vulkan subgroup stages " + subgroup_capabilities.supported_stages_desc()); + print_line("- Vulkan subgroup supported ops " + subgroup_capabilities.supported_operations_desc()); + if (subgroup_capabilities.quadOperationsInAllStages) { + print_line("- Vulkan subgroup quad operations in all stages"); + } + } else { + subgroup_capabilities.size = 0; + subgroup_capabilities.supportedStages = 0; + subgroup_capabilities.supportedOperations = 0; + subgroup_capabilities.quadOperationsInAllStages = false; + } + + return OK; +} + Error VulkanContext::_create_physical_device() { + /* obtain version */ + _obtain_vulkan_version(); + /* Look for validation layers */ if (use_validation_layers) { _create_validation_layers(); } + /* initialise extensions */ { Error err = _initialize_extensions(); if (err != OK) { @@ -294,7 +565,7 @@ Error VulkanContext::_create_physical_device() { /*applicationVersion*/ 0, /*pEngineName*/ namecs.get_data(), /*engineVersion*/ 0, - /*apiVersion*/ VK_API_VERSION_1_0, + /*apiVersion*/ VK_MAKE_VERSION(vulkan_major, vulkan_minor, 0) }; VkInstanceCreateInfo inst_info = { /*sType*/ VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, @@ -302,7 +573,7 @@ Error VulkanContext::_create_physical_device() { /*flags*/ 0, /*pApplicationInfo*/ &app, /*enabledLayerCount*/ enabled_layer_count, - /*ppEnabledLayerNames*/ (const char *const *)instance_validation_layers, + /*ppEnabledLayerNames*/ (const char *const *)enabled_layers, /*enabledExtensionCount*/ enabled_extension_count, /*ppEnabledExtensionNames*/ (const char *const *)extension_names, }; @@ -313,7 +584,8 @@ Error VulkanContext::_create_physical_device() { * function to register the final callback. */ VkDebugUtilsMessengerCreateInfoEXT dbg_messenger_create_info; - if (use_validation_layers) { + VkDebugReportCallbackCreateInfoEXT dbg_report_callback_create_info{}; + if (enabled_debug_utils) { // VK_EXT_debug_utils style dbg_messenger_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; dbg_messenger_create_info.pNext = nullptr; @@ -326,6 +598,16 @@ Error VulkanContext::_create_physical_device() { dbg_messenger_create_info.pfnUserCallback = _debug_messenger_callback; dbg_messenger_create_info.pUserData = this; inst_info.pNext = &dbg_messenger_create_info; + } else if (enabled_debug_report) { + dbg_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; + dbg_report_callback_create_info.flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | + VK_DEBUG_REPORT_WARNING_BIT_EXT | + VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | + VK_DEBUG_REPORT_ERROR_BIT_EXT | + VK_DEBUG_REPORT_DEBUG_BIT_EXT; + dbg_report_callback_create_info.pfnCallback = _debug_report_callback; + dbg_report_callback_create_info.pUserData = this; + inst_info.pNext = &dbg_report_callback_create_info; } uint32_t gpu_count; @@ -362,7 +644,8 @@ Error VulkanContext::_create_physical_device() { ERR_FAIL_V(ERR_CANT_CREATE); } /* for now, just grab the first physical device */ - gpu = physical_devices[0]; + uint32_t device_index = 0; + gpu = physical_devices[device_index]; free(physical_devices); /* Look for device extensions */ @@ -371,6 +654,40 @@ Error VulkanContext::_create_physical_device() { enabled_extension_count = 0; memset(extension_names, 0, sizeof(extension_names)); + /* Get identifier properties */ + vkGetPhysicalDeviceProperties(gpu, &gpu_props); + + static const struct { + uint32_t id; + const char *name; + } vendor_names[] = { + { 0x1002, "AMD" }, + { 0x1010, "ImgTec" }, + { 0x10DE, "NVIDIA" }, + { 0x13B5, "ARM" }, + { 0x5143, "Qualcomm" }, + { 0x8086, "INTEL" }, + { 0, nullptr }, + }; + device_name = gpu_props.deviceName; + pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE); + pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion); + { + device_vendor = "Unknown"; + uint32_t vendor_idx = 0; + while (vendor_names[vendor_idx].name != nullptr) { + if (gpu_props.vendorID == vendor_names[vendor_idx].id) { + device_vendor = vendor_names[vendor_idx].name; + break; + } + vendor_idx++; + } + } +#ifdef DEBUG_ENABLED + print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name); +#endif + device_api_version = gpu_props.apiVersion; + err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -437,7 +754,7 @@ Error VulkanContext::_create_physical_device() { " extension.\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?\n" "vkCreateInstance Failure"); - if (use_validation_layers) { + if (enabled_debug_utils) { // Setup VK_EXT_debug_utils function pointers always (we use them for // debug labels and names). CreateDebugUtilsMessengerEXT = @@ -479,8 +796,34 @@ Error VulkanContext::_create_physical_device() { ERR_FAIL_V(ERR_CANT_CREATE); break; } + } else if (enabled_debug_report) { + CreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkCreateDebugReportCallbackEXT"); + DebugReportMessageEXT = (PFN_vkDebugReportMessageEXT)vkGetInstanceProcAddr(inst, "vkDebugReportMessageEXT"); + DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(inst, "vkDestroyDebugReportCallbackEXT"); + + if (nullptr == CreateDebugReportCallbackEXT || nullptr == DebugReportMessageEXT || nullptr == DestroyDebugReportCallbackEXT) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, + "GetProcAddr: Failed to init VK_EXT_debug_report\n" + "GetProcAddr: Failure"); + } + + err = CreateDebugReportCallbackEXT(inst, &dbg_report_callback_create_info, nullptr, &dbg_debug_report); + switch (err) { + case VK_SUCCESS: + break; + case VK_ERROR_OUT_OF_HOST_MEMORY: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, + "CreateDebugReportCallbackEXT: out of host memory\n" + "CreateDebugReportCallbackEXT Failure"); + break; + default: + ERR_FAIL_V_MSG(ERR_CANT_CREATE, + "CreateDebugReportCallbackEXT: unknown failure\n" + "CreateDebugReportCallbackEXT Failure"); + ERR_FAIL_V(ERR_CANT_CREATE); + break; + } } - vkGetPhysicalDeviceProperties(gpu, &gpu_props); /* Call with NULL data to get count */ vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr); @@ -494,6 +837,8 @@ Error VulkanContext::_create_physical_device() { // features based on this query vkGetPhysicalDeviceFeatures(gpu, &physical_device_features); + physical_device_features.robustBufferAccess = false; //turn off robust buffer access, which can hamper performance on some hardware + #define GET_INSTANCE_PROC_ADDR(inst, entrypoint) \ { \ fp##entrypoint = (PFN_vk##entrypoint)vkGetInstanceProcAddr(inst, "vk" #entrypoint); \ @@ -507,6 +852,14 @@ Error VulkanContext::_create_physical_device() { GET_INSTANCE_PROC_ADDR(inst, GetPhysicalDeviceSurfacePresentModesKHR); GET_INSTANCE_PROC_ADDR(inst, GetSwapchainImagesKHR); + // get info about what our vulkan driver is capable off + { + Error res = _check_capabilities(); + if (res != OK) { + return res; + } + } + return OK; } @@ -545,6 +898,7 @@ Error VulkanContext::_create_device() { } err = vkCreateDevice(gpu, &sdevice, nullptr, &device); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + return OK; } @@ -707,7 +1061,8 @@ Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, VkSurfa // We use a single GPU, but we need a surface to initialize the // queues, so this process must be deferred until a surface // is created. - _initialize_queues(p_surface); + Error err = _initialize_queues(p_surface); + ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); } Window window; @@ -1009,7 +1364,6 @@ Error VulkanContext::_update_swap_chain(Window *window) { { const VkAttachmentDescription attachment = { - /*flags*/ 0, /*format*/ format, /*samples*/ VK_SAMPLE_COUNT_1_BIT, @@ -1258,13 +1612,13 @@ Error VulkanContext::swap_buffers() { DemoUpdateTargetIPD(demo); // Note: a real application would position its geometry to that it's in - // the correct locatoin for when the next image is presented. It might + // the correct location for when the next image is presented. It might // also wait, so that there's less latency between any input and when // the next image is rendered/presented. This demo program is so // simple that it doesn't do either of those. } #endif - // Wait for the image acquired semaphore to be signaled to ensure + // Wait for the image acquired semaphore to be signalled to ensure // that the image won't be rendered to until the presentation // engine has fully released ownership to the application, and it is // okay to render to the image. @@ -1330,7 +1684,7 @@ Error VulkanContext::swap_buffers() { ERR_FAIL_COND_V(err, ERR_CANT_CREATE); } - // If we are using separate queues we have to wait for image ownership, + // If we are using separate queues, we have to wait for image ownership, // otherwise wait for draw complete VkPresentInfoKHR present = { /*sType*/ VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, @@ -1479,23 +1833,6 @@ VkPhysicalDeviceLimits VulkanContext::get_device_limits() const { return gpu_props.limits; } -VulkanContext::VulkanContext() { - queue_props = nullptr; - command_buffer_count = 0; - instance_validation_layers = nullptr; - use_validation_layers = true; - VK_KHR_incremental_present_enabled = true; - VK_GOOGLE_display_timing_enabled = true; - - command_buffer_queue.resize(1); //first one is the setup command always - command_buffer_queue.write[0] = nullptr; - command_buffer_count = 1; - queues_initialized = false; - - buffers_prepared = false; - swapchainImageCount = 0; -} - RID VulkanContext::local_device_create() { LocalDevice ld; @@ -1583,6 +1920,77 @@ void VulkanContext::local_device_free(RID p_local_device) { local_device_owner.free(p_local_device); } +void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { + if (!enabled_debug_utils) { + return; + } + + CharString cs = p_label_name.utf8().get_data(); + VkDebugUtilsLabelEXT label; + label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + label.pNext = nullptr; + label.pLabelName = cs.get_data(); + label.color[0] = p_color[0]; + label.color[1] = p_color[1]; + label.color[2] = p_color[2]; + label.color[3] = p_color[3]; + CmdBeginDebugUtilsLabelEXT(p_command_buffer, &label); +} + +void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color) { + if (!enabled_debug_utils) { + return; + } + CharString cs = p_label_name.utf8().get_data(); + VkDebugUtilsLabelEXT label; + label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + label.pNext = nullptr; + label.pLabelName = cs.get_data(); + label.color[0] = p_color[0]; + label.color[1] = p_color[1]; + label.color[2] = p_color[2]; + label.color[3] = p_color[3]; + CmdInsertDebugUtilsLabelEXT(p_command_buffer, &label); +} + +void VulkanContext::command_end_label(VkCommandBuffer p_command_buffer) { + if (!enabled_debug_utils) { + return; + } + CmdEndDebugUtilsLabelEXT(p_command_buffer); +} + +void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { + if (!enabled_debug_utils) { + return; + } + CharString obj_data = p_object_name.utf8(); + VkDebugUtilsObjectNameInfoEXT name_info; + name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; + name_info.pNext = nullptr; + name_info.objectType = p_object_type; + name_info.objectHandle = p_object_handle; + name_info.pObjectName = obj_data.get_data(); + SetDebugUtilsObjectNameEXT(device, &name_info); +} + +String VulkanContext::get_device_vendor_name() const { + return device_vendor; +} +String VulkanContext::get_device_name() const { + return device_name; +} +String VulkanContext::get_device_pipeline_cache_uuid() const { + return pipeline_cache_id; +} + +VulkanContext::VulkanContext() { + use_validation_layers = Engine::get_singleton()->is_validation_layers_enabled(); + + command_buffer_queue.resize(1); // First one is always the setup command. + command_buffer_queue.write[0] = nullptr; +} + VulkanContext::~VulkanContext() { if (queue_props) { free(queue_props); @@ -1596,9 +2004,12 @@ VulkanContext::~VulkanContext() { vkDestroySemaphore(device, image_ownership_semaphores[i], nullptr); } } - if (inst_initialized) { + if (inst_initialized && enabled_debug_utils) { DestroyDebugUtilsMessengerEXT(inst, dbg_messenger, nullptr); } + if (inst_initialized && dbg_debug_report != VK_NULL_HANDLE) { + DestroyDebugReportCallbackEXT(inst, dbg_debug_report, nullptr); + } vkDestroyDevice(device, nullptr); } if (inst_initialized) { diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 9ebea42ecb..b788181ab9 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -31,44 +31,71 @@ #ifndef VULKAN_CONTEXT_H #define VULKAN_CONTEXT_H -#include "core/error_list.h" -#include "core/map.h" +#include "core/error/error_list.h" #include "core/os/mutex.h" -#include "core/rid_owner.h" -#include "core/ustring.h" +#include "core/string/ustring.h" +#include "core/templates/map.h" +#include "core/templates/rid_owner.h" #include "servers/display_server.h" + #include <vulkan/vulkan.h> class VulkanContext { +public: + struct SubgroupCapabilities { + uint32_t size; + VkShaderStageFlags supportedStages; + VkSubgroupFeatureFlags supportedOperations; + VkBool32 quadOperationsInAllStages; + + uint32_t supported_stages_flags_rd() const; + String supported_stages_desc() const; + uint32_t supported_operations_flags_rd() const; + String supported_operations_desc() const; + }; + +private: enum { MAX_EXTENSIONS = 128, MAX_LAYERS = 64, FRAME_LAG = 2 }; - VkInstance inst; - VkSurfaceKHR surface; - VkPhysicalDevice gpu; + VkInstance inst = VK_NULL_HANDLE; + VkSurfaceKHR surface = VK_NULL_HANDLE; + VkPhysicalDevice gpu = VK_NULL_HANDLE; VkPhysicalDeviceProperties gpu_props; - uint32_t queue_family_count; - VkQueueFamilyProperties *queue_props; - VkDevice device; + uint32_t queue_family_count = 0; + VkQueueFamilyProperties *queue_props = nullptr; + VkDevice device = VK_NULL_HANDLE; bool device_initialized = false; bool inst_initialized = false; - //present - bool queues_initialized; - uint32_t graphics_queue_family_index; - uint32_t present_queue_family_index; - bool separate_present_queue; - VkQueue graphics_queue; - VkQueue present_queue; + // Vulkan 1.0 doesn't return version info so we assume this by default until we know otherwise + uint32_t vulkan_major = 1; + uint32_t vulkan_minor = 0; + SubgroupCapabilities subgroup_capabilities; + + String device_vendor; + String device_name; + String pipeline_cache_id; + uint32_t device_api_version = 0; + + bool buffers_prepared = false; + + // Present queue. + bool queues_initialized = false; + uint32_t graphics_queue_family_index = 0; + uint32_t present_queue_family_index = 0; + bool separate_present_queue = false; + VkQueue graphics_queue = VK_NULL_HANDLE; + VkQueue present_queue = VK_NULL_HANDLE; VkColorSpaceKHR color_space; VkFormat format; VkSemaphore image_acquired_semaphores[FRAME_LAG]; VkSemaphore draw_complete_semaphores[FRAME_LAG]; VkSemaphore image_ownership_semaphores[FRAME_LAG]; - int frame_index; + int frame_index = 0; VkFence fences[FRAME_LAG]; VkPhysicalDeviceMemoryProperties memory_properties; VkPhysicalDeviceFeatures physical_device_features; @@ -78,7 +105,6 @@ class VulkanContext { VkCommandBuffer graphics_to_present_cmd; VkImageView view; VkFramebuffer framebuffer; - } SwapchainImageResources; struct Window { @@ -89,32 +115,43 @@ class VulkanContext { uint32_t current_buffer = 0; int width = 0; int height = 0; - VkCommandPool present_cmd_pool; //for separate present queue + VkCommandPool present_cmd_pool = VK_NULL_HANDLE; // For separate present queue. VkRenderPass render_pass = VK_NULL_HANDLE; }; struct LocalDevice { bool waiting = false; - VkDevice device; - VkQueue queue; + VkDevice device = VK_NULL_HANDLE; + VkQueue queue = VK_NULL_HANDLE; }; RID_Owner<LocalDevice, true> local_device_owner; Map<DisplayServer::WindowID, Window> windows; - uint32_t swapchainImageCount; + uint32_t swapchainImageCount = 0; - //commands + // Commands. - bool prepared; + bool prepared = false; - //extensions - bool VK_KHR_incremental_present_enabled; - bool VK_GOOGLE_display_timing_enabled; - const char **instance_validation_layers; - uint32_t enabled_extension_count; - uint32_t enabled_layer_count; + Vector<VkCommandBuffer> command_buffer_queue; + int command_buffer_count = 1; + + // Extensions. + + bool VK_KHR_incremental_present_enabled = true; + bool VK_GOOGLE_display_timing_enabled = true; + uint32_t enabled_extension_count = 0; const char *extension_names[MAX_EXTENSIONS]; + bool enabled_debug_utils = false; + + /** + * True if VK_EXT_debug_report extension is used. VK_EXT_debug_report is deprecated but it is + * still used if VK_EXT_debug_utils is not available. + */ + bool enabled_debug_report = false; + + uint32_t enabled_layer_count = 0; const char *enabled_layers[MAX_LAYERS]; PFN_vkCreateDebugUtilsMessengerEXT CreateDebugUtilsMessengerEXT; @@ -124,6 +161,9 @@ class VulkanContext { PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabelEXT; PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabelEXT; PFN_vkSetDebugUtilsObjectNameEXT SetDebugUtilsObjectNameEXT; + PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT; + PFN_vkDebugReportMessageEXT DebugReportMessageEXT; + PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT; PFN_vkGetPhysicalDeviceSurfaceSupportKHR fpGetPhysicalDeviceSurfaceSupportKHR; PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR fpGetPhysicalDeviceSurfaceCapabilitiesKHR; PFN_vkGetPhysicalDeviceSurfaceFormatsKHR fpGetPhysicalDeviceSurfaceFormatsKHR; @@ -136,17 +176,31 @@ class VulkanContext { PFN_vkGetRefreshCycleDurationGOOGLE fpGetRefreshCycleDurationGOOGLE; PFN_vkGetPastPresentationTimingGOOGLE fpGetPastPresentationTimingGOOGLE; - VkDebugUtilsMessengerEXT dbg_messenger; + VkDebugUtilsMessengerEXT dbg_messenger = VK_NULL_HANDLE; + VkDebugReportCallbackEXT dbg_debug_report = VK_NULL_HANDLE; + Error _obtain_vulkan_version(); Error _create_validation_layers(); Error _initialize_extensions(); + Error _check_capabilities(); VkBool32 _check_layers(uint32_t check_count, const char **check_names, uint32_t layer_count, VkLayerProperties *layers); - static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData); + static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_report_callback( + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char *pLayerPrefix, + const char *pMessage, + void *pUserData); + Error _create_physical_device(); Error _initialize_queues(VkSurfaceKHR surface); @@ -160,12 +214,11 @@ class VulkanContext { Error _create_swap_chain(); Error _create_semaphores(); - Vector<VkCommandBuffer> command_buffer_queue; - int command_buffer_count; - protected: virtual const char *_get_platform_surface_extension() const = 0; - // virtual VkResult _create_surface(VkSurfaceKHR *surface, VkInstance p_instance) = 0; + + // Enabled via command line argument. + bool use_validation_layers = false; virtual Error _window_create(DisplayServer::WindowID p_window_id, VkSurfaceKHR p_surface, int p_width, int p_height); @@ -173,11 +226,11 @@ protected: return inst; } - bool buffers_prepared; - - bool use_validation_layers; - public: + uint32_t get_vulkan_major() const { return vulkan_major; }; + uint32_t get_vulkan_minor() const { return vulkan_minor; }; + SubgroupCapabilities get_subgroup_capabilities() const { return subgroup_capabilities; }; + VkDevice get_device(); VkPhysicalDevice get_physical_device(); int get_swapchain_image_count() const; @@ -207,6 +260,15 @@ public: Error swap_buffers(); Error initialize(); + void command_begin_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color); + void command_insert_label(VkCommandBuffer p_command_buffer, String p_label_name, const Color p_color); + void command_end_label(VkCommandBuffer p_command_buffer); + void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); + + String get_device_vendor_name() const; + String get_device_name() const; + String get_device_pipeline_cache_uuid() const; + VulkanContext(); virtual ~VulkanContext(); }; |