diff options
Diffstat (limited to 'drivers/vulkan/rendering_device_vulkan.cpp')
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 1096 |
1 files changed, 715 insertions, 381 deletions
diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 23e9227a39..0c59310f8d 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -30,17 +30,68 @@ #include "rendering_device_vulkan.h" -#include "core/hashfuncs.h" +#include "core/config/project_settings.h" #include "core/os/file_access.h" #include "core/os/os.h" -#include "core/project_settings.h" +#include "core/templates/hashfuncs.h" #include "drivers/vulkan/vulkan_context.h" + #include "thirdparty/spirv-reflect/spirv_reflect.h" //#define FORCE_FULL_BARRIER -void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) { +// Get the Vulkan object information and possible stage access types (bitwise OR'd with incoming values) +RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &stage_mask, VkAccessFlags &access_mask) { + Buffer *buffer = nullptr; + if (vertex_buffer_owner.owns(p_buffer)) { + stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + buffer = vertex_buffer_owner.getornull(p_buffer); + } else if (index_buffer_owner.owns(p_buffer)) { + stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + access_mask |= VK_ACCESS_INDEX_READ_BIT; + buffer = index_buffer_owner.getornull(p_buffer); + } else if (uniform_buffer_owner.owns(p_buffer)) { + stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask |= VK_ACCESS_UNIFORM_READ_BIT; + buffer = uniform_buffer_owner.getornull(p_buffer); + } else if (texture_buffer_owner.owns(p_buffer)) { + stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask |= VK_ACCESS_SHADER_READ_BIT; + buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; + } else if (storage_buffer_owner.owns(p_buffer)) { + stage_mask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + buffer = storage_buffer_owner.getornull(p_buffer); + } + return buffer; +} + +static void update_external_dependency_for_store(VkSubpassDependency &dependency, bool is_sampled, bool is_storage, bool is_depth) { + // Transitioning from write to read, protect the shaders that may use this next + // Allow for copies/image layout transitions + dependency.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dependency.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; + + if (is_sampled) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; + } else if (is_storage) { + dependency.dstStageMask |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + dependency.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } else { + dependency.dstStageMask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (is_depth) { + // Depth resources have addtional stages that may be interested in them + dependency.dstStageMask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } +} +void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) { if (!dependency_map.has(p_depends_on)) { dependency_map[p_depends_on] = Set<RID>(); } @@ -55,23 +106,20 @@ void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) { } void RenderingDeviceVulkan::_free_dependencies(RID p_id) { - //direct dependencies must be freed Map<RID, Set<RID>>::Element *E = dependency_map.find(p_id); if (E) { - while (E->get().size()) { free(E->get().front()->get()); } dependency_map.erase(E); } - //reverse depenencies must be unreferenced + //reverse dependencies must be unreferenced E = reverse_dependency_map.find(p_id); if (E) { - for (Set<RID>::Element *F = E->get().front(); F; F = F->next()) { Map<RID, Set<RID>>::Element *G = dependency_map.find(F->get()); ERR_CONTINUE(!G); @@ -566,81 +614,98 @@ int RenderingDeviceVulkan::get_format_vertex_size(DataFormat p_format) { case DATA_FORMAT_B8G8R8A8_UNORM: case DATA_FORMAT_B8G8R8A8_SNORM: case DATA_FORMAT_B8G8R8A8_UINT: - case DATA_FORMAT_B8G8R8A8_SINT: return 4; + case DATA_FORMAT_B8G8R8A8_SINT: + case DATA_FORMAT_A2B10G10R10_UNORM_PACK32: + return 4; case DATA_FORMAT_R16_UNORM: case DATA_FORMAT_R16_SNORM: case DATA_FORMAT_R16_UINT: case DATA_FORMAT_R16_SINT: - case DATA_FORMAT_R16_SFLOAT: return 4; + case DATA_FORMAT_R16_SFLOAT: + return 4; case DATA_FORMAT_R16G16_UNORM: case DATA_FORMAT_R16G16_SNORM: case DATA_FORMAT_R16G16_UINT: case DATA_FORMAT_R16G16_SINT: - case DATA_FORMAT_R16G16_SFLOAT: return 4; + case DATA_FORMAT_R16G16_SFLOAT: + return 4; case DATA_FORMAT_R16G16B16_UNORM: case DATA_FORMAT_R16G16B16_SNORM: case DATA_FORMAT_R16G16B16_UINT: case DATA_FORMAT_R16G16B16_SINT: - case DATA_FORMAT_R16G16B16_SFLOAT: return 8; + case DATA_FORMAT_R16G16B16_SFLOAT: + return 8; case DATA_FORMAT_R16G16B16A16_UNORM: case DATA_FORMAT_R16G16B16A16_SNORM: case DATA_FORMAT_R16G16B16A16_UINT: case DATA_FORMAT_R16G16B16A16_SINT: - case DATA_FORMAT_R16G16B16A16_SFLOAT: return 8; + case DATA_FORMAT_R16G16B16A16_SFLOAT: + return 8; case DATA_FORMAT_R32_UINT: case DATA_FORMAT_R32_SINT: - case DATA_FORMAT_R32_SFLOAT: return 4; + case DATA_FORMAT_R32_SFLOAT: + return 4; case DATA_FORMAT_R32G32_UINT: case DATA_FORMAT_R32G32_SINT: - case DATA_FORMAT_R32G32_SFLOAT: return 8; + case DATA_FORMAT_R32G32_SFLOAT: + return 8; case DATA_FORMAT_R32G32B32_UINT: case DATA_FORMAT_R32G32B32_SINT: - case DATA_FORMAT_R32G32B32_SFLOAT: return 12; + case DATA_FORMAT_R32G32B32_SFLOAT: + return 12; case DATA_FORMAT_R32G32B32A32_UINT: case DATA_FORMAT_R32G32B32A32_SINT: - case DATA_FORMAT_R32G32B32A32_SFLOAT: return 16; + case DATA_FORMAT_R32G32B32A32_SFLOAT: + return 16; case DATA_FORMAT_R64_UINT: case DATA_FORMAT_R64_SINT: - case DATA_FORMAT_R64_SFLOAT: return 8; + case DATA_FORMAT_R64_SFLOAT: + return 8; case DATA_FORMAT_R64G64_UINT: case DATA_FORMAT_R64G64_SINT: - case DATA_FORMAT_R64G64_SFLOAT: return 16; + case DATA_FORMAT_R64G64_SFLOAT: + return 16; case DATA_FORMAT_R64G64B64_UINT: case DATA_FORMAT_R64G64B64_SINT: - case DATA_FORMAT_R64G64B64_SFLOAT: return 24; + case DATA_FORMAT_R64G64B64_SFLOAT: + return 24; case DATA_FORMAT_R64G64B64A64_UINT: case DATA_FORMAT_R64G64B64A64_SINT: - case DATA_FORMAT_R64G64B64A64_SFLOAT: return 32; - default: return 0; + case DATA_FORMAT_R64G64B64A64_SFLOAT: + return 32; + default: + return 0; } } uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) { - switch (p_format) { - - case DATA_FORMAT_R4G4_UNORM_PACK8: return 1; + case DATA_FORMAT_R4G4_UNORM_PACK8: + return 1; case DATA_FORMAT_R4G4B4A4_UNORM_PACK16: case DATA_FORMAT_B4G4R4A4_UNORM_PACK16: case DATA_FORMAT_R5G6B5_UNORM_PACK16: case DATA_FORMAT_B5G6R5_UNORM_PACK16: case DATA_FORMAT_R5G5B5A1_UNORM_PACK16: case DATA_FORMAT_B5G5R5A1_UNORM_PACK16: - case DATA_FORMAT_A1R5G5B5_UNORM_PACK16: return 2; + case DATA_FORMAT_A1R5G5B5_UNORM_PACK16: + return 2; case DATA_FORMAT_R8_UNORM: case DATA_FORMAT_R8_SNORM: case DATA_FORMAT_R8_USCALED: case DATA_FORMAT_R8_SSCALED: case DATA_FORMAT_R8_UINT: case DATA_FORMAT_R8_SINT: - case DATA_FORMAT_R8_SRGB: return 1; + case DATA_FORMAT_R8_SRGB: + return 1; case DATA_FORMAT_R8G8_UNORM: case DATA_FORMAT_R8G8_SNORM: case DATA_FORMAT_R8G8_USCALED: case DATA_FORMAT_R8G8_SSCALED: case DATA_FORMAT_R8G8_UINT: case DATA_FORMAT_R8G8_SINT: - case DATA_FORMAT_R8G8_SRGB: return 2; + case DATA_FORMAT_R8G8_SRGB: + return 2; case DATA_FORMAT_R8G8B8_UNORM: case DATA_FORMAT_R8G8B8_SNORM: case DATA_FORMAT_R8G8B8_USCALED: @@ -654,7 +719,8 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_B8G8R8_SSCALED: case DATA_FORMAT_B8G8R8_UINT: case DATA_FORMAT_B8G8R8_SINT: - case DATA_FORMAT_B8G8R8_SRGB: return 3; + case DATA_FORMAT_B8G8R8_SRGB: + return 3; case DATA_FORMAT_R8G8B8A8_UNORM: case DATA_FORMAT_R8G8B8A8_SNORM: case DATA_FORMAT_R8G8B8A8_USCALED: @@ -668,7 +734,8 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_B8G8R8A8_SSCALED: case DATA_FORMAT_B8G8R8A8_UINT: case DATA_FORMAT_B8G8R8A8_SINT: - case DATA_FORMAT_B8G8R8A8_SRGB: return 4; + case DATA_FORMAT_B8G8R8A8_SRGB: + return 4; case DATA_FORMAT_A8B8G8R8_UNORM_PACK32: case DATA_FORMAT_A8B8G8R8_SNORM_PACK32: case DATA_FORMAT_A8B8G8R8_USCALED_PACK32: @@ -687,67 +754,87 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_A2B10G10R10_USCALED_PACK32: case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32: case DATA_FORMAT_A2B10G10R10_UINT_PACK32: - case DATA_FORMAT_A2B10G10R10_SINT_PACK32: return 4; + case DATA_FORMAT_A2B10G10R10_SINT_PACK32: + return 4; case DATA_FORMAT_R16_UNORM: case DATA_FORMAT_R16_SNORM: case DATA_FORMAT_R16_USCALED: case DATA_FORMAT_R16_SSCALED: case DATA_FORMAT_R16_UINT: case DATA_FORMAT_R16_SINT: - case DATA_FORMAT_R16_SFLOAT: return 2; + case DATA_FORMAT_R16_SFLOAT: + return 2; case DATA_FORMAT_R16G16_UNORM: case DATA_FORMAT_R16G16_SNORM: case DATA_FORMAT_R16G16_USCALED: case DATA_FORMAT_R16G16_SSCALED: case DATA_FORMAT_R16G16_UINT: case DATA_FORMAT_R16G16_SINT: - case DATA_FORMAT_R16G16_SFLOAT: return 4; + case DATA_FORMAT_R16G16_SFLOAT: + return 4; case DATA_FORMAT_R16G16B16_UNORM: case DATA_FORMAT_R16G16B16_SNORM: case DATA_FORMAT_R16G16B16_USCALED: case DATA_FORMAT_R16G16B16_SSCALED: case DATA_FORMAT_R16G16B16_UINT: case DATA_FORMAT_R16G16B16_SINT: - case DATA_FORMAT_R16G16B16_SFLOAT: return 6; + case DATA_FORMAT_R16G16B16_SFLOAT: + return 6; case DATA_FORMAT_R16G16B16A16_UNORM: case DATA_FORMAT_R16G16B16A16_SNORM: case DATA_FORMAT_R16G16B16A16_USCALED: case DATA_FORMAT_R16G16B16A16_SSCALED: case DATA_FORMAT_R16G16B16A16_UINT: case DATA_FORMAT_R16G16B16A16_SINT: - case DATA_FORMAT_R16G16B16A16_SFLOAT: return 8; + case DATA_FORMAT_R16G16B16A16_SFLOAT: + return 8; case DATA_FORMAT_R32_UINT: case DATA_FORMAT_R32_SINT: - case DATA_FORMAT_R32_SFLOAT: return 4; + case DATA_FORMAT_R32_SFLOAT: + return 4; case DATA_FORMAT_R32G32_UINT: case DATA_FORMAT_R32G32_SINT: - case DATA_FORMAT_R32G32_SFLOAT: return 8; + case DATA_FORMAT_R32G32_SFLOAT: + return 8; case DATA_FORMAT_R32G32B32_UINT: case DATA_FORMAT_R32G32B32_SINT: - case DATA_FORMAT_R32G32B32_SFLOAT: return 12; + case DATA_FORMAT_R32G32B32_SFLOAT: + return 12; case DATA_FORMAT_R32G32B32A32_UINT: case DATA_FORMAT_R32G32B32A32_SINT: - case DATA_FORMAT_R32G32B32A32_SFLOAT: return 16; + case DATA_FORMAT_R32G32B32A32_SFLOAT: + return 16; case DATA_FORMAT_R64_UINT: case DATA_FORMAT_R64_SINT: - case DATA_FORMAT_R64_SFLOAT: return 8; + case DATA_FORMAT_R64_SFLOAT: + return 8; case DATA_FORMAT_R64G64_UINT: case DATA_FORMAT_R64G64_SINT: - case DATA_FORMAT_R64G64_SFLOAT: return 16; + case DATA_FORMAT_R64G64_SFLOAT: + return 16; case DATA_FORMAT_R64G64B64_UINT: case DATA_FORMAT_R64G64B64_SINT: - case DATA_FORMAT_R64G64B64_SFLOAT: return 24; + case DATA_FORMAT_R64G64B64_SFLOAT: + return 24; case DATA_FORMAT_R64G64B64A64_UINT: case DATA_FORMAT_R64G64B64A64_SINT: - case DATA_FORMAT_R64G64B64A64_SFLOAT: return 32; + case DATA_FORMAT_R64G64B64A64_SFLOAT: + return 32; case DATA_FORMAT_B10G11R11_UFLOAT_PACK32: - case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32: return 4; - case DATA_FORMAT_D16_UNORM: return 2; - case DATA_FORMAT_X8_D24_UNORM_PACK32: return 4; - case DATA_FORMAT_D32_SFLOAT: return 4; - case DATA_FORMAT_S8_UINT: return 1; - case DATA_FORMAT_D16_UNORM_S8_UINT: return 4; - case DATA_FORMAT_D24_UNORM_S8_UINT: return 4; + case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32: + return 4; + case DATA_FORMAT_D16_UNORM: + return 2; + case DATA_FORMAT_X8_D24_UNORM_PACK32: + return 4; + case DATA_FORMAT_D32_SFLOAT: + return 4; + case DATA_FORMAT_S8_UINT: + return 1; + case DATA_FORMAT_D16_UNORM_S8_UINT: + return 4; + case DATA_FORMAT_D24_UNORM_S8_UINT: + return 4; case DATA_FORMAT_D32_SFLOAT_S8_UINT: return 5; //? case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: @@ -765,17 +852,20 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_BC6H_UFLOAT_BLOCK: case DATA_FORMAT_BC6H_SFLOAT_BLOCK: case DATA_FORMAT_BC7_UNORM_BLOCK: - case DATA_FORMAT_BC7_SRGB_BLOCK: return 1; + case DATA_FORMAT_BC7_SRGB_BLOCK: + return 1; case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: - case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: return 1; + case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + return 1; case DATA_FORMAT_EAC_R11_UNORM_BLOCK: case DATA_FORMAT_EAC_R11_SNORM_BLOCK: case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK: - case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK: return 1; + case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK: + return 1; case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK: case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK: case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK: @@ -803,14 +893,17 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK: case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK: case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK: - case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK: return 1; + case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK: + return 1; case DATA_FORMAT_G8B8G8R8_422_UNORM: - case DATA_FORMAT_B8G8R8G8_422_UNORM: return 4; + case DATA_FORMAT_B8G8R8G8_422_UNORM: + return 4; case DATA_FORMAT_G8_B8_R8_3PLANE_420_UNORM: case DATA_FORMAT_G8_B8R8_2PLANE_420_UNORM: case DATA_FORMAT_G8_B8_R8_3PLANE_422_UNORM: case DATA_FORMAT_G8_B8R8_2PLANE_422_UNORM: - case DATA_FORMAT_G8_B8_R8_3PLANE_444_UNORM: return 4; + case DATA_FORMAT_G8_B8_R8_3PLANE_444_UNORM: + return 4; case DATA_FORMAT_R10X6_UNORM_PACK16: case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16: case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16: @@ -830,14 +923,16 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16: case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16: - case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: return 2; + case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + return 2; case DATA_FORMAT_G16B16G16R16_422_UNORM: case DATA_FORMAT_B16G16R16G16_422_UNORM: case DATA_FORMAT_G16_B16_R16_3PLANE_420_UNORM: case DATA_FORMAT_G16_B16R16_2PLANE_420_UNORM: case DATA_FORMAT_G16_B16_R16_3PLANE_422_UNORM: case DATA_FORMAT_G16_B16R16_2PLANE_422_UNORM: - case DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM: return 8; + case DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM: + return 8; case DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG: case DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG: case DATA_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG: @@ -845,7 +940,8 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) case DATA_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG: case DATA_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG: case DATA_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG: - case DATA_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG: return 1; + case DATA_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG: + return 1; default: { ERR_PRINT("Format not handled, bug"); } @@ -857,7 +953,6 @@ uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) // https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.pdf void RenderingDeviceVulkan::get_compressed_image_format_block_dimensions(DataFormat p_format, uint32_t &r_w, uint32_t &r_h) { - switch (p_format) { case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: case DATA_FORMAT_BC1_RGB_SRGB_BLOCK: @@ -938,34 +1033,45 @@ void RenderingDeviceVulkan::get_compressed_image_format_block_dimensions(DataFor } uint32_t RenderingDeviceVulkan::get_compressed_image_format_block_byte_size(DataFormat p_format) { - switch (p_format) { case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: case DATA_FORMAT_BC1_RGB_SRGB_BLOCK: case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK: - case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK: return 8; + case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK: + return 8; case DATA_FORMAT_BC2_UNORM_BLOCK: - case DATA_FORMAT_BC2_SRGB_BLOCK: return 16; + case DATA_FORMAT_BC2_SRGB_BLOCK: + return 16; case DATA_FORMAT_BC3_UNORM_BLOCK: - case DATA_FORMAT_BC3_SRGB_BLOCK: return 16; + case DATA_FORMAT_BC3_SRGB_BLOCK: + return 16; case DATA_FORMAT_BC4_UNORM_BLOCK: - case DATA_FORMAT_BC4_SNORM_BLOCK: return 8; + case DATA_FORMAT_BC4_SNORM_BLOCK: + return 8; case DATA_FORMAT_BC5_UNORM_BLOCK: - case DATA_FORMAT_BC5_SNORM_BLOCK: return 16; + case DATA_FORMAT_BC5_SNORM_BLOCK: + return 16; case DATA_FORMAT_BC6H_UFLOAT_BLOCK: - case DATA_FORMAT_BC6H_SFLOAT_BLOCK: return 16; + case DATA_FORMAT_BC6H_SFLOAT_BLOCK: + return 16; case DATA_FORMAT_BC7_UNORM_BLOCK: - case DATA_FORMAT_BC7_SRGB_BLOCK: return 16; + case DATA_FORMAT_BC7_SRGB_BLOCK: + return 16; case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: - case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: return 8; + case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + return 8; case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: - case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: return 8; + case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + return 8; case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: - case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: return 16; + case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + return 16; case DATA_FORMAT_EAC_R11_UNORM_BLOCK: - case DATA_FORMAT_EAC_R11_SNORM_BLOCK: return 8; + case DATA_FORMAT_EAC_R11_SNORM_BLOCK: + return 8; case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK: - case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK: return 16; + case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK: + return 16; case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK: //again, not sure about astc case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK: case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK: @@ -1011,7 +1117,6 @@ uint32_t RenderingDeviceVulkan::get_compressed_image_format_block_byte_size(Data } uint32_t RenderingDeviceVulkan::get_compressed_image_format_pixel_rshift(DataFormat p_format) { - switch (p_format) { case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: //these formats are half byte size, so rshift is 1 case DATA_FORMAT_BC1_RGB_SRGB_BLOCK: @@ -1028,11 +1133,13 @@ uint32_t RenderingDeviceVulkan::get_compressed_image_format_pixel_rshift(DataFor case DATA_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG: case DATA_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG: case DATA_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG: - case DATA_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG: return 1; + case DATA_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG: + return 1; case DATA_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG: //these formats are quarter byte size, so rshift is 1 case DATA_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG: case DATA_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG: - case DATA_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG: return 2; + case DATA_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG: + return 2; default: { } } @@ -1055,7 +1162,6 @@ bool RenderingDeviceVulkan::format_has_stencil(DataFormat p_format) { } uint32_t RenderingDeviceVulkan::get_image_format_required_size(DataFormat p_format, uint32_t p_width, uint32_t p_height, uint32_t p_depth, uint32_t p_mipmaps, uint32_t *r_blockw, uint32_t *r_blockh, uint32_t *r_depth) { - ERR_FAIL_COND_V(p_mipmaps == 0, 0); uint32_t w = p_width; uint32_t h = p_height; @@ -1095,7 +1201,6 @@ uint32_t RenderingDeviceVulkan::get_image_format_required_size(DataFormat p_form } uint32_t RenderingDeviceVulkan::get_image_required_mipmaps(uint32_t p_width, uint32_t p_height, uint32_t p_depth) { - //formats and block size don't really matter here since they can all go down to 1px (even if block is larger) int w = p_width; int h = p_height; @@ -1104,7 +1209,6 @@ uint32_t RenderingDeviceVulkan::get_image_required_mipmaps(uint32_t p_width, uin int mipmaps = 1; while (true) { - if (w == 1 && h == 1 && d == 1) { break; } @@ -1258,6 +1362,7 @@ Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, p_buffer->buffer_info.buffer = p_buffer->buffer; p_buffer->buffer_info.offset = 0; p_buffer->buffer_info.range = p_size; + p_buffer->usage = p_usage; return OK; } @@ -1274,7 +1379,6 @@ Error RenderingDeviceVulkan::_buffer_free(Buffer *p_buffer) { } Error RenderingDeviceVulkan::_insert_staging_block() { - VkBufferCreateInfo bufferInfo; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.pNext = nullptr; @@ -1312,7 +1416,6 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ r_alloc_size = p_amount; while (true) { - r_alloc_offset = 0; //see if we can use current block @@ -1368,7 +1471,6 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ //block_until_next_frame() continue; } else { - //flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands _flush(true); @@ -1413,7 +1515,6 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ //block_until_next_frame() continue; //and try again } else { - _flush(false); for (int i = 0; i < staging_buffer_blocks.size(); i++) { @@ -1443,13 +1544,11 @@ Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_ } Error RenderingDeviceVulkan::_buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer, uint32_t p_required_align) { - //submitting may get chunked for various reasons, so convert this to a task size_t to_submit = p_data_size; size_t submit_from = 0; while (to_submit > 0) { - uint32_t block_write_offset; uint32_t block_write_amount; @@ -1490,7 +1589,6 @@ Error RenderingDeviceVulkan::_buffer_update(Buffer *p_buffer, size_t p_offset, c } void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_sccess, bool p_sync_with_draw) { - VkMemoryBarrier mem_barrier; mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; mem_barrier.pNext = nullptr; @@ -1537,7 +1635,6 @@ void RenderingDeviceVulkan::_full_barrier(bool p_sync_with_draw) { } void RenderingDeviceVulkan::_buffer_memory_barrier(VkBuffer buffer, uint64_t p_from, uint64_t p_size, VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_sccess, bool p_sync_with_draw) { - VkBufferMemoryBarrier buffer_mem_barrier; buffer_mem_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; buffer_mem_barrier.pNext = nullptr; @@ -1557,7 +1654,6 @@ void RenderingDeviceVulkan::_buffer_memory_barrier(VkBuffer buffer, uint64_t p_f /*****************/ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const TextureView &p_view, const Vector<Vector<uint8_t>> &p_data) { - _THREAD_SAFE_METHOD_ VkImageCreateInfo image_create_info; @@ -1597,16 +1693,16 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T #endif } - if (p_format.type == TEXTURE_TYPE_CUBE || p_format.type == TEXTURE_TYPE_CUBE_ARRAY) { + if (p_format.texture_type == TEXTURE_TYPE_CUBE || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) { image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; } /*if (p_format.type == TEXTURE_TYPE_2D || p_format.type == TEXTURE_TYPE_2D_ARRAY) { image_create_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; }*/ - ERR_FAIL_INDEX_V(p_format.type, TEXTURE_TYPE_MAX, RID()); + ERR_FAIL_INDEX_V(p_format.texture_type, TEXTURE_TYPE_MAX, RID()); - image_create_info.imageType = vulkan_image_type[p_format.type]; + image_create_info.imageType = vulkan_image_type[p_format.texture_type]; ERR_FAIL_COND_V_MSG(p_format.width < 1, RID(), "Width must be equal or greater than 1 for all textures"); @@ -1631,10 +1727,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_create_info.mipLevels = p_format.mipmaps; - if (p_format.type == TEXTURE_TYPE_1D_ARRAY || p_format.type == TEXTURE_TYPE_2D_ARRAY || p_format.type == TEXTURE_TYPE_CUBE_ARRAY || p_format.type == TEXTURE_TYPE_CUBE) { + if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) { ERR_FAIL_COND_V_MSG(p_format.array_layers < 1, RID(), "Amount of layers must be equal or greater than 1 for arrays and cubemaps."); - ERR_FAIL_COND_V_MSG((p_format.type == TEXTURE_TYPE_CUBE_ARRAY || p_format.type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(), + ERR_FAIL_COND_V_MSG((p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(), "Cubemap and cubemap array textures must provide a layer number that is multiple of 6"); image_create_info.arrayLayers = p_format.array_layers; } else { @@ -1687,7 +1783,6 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T "Too many mipmaps requested for texture format and dimensions (" + itos(image_create_info.mipLevels) + "), maximum allowed: (" + itos(required_mipmaps) + ")."); if (p_data.size()) { - ERR_FAIL_COND_V_MSG(!(p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT), RID(), "Texture needs the TEXTURE_USAGE_CAN_UPDATE_BIT usage flag in order to be updated at initialization or later"); @@ -1765,7 +1860,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T VkResult err = vmaCreateImage(allocator, &image_create_info, &allocInfo, &texture.image, &texture.allocation, &texture.allocation_info); ERR_FAIL_COND_V_MSG(err, RID(), "vmaCreateImage failed with error " + itos(err) + "."); - texture.type = p_format.type; + texture.type = p_format.texture_type; texture.format = p_format.format; texture.width = image_create_info.extent.width; texture.height = image_create_info.extent.height; @@ -1795,7 +1890,6 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } else { @@ -1803,7 +1897,6 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T } if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; @@ -1835,7 +1928,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, }; - image_view_create_info.viewType = view_types[p_format.type]; + image_view_create_info.viewType = view_types[p_format.texture_type]; if (p_view.format_override == DATA_FORMAT_MAX) { image_view_create_info.format = image_create_info.format; } else { @@ -1892,13 +1985,12 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_memory_barrier.subresourceRange.baseArrayLayer = 0; image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers; - vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } RID id = texture_owner.make_rid(texture); if (p_data.size()) { - for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) { texture_update(id, i, p_data[i]); } @@ -1907,7 +1999,6 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T } RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID p_with_texture) { - _THREAD_SAFE_METHOD_ Texture *src_texture = texture_owner.getornull(p_with_texture); @@ -1987,7 +2078,6 @@ RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID } RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, TextureSliceType p_slice_type) { - _THREAD_SAFE_METHOD_ Texture *src_texture = texture_owner.getornull(p_with_texture); @@ -2005,15 +2095,26 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_3D && src_texture->type != TEXTURE_TYPE_3D, RID(), "Can only create a 3D slice from a 3D texture"); + ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_2D_ARRAY && (src_texture->type != TEXTURE_TYPE_2D_ARRAY), RID(), + "Can only create an array slice from a 2D array mipmap"); + //create view ERR_FAIL_UNSIGNED_INDEX_V(p_mipmap, src_texture->mipmaps, RID()); ERR_FAIL_UNSIGNED_INDEX_V(p_layer, src_texture->layers, RID()); + int slice_layers = 1; + if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) { + ERR_FAIL_COND_V_MSG(p_layer != 0, RID(), "layer must be 0 when obtaining a 2D array mipmap slice"); + slice_layers = src_texture->layers; + } else if (p_slice_type == TEXTURE_SLICE_CUBEMAP) { + slice_layers = 6; + } + Texture texture = *src_texture; get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height); texture.mipmaps = 1; - texture.layers = p_slice_type == TEXTURE_SLICE_CUBEMAP ? 6 : 1; + texture.layers = slice_layers; texture.base_mipmap = p_mipmap; texture.base_layer = p_layer; @@ -2033,7 +2134,16 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p VK_IMAGE_VIEW_TYPE_2D, }; - image_view_create_info.viewType = p_slice_type == TEXTURE_SLICE_CUBEMAP ? VK_IMAGE_VIEW_TYPE_CUBE : (p_slice_type == TEXTURE_SLICE_3D ? VK_IMAGE_VIEW_TYPE_3D : view_types[texture.type]); + image_view_create_info.viewType = view_types[texture.type]; + + if (p_slice_type == TEXTURE_SLICE_CUBEMAP) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + } else if (p_slice_type == TEXTURE_SLICE_3D) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + } else if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) { + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + } + if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) { image_view_create_info.format = vulkan_formats[texture.format]; } else { @@ -2067,7 +2177,7 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p } image_view_create_info.subresourceRange.baseMipLevel = p_mipmap; image_view_create_info.subresourceRange.levelCount = 1; - image_view_create_info.subresourceRange.layerCount = p_slice_type == TEXTURE_SLICE_CUBEMAP ? 6 : 1; + image_view_create_info.subresourceRange.layerCount = slice_layers; image_view_create_info.subresourceRange.baseArrayLayer = p_layer; if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { @@ -2087,7 +2197,6 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p } Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, bool p_sync_with_draw) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, @@ -2153,12 +2262,11 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } uint32_t mipmap_offset = 0; for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) { - uint32_t depth; uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth); @@ -2171,7 +2279,6 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con for (uint32_t x = 0; x < width; x += region_size) { for (uint32_t y = 0; y < height; y += region_size) { - uint32_t region_w = MIN(region_size, width - x); uint32_t region_h = MIN(region_size, height - y); @@ -2232,7 +2339,6 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con uint32_t dst_offset = (yr * region_w + xr) * pixel_size; //copy block for (uint32_t i = 0; i < pixel_size; i++) { - write_ptr[dst_offset + i] = read_ptr[src_offset + i]; } } @@ -2245,8 +2351,8 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con VkBufferImageCopy buffer_image_copy; buffer_image_copy.bufferOffset = alloc_offset; - buffer_image_copy.bufferRowLength = 0; //tigthly packed - buffer_image_copy.bufferImageHeight = 0; //tigthly packed + buffer_image_copy.bufferRowLength = 0; //tightly packed + buffer_image_copy.bufferImageHeight = 0; //tightly packed buffer_image_copy.imageSubresource.aspectMask = texture->read_aspect_mask; buffer_image_copy.imageSubresource.mipLevel = mm_i; @@ -2289,14 +2395,13 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; } Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d) { - uint32_t width, height, depth; uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth); @@ -2316,7 +2421,6 @@ Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex uint32_t mipmap_offset = 0; for (uint32_t mm_i = 0; mm_i < tex->mipmaps; mm_i++) { - uint32_t image_total = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, mm_i + 1, &width, &height, &depth); uint8_t *write_ptr_mipmap = w + mipmap_offset; @@ -2363,7 +2467,6 @@ Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex } Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t p_layer) { - _THREAD_SAFE_METHOD_ Texture *tex = texture_owner.getornull(p_texture); @@ -2384,13 +2487,12 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t //does not need anything fancy, map and read. return _texture_get_data_from_image(tex, tex->image, tex->allocation, p_layer); } else { - //compute total image size uint32_t width, height, depth; uint32_t buffer_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps, &width, &height, &depth); //allocate buffer - VkCommandBuffer command_buffer = frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; //makes more sense to retrieve Buffer tmp_buffer; _buffer_allocate(&tmp_buffer, buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY); @@ -2422,7 +2524,6 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t uint32_t prev_size = 0; uint32_t offset = 0; for (uint32_t i = 0; i < tex->mipmaps; i++) { - VkBufferImageCopy buffer_image_copy; uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, i + 1); @@ -2457,6 +2558,9 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + if (tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { + image_memory_barrier.dstAccessMask |= VK_ACCESS_SHADER_WRITE_BIT; + } image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; image_memory_barrier.newLayout = tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2468,7 +2572,7 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t image_memory_barrier.subresourceRange.baseArrayLayer = p_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } _flush(true); @@ -2479,7 +2583,6 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t Vector<uint8_t> buffer_data; { - buffer_data.resize(buffer_size); uint8_t *w = buffer_data.ptrw(); copymem(w, buffer_mem, buffer_size); @@ -2506,7 +2609,6 @@ bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) { } Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, bool p_sync_with_draw) { - _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_from_texture); @@ -2557,7 +2659,6 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; { - //PRE Copy the image { //Source @@ -2604,7 +2705,6 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, //COPY { - VkImageCopy image_copy_region; image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask; image_copy_region.srcSubresource.baseArrayLayer = p_src_layer; @@ -2648,7 +2748,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2670,12 +2770,13 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } return OK; } + Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, bool p_sync_with_draw) { _THREAD_SAFE_METHOD_ @@ -2701,8 +2802,8 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID ERR_FAIL_COND_V_MSG(dst_tex->type != TEXTURE_TYPE_2D, ERR_INVALID_PARAMETER, "Destination texture must be 2D (or a slice of a 3D/Cube texture)."); ERR_FAIL_COND_V_MSG(dst_tex->samples != TEXTURE_SAMPLES_1, ERR_INVALID_PARAMETER, "Destination texture must not be multisampled."); - ERR_FAIL_COND_V_MSG(src_tex->format != dst_tex->format, ERR_INVALID_PARAMETER, "Source and Destionation textures must be the same format."); - ERR_FAIL_COND_V_MSG(src_tex->width != dst_tex->width && src_tex->height != dst_tex->height && src_tex->depth != dst_tex->depth, ERR_INVALID_PARAMETER, "Source and Destionation textures must have the same dimensions."); + ERR_FAIL_COND_V_MSG(src_tex->format != dst_tex->format, ERR_INVALID_PARAMETER, "Source and Destination textures must be the same format."); + ERR_FAIL_COND_V_MSG(src_tex->width != dst_tex->width && src_tex->height != dst_tex->height && src_tex->depth != dst_tex->depth, ERR_INVALID_PARAMETER, "Source and Destination textures must have the same dimensions."); ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); @@ -2710,7 +2811,6 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; { - //PRE Copy the image { //Source @@ -2731,7 +2831,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //Dest VkImageMemoryBarrier image_memory_barrier; @@ -2757,7 +2857,6 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID //COPY { - VkImageResolve image_copy_region; image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask; image_copy_region.srcSubresource.baseArrayLayer = src_tex->base_layer; @@ -2801,7 +2900,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } { //make dst readable @@ -2823,7 +2922,7 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer; image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } } @@ -2831,7 +2930,6 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID } Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, bool p_sync_with_draw) { - _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.getornull(p_texture); @@ -2856,16 +2954,22 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, VkCommandBuffer command_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; - VkImageLayout layout = src_tex->layout; + VkImageLayout clear_layout = (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + // NOTE: Perhaps the valid stages/accesses for a given onwner should be a property of the owner. (Here and places like _get_buffer_from_owner) + const VkPipelineStageFlags valid_texture_stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + constexpr VkAccessFlags read_access = VK_ACCESS_SHADER_READ_BIT; + constexpr VkAccessFlags read_write_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + const VkAccessFlags valid_texture_access = (src_tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) ? read_write_access : read_access; - if (src_tex->layout != VK_IMAGE_LAYOUT_GENERAL) { //storage may be in general state + { // Barrier from previous access with optional layout change (see clear_layout logic above) VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; - image_memory_barrier.srcAccessMask = 0; + image_memory_barrier.srcAccessMask = valid_texture_access; image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_memory_barrier.oldLayout = src_tex->layout; - image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier.newLayout = clear_layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2876,8 +2980,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, valid_texture_stages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } VkClearColorValue clear_color; @@ -2893,16 +2996,15 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, range.baseMipLevel = src_tex->base_mipmap + p_base_mipmap; range.levelCount = p_mipmaps; - vkCmdClearColorImage(command_buffer, src_tex->image, layout, &clear_color, 1, &range); - - if (src_tex->layout != VK_IMAGE_LAYOUT_GENERAL) { //storage may be in general state + vkCmdClearColorImage(command_buffer, src_tex->image, clear_layout, &clear_color, 1, &range); + { // Barrier to post clear accesses (changing back the layout if needed) VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier.dstAccessMask = valid_texture_access; + image_memory_barrier.oldLayout = clear_layout; image_memory_barrier.newLayout = src_tex->layout; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -2914,7 +3016,7 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer + p_base_layer; image_memory_barrier.subresourceRange.layerCount = p_layers; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, valid_texture_stages, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); } return OK; @@ -2964,35 +3066,49 @@ bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_f /********************/ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentFormat> &p_format, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, int *r_color_attachment_count) { - Vector<VkAttachmentDescription> attachments; Vector<VkAttachmentReference> color_references; Vector<VkAttachmentReference> depth_stencil_references; Vector<VkAttachmentReference> resolve_references; - for (int i = 0; i < p_format.size(); i++) { + // Set up a dependencies from/to external equivalent to the default (implicit) one, and then amend them + const VkPipelineStageFlags default_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // From Section 7.1 of Vulkan API Spec v1.1.148 - VkAttachmentDescription description; + VkPipelineStageFlags reading_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + VkSubpassDependency dependencies[2] = { { VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, default_access_mask, 0 }, + { 0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, default_access_mask, 0, 0 } }; + VkSubpassDependency &dependency_from_external = dependencies[0]; + VkSubpassDependency &dependency_to_external = dependencies[1]; - description.flags = 0; + for (int i = 0; i < p_format.size(); i++) { ERR_FAIL_INDEX_V(p_format[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE); - description.format = vulkan_formats[p_format[i].format]; ERR_FAIL_INDEX_V(p_format[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE); + ERR_FAIL_COND_V_MSG(!(p_format[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), + VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set."); + + VkAttachmentDescription description = {}; + description.flags = 0; + description.format = vulkan_formats[p_format[i].format]; description.samples = rasterization_sample_count[p_format[i].samples]; - //anything below does not really matter, as vulkan just ignores it when creating a pipeline - ERR_FAIL_COND_V_MSG(!(p_format[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), VK_NULL_HANDLE, - "Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set."); bool is_depth_stencil = p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; bool is_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT; bool is_storage = p_format[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT; + // For each UNDEFINED, assume the prior use was a *read*, as we'd be discarding the output of a write + // Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution syncronization vs. + // the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that + // stage switch (is_depth_stencil ? p_initial_depth_action : p_initial_color_action) { - case INITIAL_ACTION_CLEAR: { description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } break; case INITIAL_ACTION_KEEP: { if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { @@ -3003,10 +3119,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + dependency_from_external.srcStageMask |= reading_stages; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; case INITIAL_ACTION_DROP: { @@ -3018,10 +3136,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + dependency_from_external.srcStageMask |= reading_stages; } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; case INITIAL_ACTION_CONTINUE: { @@ -3037,6 +3157,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; } } break; default: { @@ -3046,20 +3167,21 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF switch (is_depth_stencil ? p_final_depth_action : p_final_color_action) { case FINAL_ACTION_READ: { - if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, false); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, true); } else { description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + // TODO: What does this mean about the next usage (and thus appropriate dependency masks } } break; case FINAL_ACTION_DISCARD: { @@ -3068,7 +3190,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); @@ -3084,7 +3205,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; description.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; @@ -3114,9 +3234,24 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } else if (p_format[i].usage_flags & TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT) { reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; resolve_references.push_back(reference); + // if resolves are done, we need to ensure the copy is safe + dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_TRANSFER_BIT; + dependency_to_external.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT; } else { ERR_FAIL_V_MSG(VK_NULL_HANDLE, "Texture index " + itos(i) + " is neither color, depth stencil or resolve so it can't be used as attachment."); } + + // NOTE: Big Mallet Approach -- any layout transition causes a full barrier + if (reference.layout != description.initialLayout) { + // NOTE: this should be smarter based on the textures knowledge of it's previous role + dependency_from_external.srcStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dependency_from_external.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + } + if (reference.layout != description.finalLayout) { + // NOTE: this should be smarter based on the textures knowledge of it's subsequent role + dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dependency_to_external.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + } } ERR_FAIL_COND_V_MSG(depth_stencil_references.size() > 1, VK_NULL_HANDLE, @@ -3145,8 +3280,8 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF render_pass_create_info.pAttachments = attachments.ptr(); render_pass_create_info.subpassCount = 1; render_pass_create_info.pSubpasses = &subpass; - render_pass_create_info.dependencyCount = 0; - render_pass_create_info.pDependencies = nullptr; + render_pass_create_info.dependencyCount = 2; + render_pass_create_info.pDependencies = dependencies; VkRenderPass render_pass; VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass); @@ -3159,7 +3294,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create(const Vector<AttachmentFormat> &p_format) { - _THREAD_SAFE_METHOD_ FramebufferFormatKey key; @@ -3172,7 +3306,7 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c } int color_references; - VkRenderPass render_pass = _render_pass_create(p_format, INITIAL_ACTION_CLEAR, FINAL_ACTION_DISCARD, INITIAL_ACTION_CLEAR, FINAL_ACTION_DISCARD, &color_references); //actions don't matter for this use case + VkRenderPass render_pass = _render_pass_create(p_format, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, &color_references); //actions don't matter for this use case if (render_pass == VK_NULL_HANDLE) { //was likely invalid return INVALID_ID; @@ -3189,6 +3323,63 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c return id; } +RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(const Size2i &p_size) { + ERR_FAIL_COND_V(p_size.width <= 0 || p_size.height <= 0, INVALID_FORMAT_ID); + + FramebufferFormatKey key; + key.empty_size = p_size; + + const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key); + if (E) { + //exists, return + return E->get(); + } + + VkSubpassDescription subpass; + subpass.flags = 0; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.inputAttachmentCount = 0; //unsupported for now + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = 0; + subpass.pColorAttachments = nullptr; + subpass.pDepthStencilAttachment = nullptr; + subpass.pResolveAttachments = nullptr; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo render_pass_create_info; + render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.pNext = nullptr; + render_pass_create_info.flags = 0; + render_pass_create_info.attachmentCount = 0; + render_pass_create_info.pAttachments = nullptr; + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &subpass; + render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; + + VkRenderPass render_pass; + VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass); + + ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateRenderPass for empty fb failed with error " + itos(res) + "."); + + if (render_pass == VK_NULL_HANDLE) { //was likely invalid + return INVALID_ID; + } + + FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT)); + + E = framebuffer_format_cache.insert(key, id); + + FramebufferFormat fb_format; + fb_format.E = E; + fb_format.color_attachments = 0; + fb_format.render_pass = render_pass; + fb_format.samples = TEXTURE_SAMPLES_1; + framebuffer_formats[id] = fb_format; + return id; +} + RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_texture_samples(FramebufferFormatID p_format) { Map<FramebufferFormatID, FramebufferFormat>::Element *E = framebuffer_formats.find(p_format); ERR_FAIL_COND_V(!E, TEXTURE_SAMPLES_1); @@ -3200,8 +3391,17 @@ RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_te /**** RENDER TARGET ****/ /***********************/ -RID RenderingDeviceVulkan::framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check) { +RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check) { + _THREAD_SAFE_METHOD_ + Framebuffer framebuffer; + framebuffer.format_id = framebuffer_format_create_empty(p_size); + ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID()); + framebuffer.size = p_size; + + return framebuffer_owner.make_rid(framebuffer); +} +RID RenderingDeviceVulkan::framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check) { _THREAD_SAFE_METHOD_ Vector<AttachmentFormat> attachments; @@ -3249,7 +3449,6 @@ RID RenderingDeviceVulkan::framebuffer_create(const Vector<RID> &p_texture_attac } RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_get_format(RID p_framebuffer) { - _THREAD_SAFE_METHOD_ Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer); @@ -3263,7 +3462,6 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_get_form /*****************/ RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) { - _THREAD_SAFE_METHOD_ VkSamplerCreateInfo sampler_create_info; @@ -3308,14 +3506,21 @@ RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) { /**** VERTEX ARRAY ****/ /**********************/ -RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) { - +RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, bool p_use_as_storage) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + uint32_t usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (p_use_as_storage) { + usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + } Buffer buffer; - _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + _buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_GPU_ONLY); if (p_data.size()) { uint64_t data_size = p_data.size(); const uint8_t *r = p_data.ptr(); @@ -3328,7 +3533,6 @@ RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vec // Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats) { - _THREAD_SAFE_METHOD_ VertexDescriptionKey key; @@ -3350,7 +3554,7 @@ RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(cons ERR_FAIL_COND_V(used_locations.has(p_vertex_formats[i].location), INVALID_ID); ERR_FAIL_COND_V_MSG(get_format_vertex_size(p_vertex_formats[i].format) == 0, INVALID_ID, - "Data format for attachment (" + itos(i) + ") is not valid for a vertex array."); + "Data format for attachment (" + itos(i) + "), '" + named_formats[p_vertex_formats[i].format] + "', is not valid for a vertex array."); vdcache.bindings[i].binding = i; vdcache.bindings[i].stride = p_vertex_formats[i].stride; @@ -3380,7 +3584,6 @@ RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(cons } RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID()); @@ -3437,8 +3640,11 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo } RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector<uint8_t> &p_data, bool p_use_restart_indices) { - _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); ERR_FAIL_COND_V(p_index_count == 0, RID()); @@ -3457,7 +3663,7 @@ RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBuff const uint16_t *index16 = (const uint16_t *)r; for (uint32_t i = 0; i < p_index_count; i++) { if (p_use_restart_indices && index16[i] == 0xFFFF) { - continue; //restart index, ingnore + continue; //restart index, ignore } index_buffer.max_index = MAX(index16[i], index_buffer.max_index); } @@ -3465,7 +3671,7 @@ RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBuff const uint32_t *index32 = (const uint32_t *)r; for (uint32_t i = 0; i < p_index_count; i++) { if (p_use_restart_indices && index32[i] == 0xFFFFFFFF) { - continue; //restart index, ingnore + continue; //restart index, ignore } index_buffer.max_index = MAX(index32[i], index_buffer.max_index); } @@ -3487,7 +3693,6 @@ RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBuff } RID RenderingDeviceVulkan::index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(!index_buffer_owner.owns(p_index_buffer), RID()); @@ -3554,13 +3759,11 @@ String RenderingDeviceVulkan::_shader_uniform_debug(RID p_shader, int p_set) { } #if 0 bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLayoutBinding> > &bindings, Vector<Vector<UniformInfo> > &uniform_infos, const glslang::TObjectReflection &reflection, RenderingDevice::ShaderStage p_stage, Shader::PushConstant &push_constant, String *r_error) { - VkDescriptorSetLayoutBinding layout_binding; UniformInfo info; switch (reflection.getType()->getBasicType()) { case glslang::EbtSampler: { - //print_line("DEBUG: IsSampler"); if (reflection.getType()->getSampler().dim == glslang::EsdBuffer) { //texture buffers @@ -3578,7 +3781,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa //print_line("DEBUG: SAMPLER: texel buffer"); } else { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' is of unsupported buffer type."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported buffer type."; } return false; } @@ -3601,7 +3804,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa } else { //print_line("DEBUG: sampler unknown"); if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' is of unsupported sampler type."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported sampler type."; } return false; } @@ -3626,7 +3829,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa if (reflection.getType()->getQualifier().layoutPushConstant) { uint32_t len = reflection.size; if (push_constant.push_constant_size != 0 && push_constant.push_constant_size != len) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' push constants for different stages should all be the same size."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' push constants for different stages should all be the same size."; return false; } push_constant.push_constant_size = len; @@ -3642,7 +3845,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa //print_line("DEBUG: Storage buffer"); } else { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' is of unsupported block type: (" + itos(reflection.getType()->getQualifier().storage) + ")."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' is of unsupported block type: (" + itos(reflection.getType()->getQualifier().storage) + ")."; } return false; } @@ -3658,20 +3861,17 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa } break; /*case glslang::EbtReference: { - } break;*/ /*case glslang::EbtAtomicUint: { - } break;*/ default: { - if (reflection.getType()->getQualifier().hasOffset() || reflection.name.find(".") != std::string::npos) { //member of uniform block? return true; } if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' unsupported uniform type."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' unsupported uniform type."; } return false; } @@ -3679,7 +3879,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa if (!reflection.getType()->getQualifier().hasBinding()) { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' lacks a binding number."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' lacks a binding number."; } return false; } @@ -3688,14 +3888,14 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa if (set >= MAX_UNIFORM_SETS) { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."; } return false; } if (set >= limits.maxBoundDescriptorSets) { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ")."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' uses a set (" + itos(set) + ") index larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ")."; } return false; } @@ -3709,7 +3909,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa //already exists, verify that it's the same type if (bindings[set][i].descriptorType != layout_binding.descriptorType) { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform type."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform type."; } return false; } @@ -3717,7 +3917,7 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa //also, verify that it's the same size if (bindings[set][i].descriptorCount != layout_binding.descriptorCount || uniform_infos[set][i].length != info.length) { if (r_error) { - *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name.c_str() + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform size."; + *r_error = "On shader stage '" + String(shader_stage_names[p_stage]) + "', uniform '" + reflection.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(binding) + " with different uniform size."; } return false; } @@ -3751,7 +3951,6 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa #endif RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) { - //descriptor layouts Vector<Vector<VkDescriptorSetLayoutBinding>> set_bindings; Vector<Vector<UniformInfo>> uniform_info; @@ -3768,7 +3967,6 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages bool is_compute = false; for (int i = 0; i < p_stages.size(); i++) { - if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { is_compute = true; ERR_FAIL_COND_V_MSG(p_stages.size() != 1, RID(), @@ -3792,7 +3990,6 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages uint32_t stage = p_stages[i].shader_stage; if (binding_count > 0) { - //Parse bindings Vector<SpvReflectDescriptorBinding *> bindings; @@ -3864,6 +4061,10 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; info.type = UNIFORM_TYPE_INPUT_ATTACHMENT; } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + ERR_PRINT("Acceleration structure not supported."); + continue; + } break; } if (need_array_dimensions) { @@ -3943,7 +4144,6 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } if (stage == SHADER_STAGE_VERTEX) { - uint32_t iv_count = 0; result = spvReflectEnumerateInputVariables(&module, &iv_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -3966,7 +4166,6 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } if (stage == SHADER_STAGE_FRAGMENT) { - uint32_t ov_count = 0; result = spvReflectEnumerateOutputVariables(&module, &ov_count, nullptr); ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), @@ -4079,9 +4278,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages //proceed to create descriptor sets if (success) { - for (int i = 0; i < set_bindings.size(); i++) { - //empty ones are fine if they were not used according to spec (binding count will be 0) VkDescriptorSetLayoutCreateInfo layout_create_info; layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -4140,8 +4337,10 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } pipeline_layout_create_info.pSetLayouts = layouts.ptr(); + // Needs to be declared in this outer scope, otherwise it may not outlive its assignment + // to pipeline_layout_create_info. + VkPushConstantRange push_constant_range; if (push_constant.push_constant_size) { - VkPushConstantRange push_constant_range; push_constant_range.stageFlags = push_constant.push_constants_vk_stage; push_constant_range.offset = 0; push_constant_range.size = push_constant.push_constant_size; @@ -4190,10 +4389,13 @@ uint32_t RenderingDeviceVulkan::shader_get_vertex_input_attribute_mask(RID p_sha /******************/ RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); Buffer buffer; Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); @@ -4207,14 +4409,22 @@ RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Ve return uniform_buffer_owner.make_rid(buffer); } -RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) { - +RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, uint32_t p_usage) { _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; - Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + buffer.usage = p_usage; + uint32_t flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + if (p_usage & STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT) { + flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + } + Error err = _buffer_allocate(&buffer, p_size_bytes, flags, VMA_MEMORY_USAGE_GPU_ONLY); ERR_FAIL_COND_V(err != OK, RID()); if (p_data.size()) { @@ -4227,8 +4437,11 @@ RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Ve } RID RenderingDeviceVulkan::texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector<uint8_t> &p_data) { - _THREAD_SAFE_METHOD_ + ERR_FAIL_COND_V_MSG(draw_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(), + "Creating buffers with data is forbidden during creation of a draw list"); uint32_t element_size = get_format_vertex_size(p_format); ERR_FAIL_COND_V_MSG(element_size == 0, RID(), "Format requested is not supported for texture buffers"); @@ -4377,14 +4590,13 @@ void RenderingDeviceVulkan::_descriptor_pool_free(const DescriptorPoolKey &p_key vkDestroyDescriptorPool(device, p_pool->pool, nullptr); descriptor_pools[p_key].erase(p_pool); memdelete(p_pool); - if (descriptor_pools[p_key].empty()) { + if (descriptor_pools[p_key].is_empty()) { descriptor_pools.erase(p_key); } } } RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_uniforms.size() == 0, RID()); @@ -4412,7 +4624,7 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, List<Vector<VkBufferView>> buffer_views; List<Vector<VkDescriptorImageInfo>> image_infos; //used for verification to make sure a uniform set does not use a framebuffer bound texture - Vector<RID> attachable_textures; + LocalVector<UniformSet::AttachableTexture> attachable_textures; Vector<Texture *> mutable_sampled_textures; Vector<Texture *> mutable_storage_textures; @@ -4425,21 +4637,27 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, } } ERR_FAIL_COND_V_MSG(uniform_idx == -1, RID(), - "All the shader bindings for the given set must be covered by the uniforms provided."); + "All the shader bindings for the given set must be covered by the uniforms provided. Binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + ") was not provided."); const Uniform &uniform = uniforms[uniform_idx]; - ERR_FAIL_COND_V_MSG(uniform.type != set_uniform.type, RID(), - "Mismatch uniform type for binding (" + itos(set_uniform.binding) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.type] + "'."); + ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(), + "Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.uniform_type] + "'."); VkWriteDescriptorSet write; //common header write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write.pNext = nullptr; write.dstSet = VK_NULL_HANDLE; //will assign afterwards when everything is valid write.dstBinding = set_uniform.binding; + write.dstArrayElement = 0; + write.descriptorCount = 0; + write.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; //Invalid value. + write.pImageInfo = nullptr; + write.pBufferInfo = nullptr; + write.pTexelBufferView = nullptr; uint32_t type_size = 1; - switch (uniform.type) { + switch (uniform.uniform_type) { case UNIFORM_TYPE_SAMPLER: { if (uniform.ids.size() != set_uniform.length) { if (set_uniform.length > 1) { @@ -4474,7 +4692,6 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { - if (uniform.ids.size() != set_uniform.length * 2) { if (set_uniform.length > 1) { ERR_FAIL_V_MSG(RID(), "SamplerTexture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler&texture elements, so it should provided twice the amount of IDs (sampler,texture pairs) to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ")."); @@ -4500,7 +4717,10 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.imageView = texture->view; if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { - attachable_textures.push_back(texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1]); + UniformSet::AttachableTexture attachable_texture; + attachable_texture.bind = set_uniform.binding; + attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1]; + attachable_textures.push_back(attachable_texture); } if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { @@ -4528,7 +4748,6 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, } break; case UNIFORM_TYPE_TEXTURE: { - if (uniform.ids.size() != set_uniform.length) { if (set_uniform.length > 1) { ERR_FAIL_V_MSG(RID(), "Texture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ")."); @@ -4551,7 +4770,10 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.imageView = texture->view; if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { - attachable_textures.push_back(texture->owner.is_valid() ? texture->owner : uniform.ids[j]); + UniformSet::AttachableTexture attachable_texture; + attachable_texture.bind = set_uniform.binding; + attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j]; + attachable_textures.push_back(attachable_texture); } if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) { @@ -4579,7 +4801,6 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, type_size = uniform.ids.size(); } break; case UNIFORM_TYPE_IMAGE: { - if (uniform.ids.size() != set_uniform.length) { if (set_uniform.length > 1) { ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ")."); @@ -4659,7 +4880,6 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { - if (uniform.ids.size() != set_uniform.length * 2) { if (set_uniform.length > 1) { ERR_FAIL_V_MSG(RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler buffer elements, so it should provided twice the amount of IDs (sampler,buffer pairs) to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ")."); @@ -4726,7 +4946,15 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, ERR_FAIL_COND_V_MSG(uniform.ids.size() != 1, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.ids.size()) + " provided)."); - Buffer *buffer = storage_buffer_owner.getornull(uniform.ids[0]); + Buffer *buffer = nullptr; + + if (storage_buffer_owner.owns(uniform.ids[0])) { + buffer = storage_buffer_owner.getornull(uniform.ids[0]); + } else if (vertex_buffer_owner.owns(uniform.ids[0])) { + buffer = vertex_buffer_owner.getornull(uniform.ids[0]); + + ERR_FAIL_COND_V_MSG(!(buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag."); + } ERR_FAIL_COND_V_MSG(!buffer, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") is invalid."); //if 0, then its sized on link time @@ -4741,7 +4969,6 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, write.pTexelBufferView = nullptr; } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { - } break; default: { } @@ -4818,44 +5045,27 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER, "Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER, + "Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list"); - VkPipelineStageFlags dst_stage_mask; - VkAccessFlags dst_access; + // Protect subsequent updates... + VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; - Buffer *buffer = nullptr; - if (vertex_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - dst_access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - buffer = vertex_buffer_owner.getornull(p_buffer); - } else if (index_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - dst_access = VK_ACCESS_INDEX_READ_BIT; - buffer = index_buffer_owner.getornull(p_buffer); - } else if (uniform_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_UNIFORM_READ_BIT; - buffer = uniform_buffer_owner.getornull(p_buffer); - } else if (texture_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_SHADER_READ_BIT; - buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; - } else if (storage_buffer_owner.owns(p_buffer)) { - dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - dst_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - buffer = storage_buffer_owner.getornull(p_buffer); - } else { + Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access); + if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); + _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw); Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_sync_with_draw); if (err) { return err; } - _buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw); #ifdef FORCE_FULL_BARRIER _full_barrier(p_sync_with_draw); #else @@ -4865,23 +5075,22 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint } Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { - _THREAD_SAFE_METHOD_ - Buffer *buffer = nullptr; - if (vertex_buffer_owner.owns(p_buffer)) { - buffer = vertex_buffer_owner.getornull(p_buffer); - } else if (index_buffer_owner.owns(p_buffer)) { - buffer = index_buffer_owner.getornull(p_buffer); - } else if (texture_buffer_owner.owns(p_buffer)) { - buffer = &texture_buffer_owner.getornull(p_buffer)->buffer; - } else if (storage_buffer_owner.owns(p_buffer)) { - buffer = storage_buffer_owner.getornull(p_buffer); - } else { + // It could be this buffer was just created + VkPipelineShaderStageCreateFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + // Get the vulkan buffer and the potential stage/access possible + Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask); + if (!buffer) { ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving."); } + // Make sure no one is using the buffer -- the "false" gets us to the same command buffer as below. + _buffer_memory_barrier(buffer->buffer, 0, buffer->size, src_stage_mask, src_access_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, false); + VkCommandBuffer command_buffer = frames[frame].setup_command_buffer; + Buffer tmp_buffer; _buffer_allocate(&tmp_buffer, buffer->size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY); VkBufferCopy region; @@ -4898,7 +5107,6 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { Vector<uint8_t> buffer_data; { - buffer_data.resize(buffer->size); uint8_t *w = buffer_data.ptrw(); copymem(w, buffer_mem, buffer->size); @@ -4916,7 +5124,6 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { /*************************/ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags) { - _THREAD_SAFE_METHOD_ //needs a shader @@ -5003,13 +5210,13 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma input_assembly_create_info.topology = topology_list[p_render_primitive]; input_assembly_create_info.primitiveRestartEnable = (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX); - //tesselation - VkPipelineTessellationStateCreateInfo tesselation_create_info; - tesselation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; - tesselation_create_info.pNext = nullptr; - tesselation_create_info.flags = 0; + //tessellation + VkPipelineTessellationStateCreateInfo tessellation_create_info; + tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + tessellation_create_info.pNext = nullptr; + tessellation_create_info.flags = 0; ERR_FAIL_COND_V(p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize, RID()); - tesselation_create_info.patchControlPoints = p_rasterization_state.patch_control_points; + tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points; VkPipelineViewportStateCreateInfo viewport_state_create_info; viewport_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -5221,7 +5428,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma graphics_pipeline_create_info.pStages = shader->pipeline_stages.ptr(); graphics_pipeline_create_info.pVertexInputState = &pipeline_vertex_input_state_create_info; graphics_pipeline_create_info.pInputAssemblyState = &input_assembly_create_info; - graphics_pipeline_create_info.pTessellationState = &tesselation_create_info; + graphics_pipeline_create_info.pTessellationState = &tessellation_create_info; graphics_pipeline_create_info.pViewportState = &viewport_state_create_info; graphics_pipeline_create_info.pRasterizationState = &rasterization_state_create_info; graphics_pipeline_create_info.pMultisampleState = &multisample_state_create_info; @@ -5326,7 +5533,6 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { } bool RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) { - return compute_pipeline_owner.owns(p_pipeline); } @@ -5339,14 +5545,15 @@ int RenderingDeviceVulkan::screen_get_width(DisplayServer::WindowID p_screen) co ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen"); return context->window_get_width(p_screen); } + int RenderingDeviceVulkan::screen_get_height(DisplayServer::WindowID p_screen) const { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen"); return context->window_get_height(p_screen); } -RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::screen_get_framebuffer_format() const { +RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::screen_get_framebuffer_format() const { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen"); @@ -5376,7 +5583,6 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::screen_get_framebuff /*******************/ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(DisplayServer::WindowID p_screen, const Color &p_clear_color) { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen"); @@ -5436,11 +5642,10 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(Di vkCmdSetScissor(command_buffer, 0, 1, &scissor); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass) { - Framebuffer::VersionKey vk; vk.initial_color_action = p_initial_color_action; vk.final_color_action = p_final_color_action; @@ -5484,8 +5689,7 @@ Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebu return OK; } -Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents) { - +Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures) { VkRenderPassBeginInfo render_pass_begin; render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin.pNext = nullptr; @@ -5530,6 +5734,37 @@ Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuff render_pass_begin.clearValueCount = clear_values.size(); render_pass_begin.pClearValues = clear_values.ptr(); + for (int i = 0; i < p_storage_textures.size(); i++) { + Texture *texture = texture_owner.getornull(p_storage_textures[i]); + ERR_CONTINUE_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), "Supplied storage texture " + itos(i) + " for draw list is not set to be used for storage."); + + if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { + //must change layout to general + VkImageMemoryBarrier image_memory_barrier; + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.oldLayout = texture->layout; + image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + + image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.image = texture->image; + image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask; + image_memory_barrier.subresourceRange.baseMipLevel = texture->base_mipmap; + image_memory_barrier.subresourceRange.levelCount = texture->mipmaps; + image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; + image_memory_barrier.subresourceRange.layerCount = texture->layers; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + + texture->layout = VK_IMAGE_LAYOUT_GENERAL; + + draw_list_storage_textures.push_back(p_storage_textures[i]); + } + } + vkCmdBeginRenderPass(command_buffer, &render_pass_begin, subpass_contents); //mark textures as bound @@ -5551,7 +5786,8 @@ void RenderingDeviceVulkan::_draw_list_insert_clear_region(DrawList *draw_list, int color_index = 0; for (int i = 0; i < framebuffer->texture_ids.size(); i++) { Texture *texture = texture_owner.getornull(framebuffer->texture_ids[i]); - VkClearAttachment clear_at; + VkClearAttachment clear_at = {}; + if (p_clear_color && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { ERR_FAIL_INDEX(color_index, p_clear_colors.size()); //a bug Color clear_color = p_clear_colors[color_index]; @@ -5562,7 +5798,6 @@ void RenderingDeviceVulkan::_draw_list_insert_clear_region(DrawList *draw_list, clear_at.colorAttachment = color_index++; clear_at.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } else if (p_clear_depth && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - clear_at.clearValue.depthStencil.depth = p_depth; clear_at.clearValue.depthStencil.stencil = p_stencil; clear_at.colorAttachment = 0; @@ -5587,8 +5822,7 @@ void RenderingDeviceVulkan::_draw_list_insert_clear_region(DrawList *draw_list, vkCmdClearAttachments(draw_list->command_buffer, clear_attachments.size(), clear_attachments.ptr(), 1, &cr); } -RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region) { - +RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); @@ -5638,7 +5872,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu ERR_FAIL_COND_V(err != OK, INVALID_ID); VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; - err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, command_buffer, VK_SUBPASS_CONTENTS_INLINE); + err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, command_buffer, VK_SUBPASS_CONTENTS_INLINE, p_storage_textures); if (err != OK) { return INVALID_ID; @@ -5675,11 +5909,10 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu vkCmdSetScissor(command_buffer, 0, 1, &scissor); draw_list->viewport = Rect2i(viewport_offset, viewport_size); - return ID_TYPE_DRAW_LIST; + return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } -Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region) { - +Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V(p_splits < 1, ERR_INVALID_DECLARATION); @@ -5726,7 +5959,6 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p uint32_t from = split_draw_list_allocators.size(); split_draw_list_allocators.resize(p_splits); for (uint32_t i = from; i < p_splits; i++) { - VkCommandPoolCreateInfo cmd_pool_info; cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmd_pool_info.pNext = nullptr; @@ -5737,7 +5969,6 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p ERR_FAIL_COND_V_MSG(res, ERR_CANT_CREATE, "vkCreateCommandPool failed with error " + itos(res) + "."); for (int j = 0; j < frame_count; j++) { - VkCommandBuffer command_buffer; VkCommandBufferAllocateInfo cmdbuf; @@ -5763,7 +5994,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); VkCommandBuffer frame_command_buffer = frames[frame].draw_command_buffer; - err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, frame_command_buffer, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); + err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, frame_command_buffer, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, p_storage_textures); if (err != OK) { return ERR_CANT_CREATE; @@ -5774,9 +6005,8 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p draw_list_split = true; for (uint32_t i = 0; i < p_splits; i++) { - //take a command buffer and initialize it - VkCommandBuffer command_buffer = split_draw_list_allocators[p_splits].command_buffers[frame]; + VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame]; VkCommandBufferInheritanceInfo inheritance_info; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; @@ -5834,7 +6064,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p scissor.extent.height = viewport_size.height; vkCmdSetScissor(command_buffer, 0, 1, &scissor); - r_split_ids[i] = (DrawListID(1) << DrawListID(ID_TYPE_SPLIT_DRAW_LIST)) + i; + r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i; draw_list[i].viewport = Rect2i(viewport_offset, viewport_size); } @@ -5843,14 +6073,13 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p } RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawListID p_id) { - if (p_id < 0) { return nullptr; } if (!draw_list) { return nullptr; - } else if (p_id == ID_TYPE_DRAW_LIST) { + } else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) { if (draw_list_split) { return nullptr; } @@ -5873,7 +6102,6 @@ RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawL } void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) { - DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_COND(!dl); #ifdef DEBUG_ENABLED @@ -5924,7 +6152,7 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI if (pipeline->push_constant_size) { dl->state.pipeline_push_constant_stages = pipeline->push_constant_stages; #ifdef DEBUG_ENABLED - dl->validation.pipeline_push_constant_suppplied = false; + dl->validation.pipeline_push_constant_supplied = false; #endif } @@ -5944,9 +6172,8 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI } void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) { - #ifdef DEBUG_ENABLED - ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index > MAX_UNIFORM_SETS, + ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS, "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ")."); #endif DrawList *dl = _get_draw_list_ptr(p_list); @@ -5971,13 +6198,13 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_ #ifdef DEBUG_ENABLED { //validate that textures bound are not attached as framebuffer bindings uint32_t attachable_count = uniform_set->attachable_textures.size(); - const RID *attachable_ptr = uniform_set->attachable_textures.ptr(); + const UniformSet::AttachableTexture *attachable_ptr = uniform_set->attachable_textures.ptr(); uint32_t bound_count = draw_list_bound_textures.size(); const RID *bound_ptr = draw_list_bound_textures.ptr(); for (uint32_t i = 0; i < attachable_count; i++) { for (uint32_t j = 0; j < bound_count; j++) { - ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j], - "Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed."); + ERR_FAIL_COND_MSG(attachable_ptr[i].texture == bound_ptr[j], + "Attempted to use the same texture in framebuffer attachment and a uniform (set: " + itos(p_index) + ", binding: " + itos(attachable_ptr[i].bind) + "), this is not allowed."); } } } @@ -6007,8 +6234,8 @@ void RenderingDeviceVulkan::draw_list_bind_vertex_array(DrawListID p_list, RID p dl->validation.vertex_array_size = vertex_array->vertex_count; vkCmdBindVertexBuffers(dl->command_buffer, 0, vertex_array->buffers.size(), vertex_array->buffers.ptr(), vertex_array->offsets.ptr()); } -void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) { +void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) { DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_COND(!dl); #ifdef DEBUG_ENABLED @@ -6033,7 +6260,6 @@ void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_ } void RenderingDeviceVulkan::draw_list_set_line_width(DrawListID p_list, float p_width) { - DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_COND(!dl); #ifdef DEBUG_ENABLED @@ -6057,12 +6283,11 @@ void RenderingDeviceVulkan::draw_list_set_push_constant(DrawListID p_list, const #endif vkCmdPushConstants(dl->command_buffer, dl->state.pipeline_layout, dl->state.pipeline_push_constant_stages, 0, p_data_size, p_data); #ifdef DEBUG_ENABLED - dl->validation.pipeline_push_constant_suppplied = true; + dl->validation.pipeline_push_constant_supplied = true; #endif } void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) { - DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_COND(!dl); #ifdef DEBUG_ENABLED @@ -6079,14 +6304,14 @@ void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices //make sure format is right ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != dl->validation.vertex_format, "The vertex format used to create the pipeline does not match the vertex format bound."); - //make sure amount of instances is valid + //make sure number of instances is valid ERR_FAIL_COND_MSG(p_instances > dl->validation.vertex_max_instances_allowed, - "Amount of instances requested (" + itos(p_instances) + " is larger than the maximum amount suported by the bound vertex array (" + itos(dl->validation.vertex_max_instances_allowed) + ")."); + "Number of instances requested (" + itos(p_instances) + " is larger than the maximum number supported by the bound vertex array (" + itos(dl->validation.vertex_max_instances_allowed) + ")."); } if (dl->validation.pipeline_push_constant_size > 0) { //using push constants, check that they were supplied - ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_suppplied, + ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied, "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); } @@ -6095,13 +6320,11 @@ void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices //Bind descriptor sets for (uint32_t i = 0; i < dl->state.set_count; i++) { - if (dl->state.sets[i].pipeline_expected_format == 0) { continue; //nothing expected by this pipeline } #ifdef DEBUG_ENABLED if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) { - if (dl->state.sets[i].uniform_set_format == 0) { ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline"); } else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) { @@ -6120,7 +6343,6 @@ void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices } if (p_use_indices) { - #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(p_procedural_vertices > 0, "Procedural vertices can't be used together with indices."); @@ -6148,7 +6370,6 @@ void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices #endif vkCmdDrawIndexed(dl->command_buffer, to_draw, p_instances, dl->validation.index_array_offset, 0, 0); } else { - uint32_t to_draw; if (p_procedural_vertices > 0) { @@ -6158,7 +6379,6 @@ void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices #endif to_draw = p_procedural_vertices; } else { - #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format == INVALID_ID, "Draw command lacks indices, but pipeline format does not use vertices."); @@ -6188,7 +6408,7 @@ void RenderingDeviceVulkan::draw_list_enable_scissor(DrawListID p_list, const Re Rect2i rect = p_rect; rect.position += dl->viewport.position; - rect = dl->viewport.clip(rect); + rect = dl->viewport.intersection(rect); if (rect.get_area() == 0) { return; @@ -6201,6 +6421,7 @@ void RenderingDeviceVulkan::draw_list_enable_scissor(DrawListID p_list, const Re vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor); } + void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_COND(!dl); @@ -6217,7 +6438,6 @@ void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { } void RenderingDeviceVulkan::draw_list_end() { - _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); @@ -6226,8 +6446,8 @@ void RenderingDeviceVulkan::draw_list_end() { //send all command buffers VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count); for (uint32_t i = 0; i < draw_list_count; i++) { - vkEndCommandBuffer(draw_list->command_buffer); - command_buffers[i] = draw_list->command_buffer; + vkEndCommandBuffer(draw_list[i].command_buffer); + command_buffers[i] = draw_list[i].command_buffer; } vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, command_buffers); @@ -6255,6 +6475,33 @@ void RenderingDeviceVulkan::draw_list_end() { draw_list_bound_textures.clear(); + for (int i = 0; i < draw_list_storage_textures.size(); i++) { + Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]); + + VkImageMemoryBarrier image_memory_barrier; + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.oldLayout = texture->layout; + image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.image = texture->image; + image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask; + image_memory_barrier.subresourceRange.baseMipLevel = texture->base_mipmap; + image_memory_barrier.subresourceRange.levelCount = texture->mipmaps; + image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer; + image_memory_barrier.subresourceRange.layerCount = texture->layers; + + vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + + texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + draw_list_storage_textures.clear(); + // To ensure proper synchronization, we must make sure rendering is done before: // * Some buffer is copied // * Another render pass happens (since we may be done @@ -6271,7 +6518,6 @@ void RenderingDeviceVulkan::draw_list_end() { /***********************/ RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() { - ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); @@ -6328,7 +6574,7 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l if (pipeline->push_constant_size) { cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages; #ifdef DEBUG_ENABLED - cl->validation.pipeline_push_constant_suppplied = false; + cl->validation.pipeline_push_constant_supplied = false; #endif } @@ -6341,6 +6587,7 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l cl->validation.pipeline_push_constant_size = pipeline->push_constant_size; #endif } + void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_COND(!compute_list); @@ -6348,7 +6595,7 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, ComputeList *cl = compute_list; #ifdef DEBUG_ENABLED - ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index > MAX_UNIFORM_SETS, + ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS, "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ")."); #endif @@ -6373,7 +6620,6 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, for (uint32_t i = 0; i < textures_to_sampled_count; i++) { if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -6391,7 +6637,7 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer; image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers; - vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6404,7 +6650,6 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, for (uint32_t i = 0; i < textures_to_storage_count; i++) { if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) { - VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; @@ -6462,9 +6707,10 @@ void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, #endif vkCmdPushConstants(cl->command_buffer, cl->state.pipeline_layout, cl->state.pipeline_push_constant_stages, 0, p_data_size, p_data); #ifdef DEBUG_ENABLED - cl->validation.pipeline_push_constant_suppplied = true; + cl->validation.pipeline_push_constant_supplied = true; #endif } + void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_COND(!compute_list); @@ -6488,7 +6734,7 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t if (cl->validation.pipeline_push_constant_size > 0) { //using push constants, check that they were supplied - ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_suppplied, + ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied, "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); } @@ -6497,13 +6743,11 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t //Bind descriptor sets for (uint32_t i = 0; i < cl->state.set_count; i++) { - if (cl->state.sets[i].pipeline_expected_format == 0) { continue; //nothing expected by this pipeline } #ifdef DEBUG_ENABLED if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) { - if (cl->state.sets[i].uniform_set_format == 0) { ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline"); } else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) { @@ -6524,6 +6768,62 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups); } +void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) { + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_COND(!compute_list); + + ComputeList *cl = compute_list; + Buffer *buffer = storage_buffer_owner.getornull(p_buffer); + ERR_FAIL_COND(!buffer); + + ERR_FAIL_COND_MSG(!(buffer->usage & STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT), "Buffer provided was not created to do indirect dispatch."); + + ERR_FAIL_COND_MSG(p_offset + 12 > buffer->size, "Offset provided (+12) is past the end of buffer."); + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw."); + + if (cl->validation.pipeline_push_constant_size > 0) { + //using push constants, check that they were supplied + ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + + //Bind descriptor sets + + for (uint32_t i = 0; i < cl->state.set_count; i++) { + if (cl->state.sets[i].pipeline_expected_format == 0) { + continue; //nothing expected by this pipeline + } +#ifdef DEBUG_ENABLED + if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) { + if (cl->state.sets[i].uniform_set_format == 0) { + ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline"); + } else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) { + UniformSet *us = uniform_set_owner.getornull(cl->state.sets[i].uniform_set); + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader)); + } else { + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader)); + } + } +#endif + if (!cl->state.sets[i].bound) { + //All good, see if this requires re-binding + vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, nullptr); + cl->state.sets[i].bound = true; + } + } + + vkCmdDispatchIndirect(cl->command_buffer, buffer->buffer, p_offset); +} + void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { #ifdef FORCE_FULL_BARRIER _full_barrier(true); @@ -6534,14 +6834,12 @@ void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) { void RenderingDeviceVulkan::compute_list_end() { ERR_FAIL_COND(!compute_list); - for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) { - VkImageMemoryBarrier image_memory_barrier; image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_memory_barrier.pNext = nullptr; image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; image_memory_barrier.oldLayout = E->get()->layout; image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -6554,7 +6852,8 @@ void RenderingDeviceVulkan::compute_list_end() { image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer; image_memory_barrier.subresourceRange.layerCount = E->get()->layers; - vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); + // TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent + vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -6564,13 +6863,19 @@ void RenderingDeviceVulkan::compute_list_end() { #ifdef FORCE_FULL_BARRIER _full_barrier(true); #else - _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT, true); + _memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true); +#endif +} + +void RenderingDeviceVulkan::full_barrier() { +#ifndef DEBUG_ENABLED + ERR_PRINT("Full barrier is debug-only, should not be used in production"); #endif + _full_barrier(true); } #if 0 void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) { - VkCommandBuffer frame_cmdbuf = frames[frame].frame_buffer; ERR_FAIL_COND(!frame_cmdbuf); @@ -6599,7 +6904,6 @@ void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_frame ID screen_format = screen_get_framebuffer_format(); { - VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * p_draw_list_count); uint32_t command_buffer_count = 0; @@ -6623,12 +6927,10 @@ void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_frame } vkCmdEndRenderPass(frame_cmdbuf); - } #endif void RenderingDeviceVulkan::_free_internal(RID p_id) { - //push everything so it's disposed of next time this frame index is processed (means, it's safe to do it) if (texture_owner.owns(p_id)) { Texture *texture = texture_owner.getornull(p_id); @@ -6691,8 +6993,8 @@ void RenderingDeviceVulkan::_free_internal(RID p_id) { ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id())); } } -void RenderingDeviceVulkan::free(RID p_id) { +void RenderingDeviceVulkan::free(RID p_id) { _THREAD_SAFE_METHOD_ _free_dependencies(p_id); //recursively erase dependencies first, to avoid potential API problems @@ -6700,7 +7002,6 @@ void RenderingDeviceVulkan::free(RID p_id) { } void RenderingDeviceVulkan::_finalize_command_bufers() { - if (draw_list) { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); } @@ -6716,7 +7017,6 @@ void RenderingDeviceVulkan::_finalize_command_bufers() { } void RenderingDeviceVulkan::_begin_frame() { - //erase pending resources _free_pending_resources(frame); @@ -6763,7 +7063,6 @@ void RenderingDeviceVulkan::_begin_frame() { } void RenderingDeviceVulkan::swap_buffers() { - ERR_FAIL_COND_MSG(local_device.is_valid(), "Local devices can't swap buffers."); _THREAD_SAFE_METHOD_ @@ -6790,12 +7089,12 @@ void RenderingDeviceVulkan::submit() { } void RenderingDeviceVulkan::sync() { - ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync."); ERR_FAIL_COND_MSG(!local_device_processing, "sync can only be called after a submit"); context->local_device_sync(local_device); _begin_frame(); + local_device_processing = false; } void RenderingDeviceVulkan::_free_pending_resources(int p_frame) { @@ -6895,7 +7194,6 @@ void RenderingDeviceVulkan::_free_pending_resources(int p_frame) { //buffers while (frames[p_frame].buffers_to_dispose_of.front()) { - _buffer_free(&frames[p_frame].buffers_to_dispose_of.front()->get()); frames[p_frame].buffers_to_dispose_of.pop_front(); @@ -6912,8 +7210,13 @@ uint32_t RenderingDeviceVulkan::get_frame_delay() const { return frame_count; } -void RenderingDeviceVulkan::_flush(bool p_current_frame) { +uint64_t RenderingDeviceVulkan::get_memory_usage() const { + VmaStats stats; + vmaCalculateStats(allocator, &stats); + return stats.total.usedBytes; +} +void RenderingDeviceVulkan::_flush(bool p_current_frame) { if (local_device.is_valid() && !p_current_frame) { return; //flushign previous frames has no effect with local device } @@ -6924,7 +7227,6 @@ void RenderingDeviceVulkan::_flush(bool p_current_frame) { } if (local_device.is_valid()) { - VkCommandBuffer command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer }; context->local_device_push_command_buffers(local_device, command_buffers, 2); context->local_device_sync(local_device); @@ -6970,12 +7272,12 @@ void RenderingDeviceVulkan::_flush(bool p_current_frame) { } void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_device) { - context = p_context; device = p_context->get_device(); if (p_local_device) { frame_count = 1; local_device = p_context->local_device_create(); + device = p_context->local_device_get_vk_device(local_device); } else { frame_count = p_context->get_swapchain_image_count() + 1; //always need one extra to ensure it's unused at any time, without having to use a fence for this. } @@ -6995,7 +7297,6 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de frame = 0; //create setup and frame buffers for (int i = 0; i < frame_count; i++) { - frames[i].index = 0; { //create command pool, one per frame is recommended @@ -7119,7 +7420,6 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) { } void RenderingDeviceVulkan::capture_timestamp(const String &p_name, bool p_sync_to_draw) { - ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); { @@ -7199,7 +7499,7 @@ uint64_t RenderingDeviceVulkan::get_captured_timestamp_gpu_time(uint32_t p_index // this sucks because timestampPeriod multiplier is a float, while the timestamp is 64 bits nanosecs. // so, in cases like nvidia which give you enormous numbers and 1 as multiplier, multiplying is next to impossible - // need to do 128 bits fixed point multiplication to get the rigth value + // need to do 128 bits fixed point multiplication to get the right value uint64_t shift_bits = 16; @@ -7211,10 +7511,12 @@ uint64_t RenderingDeviceVulkan::get_captured_timestamp_gpu_time(uint32_t p_index return l; } + uint64_t RenderingDeviceVulkan::get_captured_timestamp_cpu_time(uint32_t p_index) const { ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); return frames[frame].timestamp_cpu_result_values[p_index]; } + String RenderingDeviceVulkan::get_captured_timestamp_name(uint32_t p_index) const { ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, String()); return frames[frame].timestamp_result_names[p_index]; @@ -7222,49 +7524,83 @@ String RenderingDeviceVulkan::get_captured_timestamp_name(uint32_t p_index) cons int RenderingDeviceVulkan::limit_get(Limit p_limit) { switch (p_limit) { - case LIMIT_MAX_BOUND_UNIFORM_SETS: return limits.maxBoundDescriptorSets; - case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS: return limits.maxColorAttachments; - case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET: return limits.maxDescriptorSetSampledImages; - case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET: return limits.maxDescriptorSetSamplers; - case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET: return limits.maxDescriptorSetStorageBuffers; - case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET: return limits.maxDescriptorSetStorageImages; - case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET: return limits.maxDescriptorSetUniformBuffers; - case LIMIT_MAX_DRAW_INDEXED_INDEX: return limits.maxDrawIndexedIndexValue; - case LIMIT_MAX_FRAMEBUFFER_HEIGHT: return limits.maxFramebufferHeight; - case LIMIT_MAX_FRAMEBUFFER_WIDTH: return limits.maxFramebufferWidth; - case LIMIT_MAX_TEXTURE_ARRAY_LAYERS: return limits.maxImageArrayLayers; - case LIMIT_MAX_TEXTURE_SIZE_1D: return limits.maxImageDimension1D; - case LIMIT_MAX_TEXTURE_SIZE_2D: return limits.maxImageDimension2D; - case LIMIT_MAX_TEXTURE_SIZE_3D: return limits.maxImageDimension3D; - case LIMIT_MAX_TEXTURE_SIZE_CUBE: return limits.maxImageDimensionCube; - case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE: return limits.maxPerStageDescriptorSampledImages; - case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE: return limits.maxPerStageDescriptorSamplers; - case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE: return limits.maxPerStageDescriptorStorageBuffers; - case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE: return limits.maxPerStageDescriptorStorageImages; - case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE: return limits.maxPerStageDescriptorUniformBuffers; - case LIMIT_MAX_PUSH_CONSTANT_SIZE: return limits.maxPushConstantsSize; - case LIMIT_MAX_UNIFORM_BUFFER_SIZE: return limits.maxUniformBufferRange; - case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET: return limits.maxVertexInputAttributeOffset; - case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES: return limits.maxVertexInputAttributes; - case LIMIT_MAX_VERTEX_INPUT_BINDINGS: return limits.maxVertexInputBindings; - case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE: return limits.maxVertexInputBindingStride; - case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT: return limits.minUniformBufferOffsetAlignment; - case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X: return limits.maxComputeWorkGroupCount[0]; - case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y: return limits.maxComputeWorkGroupCount[1]; - case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z: return limits.maxComputeWorkGroupCount[2]; - case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS: return limits.maxComputeWorkGroupInvocations; - case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X: return limits.maxComputeWorkGroupSize[0]; - case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y: return limits.maxComputeWorkGroupSize[1]; - case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: return limits.maxComputeWorkGroupSize[2]; - - default: ERR_FAIL_V(0); + case LIMIT_MAX_BOUND_UNIFORM_SETS: + return limits.maxBoundDescriptorSets; + case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS: + return limits.maxColorAttachments; + case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET: + return limits.maxDescriptorSetSampledImages; + case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET: + return limits.maxDescriptorSetSamplers; + case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET: + return limits.maxDescriptorSetStorageBuffers; + case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET: + return limits.maxDescriptorSetStorageImages; + case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET: + return limits.maxDescriptorSetUniformBuffers; + case LIMIT_MAX_DRAW_INDEXED_INDEX: + return limits.maxDrawIndexedIndexValue; + case LIMIT_MAX_FRAMEBUFFER_HEIGHT: + return limits.maxFramebufferHeight; + case LIMIT_MAX_FRAMEBUFFER_WIDTH: + return limits.maxFramebufferWidth; + case LIMIT_MAX_TEXTURE_ARRAY_LAYERS: + return limits.maxImageArrayLayers; + case LIMIT_MAX_TEXTURE_SIZE_1D: + return limits.maxImageDimension1D; + case LIMIT_MAX_TEXTURE_SIZE_2D: + return limits.maxImageDimension2D; + case LIMIT_MAX_TEXTURE_SIZE_3D: + return limits.maxImageDimension3D; + case LIMIT_MAX_TEXTURE_SIZE_CUBE: + return limits.maxImageDimensionCube; + case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE: + return limits.maxPerStageDescriptorSampledImages; + case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE: + return limits.maxPerStageDescriptorSamplers; + case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE: + return limits.maxPerStageDescriptorStorageBuffers; + case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE: + return limits.maxPerStageDescriptorStorageImages; + case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE: + return limits.maxPerStageDescriptorUniformBuffers; + case LIMIT_MAX_PUSH_CONSTANT_SIZE: + return limits.maxPushConstantsSize; + case LIMIT_MAX_UNIFORM_BUFFER_SIZE: + return limits.maxUniformBufferRange; + case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET: + return limits.maxVertexInputAttributeOffset; + case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES: + return limits.maxVertexInputAttributes; + case LIMIT_MAX_VERTEX_INPUT_BINDINGS: + return limits.maxVertexInputBindings; + case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE: + return limits.maxVertexInputBindingStride; + case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT: + return limits.minUniformBufferOffsetAlignment; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X: + return limits.maxComputeWorkGroupCount[0]; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y: + return limits.maxComputeWorkGroupCount[1]; + case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z: + return limits.maxComputeWorkGroupCount[2]; + case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS: + return limits.maxComputeWorkGroupInvocations; + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X: + return limits.maxComputeWorkGroupSize[0]; + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y: + return limits.maxComputeWorkGroupSize[1]; + case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z: + return limits.maxComputeWorkGroupSize[2]; + + default: + ERR_FAIL_V(0); } return 0; } void RenderingDeviceVulkan::finalize() { - //free all resources _flush(false); @@ -7290,7 +7626,6 @@ void RenderingDeviceVulkan::finalize() { WARN_PRINT(itos(owned.size()) + " RIDs of type 'Texture' were leaked."); //free shared first for (List<RID>::Element *E = owned.front(); E;) { - List<RID>::Element *N = E->next(); if (texture_is_shared(E->get())) { free(E->get()); @@ -7354,7 +7689,6 @@ RenderingDevice *RenderingDeviceVulkan::create_local_device() { } RenderingDeviceVulkan::RenderingDeviceVulkan() { - screen_prepared = false; } RenderingDeviceVulkan::~RenderingDeviceVulkan() { |