diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/register_driver_types.cpp | 3 | ||||
-rw-r--r-- | drivers/unix/dir_access_unix.cpp | 26 | ||||
-rw-r--r-- | drivers/unix/file_access_unix.cpp | 2 | ||||
-rw-r--r-- | drivers/unix/net_socket_posix.cpp | 6 | ||||
-rw-r--r-- | drivers/unix/os_unix.cpp | 6 | ||||
-rw-r--r-- | drivers/unix/os_unix.h | 1 | ||||
-rw-r--r-- | drivers/vulkan/SCsub | 143 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 952 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.h | 35 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.cpp | 167 | ||||
-rw-r--r-- | drivers/vulkan/vulkan_context.h | 19 | ||||
-rw-r--r-- | drivers/wasapi/audio_driver_wasapi.cpp | 146 | ||||
-rw-r--r-- | drivers/wasapi/audio_driver_wasapi.h | 3 | ||||
-rw-r--r-- | drivers/windows/dir_access_windows.cpp | 10 | ||||
-rw-r--r-- | drivers/windows/file_access_windows.cpp | 24 |
15 files changed, 1166 insertions, 377 deletions
diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp index 83702ea2cc..4a163b7c10 100644 --- a/drivers/register_driver_types.cpp +++ b/drivers/register_driver_types.cpp @@ -30,6 +30,7 @@ #include "register_driver_types.h" +#include "core/extension/native_extension_manager.h" #include "drivers/png/image_loader_png.h" #include "drivers/png/resource_saver_png.h" @@ -54,7 +55,9 @@ void unregister_core_driver_types() { } void register_driver_types() { + NativeExtensionManager::get_singleton()->initialize_extensions(NativeExtension::INITIALIZATION_LEVEL_DRIVER); } void unregister_driver_types() { + NativeExtensionManager::get_singleton()->deinitialize_extensions(NativeExtension::INITIALIZATION_LEVEL_DRIVER); } diff --git a/drivers/unix/dir_access_unix.cpp b/drivers/unix/dir_access_unix.cpp index a2c9bae852..1754b47c85 100644 --- a/drivers/unix/dir_access_unix.cpp +++ b/drivers/unix/dir_access_unix.cpp @@ -71,7 +71,7 @@ Error DirAccessUnix::list_dir_begin() { bool DirAccessUnix::file_exists(String p_file) { GLOBAL_LOCK_FUNCTION - if (p_file.is_rel_path()) { + if (p_file.is_relative_path()) { p_file = current_dir.plus_file(p_file); } @@ -90,7 +90,7 @@ bool DirAccessUnix::file_exists(String p_file) { bool DirAccessUnix::dir_exists(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = get_current_dir().plus_file(p_dir); } @@ -105,7 +105,7 @@ bool DirAccessUnix::dir_exists(String p_dir) { bool DirAccessUnix::is_readable(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = get_current_dir().plus_file(p_dir); } @@ -116,7 +116,7 @@ bool DirAccessUnix::is_readable(String p_dir) { bool DirAccessUnix::is_writable(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = get_current_dir().plus_file(p_dir); } @@ -125,7 +125,7 @@ bool DirAccessUnix::is_writable(String p_dir) { } uint64_t DirAccessUnix::get_modified_time(String p_file) { - if (p_file.is_rel_path()) { + if (p_file.is_relative_path()) { p_file = current_dir.plus_file(p_file); } @@ -293,7 +293,7 @@ bool DirAccessUnix::drives_are_shortcuts() { Error DirAccessUnix::make_dir(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = get_current_dir().plus_file(p_dir); } @@ -328,7 +328,7 @@ Error DirAccessUnix::change_dir(String p_dir) { // try_dir is the directory we are trying to change into String try_dir = ""; - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { String next_dir = current_dir.plus_file(p_dir); next_dir = next_dir.simplify_path(); try_dir = next_dir; @@ -372,13 +372,13 @@ String DirAccessUnix::get_current_dir(bool p_include_drive) { } Error DirAccessUnix::rename(String p_path, String p_new_path) { - if (p_path.is_rel_path()) { + if (p_path.is_relative_path()) { p_path = get_current_dir().plus_file(p_path); } p_path = fix_path(p_path); - if (p_new_path.is_rel_path()) { + if (p_new_path.is_relative_path()) { p_new_path = get_current_dir().plus_file(p_new_path); } @@ -388,7 +388,7 @@ Error DirAccessUnix::rename(String p_path, String p_new_path) { } Error DirAccessUnix::remove(String p_path) { - if (p_path.is_rel_path()) { + if (p_path.is_relative_path()) { p_path = get_current_dir().plus_file(p_path); } @@ -407,7 +407,7 @@ Error DirAccessUnix::remove(String p_path) { } bool DirAccessUnix::is_link(String p_file) { - if (p_file.is_rel_path()) { + if (p_file.is_relative_path()) { p_file = get_current_dir().plus_file(p_file); } @@ -422,7 +422,7 @@ bool DirAccessUnix::is_link(String p_file) { } String DirAccessUnix::read_link(String p_file) { - if (p_file.is_rel_path()) { + if (p_file.is_relative_path()) { p_file = get_current_dir().plus_file(p_file); } @@ -439,7 +439,7 @@ String DirAccessUnix::read_link(String p_file) { } Error DirAccessUnix::create_link(String p_source, String p_target) { - if (p_target.is_rel_path()) { + if (p_target.is_relative_path()) { p_target = get_current_dir().plus_file(p_target); } diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp index 6ea55219bb..f07c654bd6 100644 --- a/drivers/unix/file_access_unix.cpp +++ b/drivers/unix/file_access_unix.cpp @@ -111,7 +111,7 @@ Error FileAccessUnix::_open(const String &p_path, int p_mode_flags) { } } - if (is_backup_save_enabled() && (p_mode_flags & WRITE) && !(p_mode_flags & READ)) { + if (is_backup_save_enabled() && (p_mode_flags == WRITE)) { save_path = path; path = path + ".tmp"; } diff --git a/drivers/unix/net_socket_posix.cpp b/drivers/unix/net_socket_posix.cpp index efeaf32ff7..e01c6a0e0f 100644 --- a/drivers/unix/net_socket_posix.cpp +++ b/drivers/unix/net_socket_posix.cpp @@ -269,11 +269,11 @@ _FORCE_INLINE_ Error NetSocketPosix::_change_multicast_group(IPAddress p_ip, Str break; // IPv6 uses index. } - for (List<IPAddress>::Element *F = c.ip_addresses.front(); F; F = F->next()) { - if (!F->get().is_ipv4()) { + for (const IPAddress &F : c.ip_addresses) { + if (!F.is_ipv4()) { continue; // Wrong IP type } - if_ip = F->get(); + if_ip = F; break; } break; diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index a5c61bbea5..3032c31629 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -139,10 +139,6 @@ void OS_Unix::finalize_core() { NetSocketPosix::cleanup(); } -void OS_Unix::alert(const String &p_alert, const String &p_title) { - fprintf(stderr, "ERROR: %s\n", p_alert.utf8().get_data()); -} - String OS_Unix::get_stdin_string(bool p_block) { if (p_block) { char buff[1024]; @@ -396,7 +392,7 @@ String OS_Unix::get_locale() const { Error OS_Unix::open_dynamic_library(const String p_path, void *&p_library_handle, bool p_also_set_library_path) { String path = p_path; - if (FileAccess::exists(path) && path.is_rel_path()) { + if (FileAccess::exists(path) && path.is_relative_path()) { // dlopen expects a slash, in this case a leading ./ for it to be interpreted as a relative path, // otherwise it will end up searching various system directories for the lib instead and finally failing. path = "./" + path; diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h index 6c79d984e9..bf82019d38 100644 --- a/drivers/unix/os_unix.h +++ b/drivers/unix/os_unix.h @@ -52,7 +52,6 @@ protected: public: OS_Unix(); - virtual void alert(const String &p_alert, const String &p_title = "ALERT!"); virtual String get_stdin_string(bool p_block) override; //virtual void set_mouse_show(bool p_show); diff --git a/drivers/vulkan/SCsub b/drivers/vulkan/SCsub index 14b9d63204..8fe75367a8 100644 --- a/drivers/vulkan/SCsub +++ b/drivers/vulkan/SCsub @@ -3,116 +3,45 @@ Import("env") thirdparty_obj = [] +thirdparty_dir = "#thirdparty/vulkan" +thirdparty_volk_dir = "#thirdparty/volk" -# FIXME: Refactor all this to reduce code duplication. -if env["platform"] == "android": - # Use NDK Vulkan headers - thirdparty_dir = env["ANDROID_NDK_ROOT"] + "/sources/third_party/vulkan/src" - thirdparty_includes = [ - thirdparty_dir, - thirdparty_dir + "/include", - thirdparty_dir + "/layers", - thirdparty_dir + "/layers/generated", - ] - env.Prepend(CPPPATH=thirdparty_includes) - - # Build Vulkan memory allocator - env_thirdparty = env.Clone() - env_thirdparty.disable_warnings() - - thirdparty_dir = "#thirdparty/vulkan" - vma_sources = [thirdparty_dir + "/android/vk_mem_alloc.cpp"] - env_thirdparty.add_source_files(thirdparty_obj, vma_sources) +# Use bundled Vulkan headers +env.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include"]) + +if env["use_volk"]: + env.AppendUnique(CPPDEFINES=["USE_VOLK"]) + env.Prepend(CPPPATH=[thirdparty_volk_dir]) +if env["platform"] == "android": + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_ANDROID_KHR"]) elif env["platform"] == "iphone": - # Use bundled Vulkan headers - thirdparty_dir = "#thirdparty/vulkan" - env.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include", thirdparty_dir + "/loader"]) - - # Build Vulkan memory allocator - env_thirdparty = env.Clone() - env_thirdparty.disable_warnings() - - vma_sources = [thirdparty_dir + "/vk_mem_alloc.cpp"] - env_thirdparty.add_source_files(thirdparty_obj, vma_sources) - -elif env["builtin_vulkan"]: - # Use bundled Vulkan headers - thirdparty_dir = "#thirdparty/vulkan" - env.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "/include", thirdparty_dir + "/loader"]) - - # Build Vulkan loader library - env_thirdparty = env.Clone() - env_thirdparty.disable_warnings() - - loader_sources = [ - "cJSON.c", - "debug_utils.c", - "dev_ext_trampoline.c", - "loader.c", - "murmurhash.c", - "phys_dev_ext.c", - "trampoline.c", - "unknown_ext_chain.c", - "wsi.c", - "extension_manual.c", - ] - vma_sources = [thirdparty_dir + "/vk_mem_alloc.cpp"] - - if env["platform"] == "windows": - loader_sources.append("dirent_on_windows.c") - env_thirdparty.AppendUnique( - CPPDEFINES=[ - "VK_USE_PLATFORM_WIN32_KHR", - "VULKAN_NON_CMAKE_BUILD", - "WIN32_LEAN_AND_MEAN", - 'API_NAME=\\"%s\\"' % "Vulkan", - ] - ) - if not env.msvc: # Windows 7+, missing in mingw headers - env_thirdparty.AppendUnique( - CPPDEFINES=["CM_GETIDLIST_FILTER_CLASS=0x00000200", "CM_GETIDLIST_FILTER_PRESENT=0x00000100"] - ) - elif env["platform"] == "osx": - env_thirdparty.AppendUnique( - CPPDEFINES=[ - "VK_USE_PLATFORM_MACOS_MVK", - "VULKAN_NON_CMAKE_BUILD", - 'SYSCONFDIR=\\"%s\\"' % "/etc", - 'FALLBACK_DATA_DIRS=\\"%s\\"' % "/usr/local/share:/usr/share", - 'FALLBACK_CONFIG_DIRS=\\"%s\\"' % "/etc/xdg", - ] - ) - elif env["platform"] == "linuxbsd": - env_thirdparty.AppendUnique( - CPPDEFINES=[ - "VK_USE_PLATFORM_XLIB_KHR", - "VULKAN_NON_CMAKE_BUILD", - 'SYSCONFDIR=\\"%s\\"' % "/etc", - 'FALLBACK_DATA_DIRS=\\"%s\\"' % "/usr/local/share:/usr/share", - 'FALLBACK_CONFIG_DIRS=\\"%s\\"' % "/etc/xdg", - ] - ) - import platform - - if platform.system() == "Linux": - # In glibc since 2.17 and musl libc since 1.1.24. Used by loader.c. - env_thirdparty.AppendUnique(CPPDEFINES=["HAVE_SECURE_GETENV"]) - - loader_sources = [thirdparty_dir + "/loader/" + file for file in loader_sources] - env_thirdparty.add_source_files(thirdparty_obj, loader_sources) - env_thirdparty.add_source_files(thirdparty_obj, vma_sources) - -else: # Always build VMA. - thirdparty_dir = "#thirdparty/vulkan" - env.Prepend(CPPPATH=[thirdparty_dir]) - - # Build Vulkan loader library - env_thirdparty = env.Clone() - env_thirdparty.disable_warnings() - vma_sources = [thirdparty_dir + "/vk_mem_alloc.cpp"] - - env_thirdparty.add_source_files(thirdparty_obj, vma_sources) + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_IOS_MVK"]) +elif env["platform"] == "linuxbsd": + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_XLIB_KHR"]) +elif env["platform"] == "osx": + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_MACOS_MVK"]) +elif env["platform"] == "windows": + env.AppendUnique(CPPDEFINES=["VK_USE_PLATFORM_WIN32_KHR"]) + +# Build Vulkan memory allocator and volk +env_thirdparty_vma = env.Clone() +env_thirdparty_vma.disable_warnings() +thirdparty_sources_vma = [thirdparty_dir + "/vk_mem_alloc.cpp"] + +if env["use_volk"]: + env_thirdparty_vma.AppendUnique(CPPDEFINES=["VMA_STATIC_VULKAN_FUNCTIONS=1"]) + env_thirdparty_volk = env.Clone() + env_thirdparty_volk.disable_warnings() + + thirdparty_sources_volk = [thirdparty_volk_dir + "/volk.c"] + env_thirdparty_volk.add_source_files(thirdparty_obj, thirdparty_sources_volk) +elif env["platform"] == "android": + # Our current NDK version only provides old Vulkan headers, + # so we have to limit VMA. + env_thirdparty_vma.AppendUnique(CPPDEFINES=["VMA_VULKAN_VERSION=1000000"]) + +env_thirdparty_vma.add_source_files(thirdparty_obj, thirdparty_sources_vma) env.drivers_sources += thirdparty_obj diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 2a8d4fcded..a4324f0a2c 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -31,11 +31,14 @@ #include "rendering_device_vulkan.h" #include "core/config/project_settings.h" +#include "core/io/compression.h" #include "core/io/file_access.h" +#include "core/io/marshalls.h" #include "core/os/os.h" #include "core/templates/hashfuncs.h" #include "drivers/vulkan/vulkan_context.h" +#include "thirdparty/misc/smolv.h" #include "thirdparty/spirv-reflect/spirv_reflect.h" //#define FORCE_FULL_BARRIER @@ -1801,6 +1804,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } + if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) { + image_create_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + } + if (p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT) { image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; } @@ -2032,7 +2039,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T if (p_data.size()) { for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) { - texture_update(id, i, p_data[i]); + _texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL, true); } } return id; @@ -2131,6 +2138,10 @@ RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID } } + if (texture.usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) { + usage_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + } + if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; } @@ -2279,10 +2290,14 @@ RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p } Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier) { + return _texture_update(p_texture, p_layer, p_data, p_post_barrier, false); +} + +Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue) { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(draw_list || compute_list, ERR_INVALID_PARAMETER, - "Updating textures in is forbidden during creation of a draw or compute list"); + ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER, + "Updating textures is forbidden during creation of a draw or compute list"); Texture *texture = texture_owner.getornull(p_texture); ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER); @@ -2323,7 +2338,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con const uint8_t *r = p_data.ptr(); - VkCommandBuffer command_buffer = p_post_barrier ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; + VkCommandBuffer command_buffer = p_use_setup_queue ? frames[frame].setup_command_buffer : frames[frame].draw_command_buffer; //barrier to transfer { @@ -2376,7 +2391,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format); uint32_t alloc_offset, alloc_size; - Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, p_post_barrier); + Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false, !p_use_setup_queue); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); uint8_t *write_ptr; @@ -2725,6 +2740,14 @@ bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) { return texture_owner.owns(p_texture); } +Size2i RenderingDeviceVulkan::texture_size(RID p_texture) { + _THREAD_SAFE_METHOD_ + + Texture *tex = texture_owner.getornull(p_texture); + ERR_FAIL_COND_V(!tex, Size2i()); + return Size2i(tex->width, tex->height); +} + Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ @@ -3268,8 +3291,8 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF for (int i = 0; i < p_attachments.size(); i++) { ERR_FAIL_INDEX_V(p_attachments[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE); ERR_FAIL_INDEX_V(p_attachments[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE); - ERR_FAIL_COND_V_MSG(!(p_attachments[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), - VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set."); + ERR_FAIL_COND_V_MSG(!(p_attachments[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)), + VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (color, depth, input or stencil) bit set."); VkAttachmentDescription description = {}; description.flags = 0; @@ -3284,13 +3307,25 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF // Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs. // the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that // stage + switch (is_depth ? p_initial_depth_action : p_initial_action) { case INITIAL_ACTION_CLEAR_REGION: case INITIAL_ACTION_CLEAR: { - description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there - dependency_from_external.srcStageMask |= reading_stages; + if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + dependency_from_external.srcStageMask |= reading_stages; + } else { + description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there + dependency_from_external.srcStageMask |= reading_stages; + } } break; case INITIAL_ACTION_KEEP: { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { @@ -3348,7 +3383,58 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } } - switch (is_depth ? p_final_depth_action : p_final_action) { + bool used_last = false; + + { + int last_pass = p_passes.size() - 1; + + if (is_depth) { + //likely missing depth resolve? + if (p_passes[last_pass].depth_attachment == i) { + used_last = true; + } + } else { + if (p_passes[last_pass].resolve_attachments.size()) { + //if using resolve attachments, check resolve attachments + for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) { + if (p_passes[last_pass].resolve_attachments[j] == i) { + used_last = true; + break; + } + } + } else { + for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) { + if (p_passes[last_pass].color_attachments[j] == i) { + used_last = true; + break; + } + } + } + } + + if (!used_last) { + for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) { + if (p_passes[last_pass].preserve_attachments[j] == i) { + used_last = true; + break; + } + } + } + } + + FinalAction final_action = p_final_action; + FinalAction final_depth_action = p_final_depth_action; + + if (!used_last) { + if (is_depth) { + final_depth_action = FINAL_ACTION_DISCARD; + + } else { + final_action = FINAL_ACTION_DISCARD; + } + } + + switch (is_depth ? final_depth_action : final_action) { case FINAL_ACTION_READ: { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -3466,7 +3552,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF reference.layout = VK_IMAGE_LAYOUT_UNDEFINED; } else { ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), input attachment (" + itos(j) + ")."); - ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not usable as input attachment."); + ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it isn't marked as an input texture."); ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); reference.attachment = attachment; reference.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; @@ -3490,32 +3576,17 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF } else { ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + ")."); ERR_FAIL_COND_V_MSG(pass->color_attachments[j] == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + "), the respective color attachment is marked as unused."); - ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not usable as resolve attachment."); + ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment, it isn't marked as a color texture."); ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); bool multisample = p_attachments[attachment].samples > TEXTURE_SAMPLES_1; ERR_FAIL_COND_V_MSG(multisample, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachments can't be multisample."); reference.attachment = attachment; - reference.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; attachment_last_pass[attachment] = i; } resolve_references.push_back(reference); } - LocalVector<uint32_t> &preserve_references = preserve_reference_array[i]; - - for (int j = 0; j < pass->preserve_attachments.size(); j++) { - int32_t attachment = pass->preserve_attachments[j]; - - ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused."); - - ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ")."); - ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass."); - - attachment_last_pass[attachment] = i; - - preserve_references.push_back(attachment); - } - VkAttachmentReference &depth_stencil_reference = depth_reference_array[i]; if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) { @@ -3539,6 +3610,22 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED; } + LocalVector<uint32_t> &preserve_references = preserve_reference_array[i]; + + for (int j = 0; j < pass->preserve_attachments.size(); j++) { + int32_t attachment = pass->preserve_attachments[j]; + + ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused."); + + ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ")."); + + if (attachment_last_pass[attachment] != i) { + //preserve can still be used to keep depth or color from being discarded after use + attachment_last_pass[attachment] = i; + preserve_references.push_back(attachment); + } + } + VkSubpassDescription &subpass = subpasses[i]; subpass.flags = 0; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; @@ -3625,8 +3712,10 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF render_pass_create_info.pDependencies = nullptr; } + // These are only used if we use multiview but we need to define them in scope. const uint32_t view_mask = (1 << p_view_count) - 1; const uint32_t correlation_mask = (1 << p_view_count) - 1; + Vector<uint32_t> view_masks; VkRenderPassMultiviewCreateInfo render_pass_multiview_create_info; if (p_view_count > 1) { @@ -3638,10 +3727,15 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF // Make sure we limit this to the number of views we support. ERR_FAIL_COND_V_MSG(p_view_count > capabilities.max_view_count, VK_NULL_HANDLE, "Hardware does not support requested number of views for Multiview render pass"); + // Set view masks for each subpass + for (uint32_t i = 0; i < subpasses.size(); i++) { + view_masks.push_back(view_mask); + }; + render_pass_multiview_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO; render_pass_multiview_create_info.pNext = nullptr; - render_pass_multiview_create_info.subpassCount = 1; - render_pass_multiview_create_info.pViewMasks = &view_mask; + render_pass_multiview_create_info.subpassCount = subpasses.size(); + render_pass_multiview_create_info.pViewMasks = view_masks.ptr(); render_pass_multiview_create_info.dependencyCount = 0; render_pass_multiview_create_info.pViewOffsets = nullptr; render_pass_multiview_create_info.correlationMaskCount = 1; @@ -3739,7 +3833,7 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c VkRenderPass render_pass; VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass); - ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateRenderPass for empty fb failed with error " + itos(res) + "."); + ERR_FAIL_COND_V_MSG(res, 0, "vkCreateRenderPass for empty fb failed with error " + itos(res) + "."); if (render_pass == VK_NULL_HANDLE) { //was likely invalid return INVALID_ID; @@ -4356,51 +4450,91 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa } #endif -RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) { - //descriptor layouts - Vector<Vector<VkDescriptorSetLayoutBinding>> set_bindings; - Vector<Vector<UniformInfo>> uniform_info; - Shader::PushConstant push_constant; - push_constant.push_constant_size = 0; - push_constant.push_constants_vk_stage = 0; +//version 1: initial +//version 2: Added shader name - uint32_t vertex_input_mask = 0; +#define SHADER_BINARY_VERSION 2 - uint32_t fragment_outputs = 0; +String RenderingDeviceVulkan::shader_get_binary_cache_key() const { + return "Vulkan-SV" + itos(SHADER_BINARY_VERSION); +} - uint32_t stages_processed = 0; +struct RenderingDeviceVulkanShaderBinaryDataBinding { + uint32_t type; + uint32_t binding; + uint32_t stages; + uint32_t length; //size of arrays (in total elements), or ubos (in bytes * total elements) +}; - bool is_compute = false; +struct RenderingDeviceVulkanShaderBinarySpecializationConstant { + uint32_t type; + uint32_t constant_id; + union { + uint32_t int_value; + float float_value; + bool bool_value; + }; + uint32_t stage_flags; +}; + +struct RenderingDeviceVulkanShaderBinaryData { + uint32_t vertex_input_mask; + uint32_t fragment_outputs; + uint32_t specialization_constant_count; + uint32_t is_compute; + uint32_t compute_local_size[3]; + uint32_t set_count; + uint32_t push_constant_size; + uint32_t push_constants_vk_stage; + uint32_t stage_count; + uint32_t shader_name_len; +}; - uint32_t compute_local_size[3] = { 0, 0, 0 }; +Vector<uint8_t> RenderingDeviceVulkan::shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name) { + RenderingDeviceVulkanShaderBinaryData binary_data; + binary_data.vertex_input_mask = 0; + binary_data.fragment_outputs = 0; + binary_data.specialization_constant_count = 0; + binary_data.is_compute = 0; + binary_data.compute_local_size[0] = 0; + binary_data.compute_local_size[1] = 0; + binary_data.compute_local_size[2] = 0; + binary_data.set_count = 0; + binary_data.push_constant_size = 0; + binary_data.push_constants_vk_stage = 0; + + Vector<Vector<RenderingDeviceVulkanShaderBinaryDataBinding>> uniform_info; //set bindings + Vector<RenderingDeviceVulkanShaderBinarySpecializationConstant> specialization_constants; - for (int i = 0; i < p_stages.size(); i++) { - if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) { - is_compute = true; - ERR_FAIL_COND_V_MSG(p_stages.size() != 1, RID(), + uint32_t stages_processed = 0; + + for (int i = 0; i < p_spirv.size(); i++) { + if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { + binary_data.is_compute = true; + ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, Vector<uint8_t>(), "Compute shaders can only receive one stage, dedicated to compute."); } - ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_stages[i].shader_stage), RID(), - "Stage " + String(shader_stage_names[p_stages[i].shader_stage]) + " submitted more than once."); + ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_spirv[i].shader_stage), Vector<uint8_t>(), + "Stage " + String(shader_stage_names[p_spirv[i].shader_stage]) + " submitted more than once."); { SpvReflectShaderModule module; - const uint8_t *spirv = p_stages[i].spir_v.ptr(); - SpvReflectResult result = spvReflectCreateShaderModule(p_stages[i].spir_v.size(), spirv, &module); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader."); - - if (is_compute) { - compute_local_size[0] = module.entry_points->local_size.x; - compute_local_size[1] = module.entry_points->local_size.y; - compute_local_size[2] = module.entry_points->local_size.z; + const uint8_t *spirv = p_spirv[i].spir_v.ptr(); + SpvReflectResult result = spvReflectCreateShaderModule(p_spirv[i].spir_v.size(), spirv, &module); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed parsing shader."); + + if (binary_data.is_compute) { + binary_data.compute_local_size[0] = module.entry_points->local_size.x; + binary_data.compute_local_size[1] = module.entry_points->local_size.y; + binary_data.compute_local_size[2] = module.entry_points->local_size.z; } uint32_t binding_count = 0; result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed enumerating descriptor bindings."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating descriptor bindings."); - uint32_t stage = p_stages[i].shader_stage; + uint32_t stage = p_spirv[i].shader_stage; if (binding_count > 0) { //Parse bindings @@ -4409,56 +4543,47 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages bindings.resize(binding_count); result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, bindings.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed getting descriptor bindings."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed getting descriptor bindings."); for (uint32_t j = 0; j < binding_count; j++) { const SpvReflectDescriptorBinding &binding = *bindings[j]; - VkDescriptorSetLayoutBinding layout_binding; - UniformInfo info; + RenderingDeviceVulkanShaderBinaryDataBinding info; bool need_array_dimensions = false; bool need_block_size = false; switch (binding.descriptor_type) { case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; info.type = UNIFORM_TYPE_SAMPLER; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; info.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; info.type = UNIFORM_TYPE_TEXTURE; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; info.type = UNIFORM_TYPE_IMAGE; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; info.type = UNIFORM_TYPE_TEXTURE_BUFFER; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; info.type = UNIFORM_TYPE_IMAGE_BUFFER; need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; info.type = UNIFORM_TYPE_UNIFORM_BUFFER; need_block_size = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; info.type = UNIFORM_TYPE_STORAGE_BUFFER; need_block_size = true; } break; @@ -4471,8 +4596,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages continue; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; info.type = UNIFORM_TYPE_INPUT_ATTACHMENT; + need_array_dimensions = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { ERR_PRINT("Acceleration structure not supported."); @@ -4493,42 +4618,35 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } } - layout_binding.descriptorCount = info.length; - } else if (need_block_size) { info.length = binding.block.size; - layout_binding.descriptorCount = 1; } else { info.length = 0; - layout_binding.descriptorCount = 1; } info.binding = binding.binding; uint32_t set = binding.set; - //print_line("Stage: " + String(shader_stage_names[stage]) + " set=" + itos(set) + " binding=" + itos(info.binding) + " type=" + shader_uniform_names[info.type] + " length=" + itos(info.length)); - - ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, RID(), + ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, Vector<uint8_t>(), "On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."); - ERR_FAIL_COND_V_MSG(set >= limits.maxBoundDescriptorSets, RID(), + ERR_FAIL_COND_V_MSG(set >= limits.maxBoundDescriptorSets, Vector<uint8_t>(), "On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ")."); - if (set < (uint32_t)set_bindings.size()) { + if (set < (uint32_t)uniform_info.size()) { //check if this already exists bool exists = false; - for (int k = 0; k < set_bindings[set].size(); k++) { - if (set_bindings[set][k].binding == (uint32_t)info.binding) { + for (int k = 0; k < uniform_info[set].size(); k++) { + if (uniform_info[set][k].binding == (uint32_t)info.binding) { //already exists, verify that it's the same type - ERR_FAIL_COND_V_MSG(set_bindings[set][k].descriptorType != layout_binding.descriptorType, RID(), + ERR_FAIL_COND_V_MSG(uniform_info[set][k].type != info.type, Vector<uint8_t>(), "On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(info.binding) + " with different uniform type."); //also, verify that it's the same size - ERR_FAIL_COND_V_MSG(set_bindings[set][k].descriptorCount != layout_binding.descriptorCount || uniform_info[set][k].length != info.length, RID(), + ERR_FAIL_COND_V_MSG(uniform_info[set][k].length != info.length, Vector<uint8_t>(), "On shader stage '" + String(shader_stage_names[stage]) + "', uniform '" + binding.name + "' trying to re-use location for set=" + itos(set) + ", binding=" + itos(info.binding) + " with different uniform size."); //just append stage mask and return - set_bindings.write[set].write[k].stageFlags |= shader_stage_masks[stage]; uniform_info.write[set].write[k].stages |= 1 << stage; exists = true; } @@ -4539,40 +4657,87 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } } - layout_binding.binding = info.binding; - layout_binding.stageFlags = shader_stage_masks[stage]; - layout_binding.pImmutableSamplers = nullptr; //no support for this yet - info.stages = 1 << stage; - info.binding = info.binding; - if (set >= (uint32_t)set_bindings.size()) { - set_bindings.resize(set + 1); + if (set >= (uint32_t)uniform_info.size()) { uniform_info.resize(set + 1); } - set_bindings.write[set].push_back(layout_binding); uniform_info.write[set].push_back(info); } } + { + //specialization constants + + uint32_t sc_count = 0; + result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating specialization constants."); + + if (sc_count) { + Vector<SpvReflectSpecializationConstant *> spec_constants; + spec_constants.resize(sc_count); + + result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, spec_constants.ptrw()); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining specialization constants."); + + for (uint32_t j = 0; j < sc_count; j++) { + int32_t existing = -1; + RenderingDeviceVulkanShaderBinarySpecializationConstant sconst; + sconst.constant_id = spec_constants[j]->constant_id; + switch (spec_constants[j]->constant_type) { + case SPV_REFLECT_SPECIALIZATION_CONSTANT_BOOL: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; + sconst.bool_value = spec_constants[j]->default_value.int_bool_value != 0; + } break; + case SPV_REFLECT_SPECIALIZATION_CONSTANT_INT: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; + sconst.int_value = spec_constants[j]->default_value.int_bool_value; + } break; + case SPV_REFLECT_SPECIALIZATION_CONSTANT_FLOAT: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; + sconst.float_value = spec_constants[j]->default_value.float_value; + } break; + } + sconst.stage_flags = 1 << p_spirv[i].shader_stage; + + for (int k = 0; k < specialization_constants.size(); k++) { + if (specialization_constants[k].constant_id == sconst.constant_id) { + ERR_FAIL_COND_V_MSG(specialization_constants[k].type != sconst.type, Vector<uint8_t>(), "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); + ERR_FAIL_COND_V_MSG(specialization_constants[k].int_value != sconst.int_value, Vector<uint8_t>(), "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); + existing = k; + break; + } + } + + if (existing > 0) { + specialization_constants.write[existing].stage_flags |= sconst.stage_flags; + } else { + specialization_constants.push_back(sconst); + } + } + } + } + if (stage == SHADER_STAGE_VERTEX) { uint32_t iv_count = 0; result = spvReflectEnumerateInputVariables(&module, &iv_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed enumerating input variables."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating input variables."); if (iv_count) { Vector<SpvReflectInterfaceVariable *> input_vars; input_vars.resize(iv_count); result = spvReflectEnumerateInputVariables(&module, &iv_count, input_vars.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed obtaining input variables."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining input variables."); for (uint32_t j = 0; j < iv_count; j++) { if (input_vars[j] && input_vars[j]->decoration_flags == 0) { //regular input - vertex_input_mask |= (1 << uint32_t(input_vars[j]->location)); + binary_data.vertex_input_mask |= (1 << uint32_t(input_vars[j]->location)); } } } @@ -4581,21 +4746,21 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages if (stage == SHADER_STAGE_FRAGMENT) { uint32_t ov_count = 0; result = spvReflectEnumerateOutputVariables(&module, &ov_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed enumerating output variables."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating output variables."); if (ov_count) { Vector<SpvReflectInterfaceVariable *> output_vars; output_vars.resize(ov_count); result = spvReflectEnumerateOutputVariables(&module, &ov_count, output_vars.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed obtaining output variables."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining output variables."); for (uint32_t j = 0; j < ov_count; j++) { const SpvReflectInterfaceVariable *refvar = output_vars[j]; if (refvar != nullptr && refvar->built_in != SpvBuiltInFragDepth) { - fragment_outputs |= 1 << refvar->location; + binary_data.fragment_outputs |= 1 << refvar->location; } } } @@ -4603,18 +4768,18 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages uint32_t pc_count = 0; result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed enumerating push constants."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed enumerating push constants."); if (pc_count) { - ERR_FAIL_COND_V_MSG(pc_count > 1, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "': Only one push constant is supported, which should be the same across shader stages."); + ERR_FAIL_COND_V_MSG(pc_count > 1, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "': Only one push constant is supported, which should be the same across shader stages."); Vector<SpvReflectBlockVariable *> pconstants; pconstants.resize(pc_count); result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, pconstants.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed obtaining push constants."); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "' failed obtaining push constants."); #if 0 if (pconstants[0] == nullptr) { FileAccess *f = FileAccess::open("res://popo.spv", FileAccess::WRITE); @@ -4623,11 +4788,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages } #endif - ERR_FAIL_COND_V_MSG(push_constant.push_constant_size && push_constant.push_constant_size != pconstants[0]->size, RID(), - "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "': Push constant block must be the same across shader stages."); + ERR_FAIL_COND_V_MSG(binary_data.push_constant_size && binary_data.push_constant_size != pconstants[0]->size, Vector<uint8_t>(), + "Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages."); - push_constant.push_constant_size = pconstants[0]->size; - push_constant.push_constants_vk_stage |= shader_stage_masks[stage]; + binary_data.push_constant_size = pconstants[0]->size; + binary_data.push_constants_vk_stage |= shader_stage_masks[stage]; //print_line("Stage: " + String(shader_stage_names[stage]) + " push constant of size=" + itos(push_constant.push_constant_size)); } @@ -4636,9 +4801,317 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages spvReflectDestroyShaderModule(&module); } - stages_processed |= (1 << p_stages[i].shader_stage); + stages_processed |= (1 << p_spirv[i].shader_stage); + } + + Vector<Vector<uint8_t>> compressed_stages; + Vector<uint32_t> smolv_size; + Vector<uint32_t> zstd_size; //if 0, stdno t used + + uint32_t stages_binary_size = 0; + + bool strip_debug = false; + + for (int i = 0; i < p_spirv.size(); i++) { + smolv::ByteArray smolv; + if (!smolv::Encode(p_spirv[i].spir_v.ptr(), p_spirv[i].spir_v.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) { + ERR_FAIL_V_MSG(Vector<uint8_t>(), "Error compressing shader stage :" + String(shader_stage_names[p_spirv[i].shader_stage])); + } else { + smolv_size.push_back(smolv.size()); + { //zstd + Vector<uint8_t> zstd; + zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD)); + int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD); + + if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) { + zstd_size.push_back(dst_size); + zstd.resize(dst_size); + compressed_stages.push_back(zstd); + } else { + Vector<uint8_t> smv; + smv.resize(smolv.size()); + memcpy(smv.ptrw(), &smolv[0], smolv.size()); + zstd_size.push_back(0); //not using zstd + compressed_stages.push_back(smv); + } + } + } + uint32_t s = compressed_stages[i].size(); + if (s % 4 != 0) { + s += 4 - (s % 4); + } + stages_binary_size += s; + } + + binary_data.specialization_constant_count = specialization_constants.size(); + binary_data.set_count = uniform_info.size(); + binary_data.stage_count = p_spirv.size(); + + CharString shader_name_utf = p_shader_name.utf8(); + + binary_data.shader_name_len = shader_name_utf.length(); + + uint32_t total_size = sizeof(uint32_t) * 3; //header + version + main datasize; + total_size += sizeof(RenderingDeviceVulkanShaderBinaryData); + + total_size += binary_data.shader_name_len; + + if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange + total_size += 4 - (binary_data.shader_name_len % 4); + } + + for (int i = 0; i < uniform_info.size(); i++) { + total_size += sizeof(uint32_t); + total_size += uniform_info[i].size() * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding); + } + + total_size += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size(); + + total_size += compressed_stages.size() * sizeof(uint32_t) * 3; //sizes + total_size += stages_binary_size; + + Vector<uint8_t> ret; + ret.resize(total_size); + uint32_t offset = 0; + { + uint8_t *binptr = ret.ptrw(); + binptr[0] = 'G'; + binptr[1] = 'V'; + binptr[2] = 'B'; + binptr[3] = 'D'; //godot vulkan binary data + offset += 4; + encode_uint32(SHADER_BINARY_VERSION, binptr + offset); + offset += sizeof(uint32_t); + encode_uint32(sizeof(RenderingDeviceVulkanShaderBinaryData), binptr + offset); + offset += sizeof(uint32_t); + memcpy(binptr + offset, &binary_data, sizeof(RenderingDeviceVulkanShaderBinaryData)); + offset += sizeof(RenderingDeviceVulkanShaderBinaryData); + memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len); + offset += binary_data.shader_name_len; + + if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange + offset += 4 - (binary_data.shader_name_len % 4); + } + + for (int i = 0; i < uniform_info.size(); i++) { + int count = uniform_info[i].size(); + encode_uint32(count, binptr + offset); + offset += sizeof(uint32_t); + if (count > 0) { + memcpy(binptr + offset, uniform_info[i].ptr(), sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count); + offset += sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count; + } + } + + if (specialization_constants.size()) { + memcpy(binptr + offset, specialization_constants.ptr(), sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size()); + offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size(); + } + + for (int i = 0; i < compressed_stages.size(); i++) { + encode_uint32(p_spirv[i].shader_stage, binptr + offset); + offset += sizeof(uint32_t); + encode_uint32(smolv_size[i], binptr + offset); + offset += sizeof(uint32_t); + encode_uint32(zstd_size[i], binptr + offset); + offset += sizeof(uint32_t); + memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size()); + + uint32_t s = compressed_stages[i].size(); + + if (s % 4 != 0) { + s += 4 - (s % 4); + } + + offset += s; + } + + ERR_FAIL_COND_V(offset != (uint32_t)ret.size(), Vector<uint8_t>()); + } + + return ret; +} + +RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary) { + const uint8_t *binptr = p_shader_binary.ptr(); + uint32_t binsize = p_shader_binary.size(); + + uint32_t read_offset = 0; + //consistency check + ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(RenderingDeviceVulkanShaderBinaryData), RID()); + ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'V' || binptr[2] != 'B' || binptr[3] != 'D', RID()); + + uint32_t bin_version = decode_uint32(binptr + 4); + ERR_FAIL_COND_V(bin_version > SHADER_BINARY_VERSION, RID()); + + uint32_t bin_data_size = decode_uint32(binptr + 8); + + const RenderingDeviceVulkanShaderBinaryData &binary_data = *(const RenderingDeviceVulkanShaderBinaryData *)(binptr + 12); + + Shader::PushConstant push_constant; + push_constant.push_constant_size = binary_data.push_constant_size; + push_constant.push_constants_vk_stage = binary_data.push_constants_vk_stage; + + uint32_t vertex_input_mask = binary_data.vertex_input_mask; + + uint32_t fragment_outputs = binary_data.fragment_outputs; + + bool is_compute = binary_data.is_compute; + + uint32_t compute_local_size[3] = { binary_data.compute_local_size[0], binary_data.compute_local_size[1], binary_data.compute_local_size[2] }; + + read_offset += sizeof(uint32_t) * 3 + bin_data_size; + + String name; + + if (binary_data.shader_name_len) { + name.parse_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); + read_offset += binary_data.shader_name_len; + if ((binary_data.shader_name_len % 4) != 0) { //alignment rules are really strange + read_offset += 4 - (binary_data.shader_name_len % 4); + } + } + + Vector<Vector<VkDescriptorSetLayoutBinding>> set_bindings; + Vector<Vector<UniformInfo>> uniform_info; + + set_bindings.resize(binary_data.set_count); + uniform_info.resize(binary_data.set_count); + + for (uint32_t i = 0; i < binary_data.set_count; i++) { + ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, RID()); + uint32_t set_count = decode_uint32(binptr + read_offset); + read_offset += sizeof(uint32_t); + const RenderingDeviceVulkanShaderBinaryDataBinding *set_ptr = (const RenderingDeviceVulkanShaderBinaryDataBinding *)(binptr + read_offset); + uint32_t set_size = set_count * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding); + ERR_FAIL_COND_V(read_offset + set_size >= binsize, RID()); + + for (uint32_t j = 0; j < set_count; j++) { + UniformInfo info; + info.type = UniformType(set_ptr[j].type); + info.length = set_ptr[j].length; + info.binding = set_ptr[j].binding; + info.stages = set_ptr[j].stages; + + VkDescriptorSetLayoutBinding layout_binding; + layout_binding.pImmutableSamplers = nullptr; + layout_binding.binding = set_ptr[j].binding; + layout_binding.descriptorCount = 1; + layout_binding.stageFlags = 0; + for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) { + if (set_ptr[j].stages & (1 << k)) { + layout_binding.stageFlags |= shader_stage_masks[k]; + } + } + + switch (info.type) { + case UNIFORM_TYPE_SAMPLER: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + layout_binding.descriptorCount = set_ptr[j].length; + } break; + case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + layout_binding.descriptorCount = set_ptr[j].length; + } break; + case UNIFORM_TYPE_TEXTURE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + layout_binding.descriptorCount = set_ptr[j].length; + } break; + case UNIFORM_TYPE_IMAGE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + layout_binding.descriptorCount = set_ptr[j].length; + } break; + case UNIFORM_TYPE_TEXTURE_BUFFER: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + layout_binding.descriptorCount = set_ptr[j].length; + } break; + case UNIFORM_TYPE_IMAGE_BUFFER: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + } break; + case UNIFORM_TYPE_UNIFORM_BUFFER: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + } break; + case UNIFORM_TYPE_STORAGE_BUFFER: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + } break; + case UNIFORM_TYPE_INPUT_ATTACHMENT: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + } break; + default: { + ERR_FAIL_V(RID()); + } + } + + set_bindings.write[i].push_back(layout_binding); + uniform_info.write[i].push_back(info); + } + + read_offset += set_size; + } + + ERR_FAIL_COND_V(read_offset + binary_data.specialization_constant_count * sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) >= binsize, RID()); + + Vector<Shader::SpecializationConstant> specialization_constants; + + for (uint32_t i = 0; i < binary_data.specialization_constant_count; i++) { + const RenderingDeviceVulkanShaderBinarySpecializationConstant &src_sc = *(const RenderingDeviceVulkanShaderBinarySpecializationConstant *)(binptr + read_offset); + Shader::SpecializationConstant sc; + sc.constant.int_value = src_sc.int_value; + sc.constant.type = PipelineSpecializationConstantType(src_sc.type); + sc.constant.constant_id = src_sc.constant_id; + sc.stage_flags = src_sc.stage_flags; + specialization_constants.push_back(sc); + + read_offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant); + } + + Vector<Vector<uint8_t>> stage_spirv_data; + Vector<ShaderStage> stage_type; + + for (uint32_t i = 0; i < binary_data.stage_count; i++) { + ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, RID()); + uint32_t stage = decode_uint32(binptr + read_offset); + read_offset += sizeof(uint32_t); + uint32_t smolv_size = decode_uint32(binptr + read_offset); + read_offset += sizeof(uint32_t); + uint32_t zstd_size = decode_uint32(binptr + read_offset); + read_offset += sizeof(uint32_t); + + uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size; + + Vector<uint8_t> smolv; + const uint8_t *src_smolv = nullptr; + + if (zstd_size > 0) { + //decompress to smolv + smolv.resize(smolv_size); + int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD); + ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, RID()); + src_smolv = smolv.ptr(); + } else { + src_smolv = binptr + read_offset; + } + + Vector<uint8_t> spirv; + uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size); + spirv.resize(spirv_size); + if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) { + ERR_FAIL_V_MSG(RID(), "Malformed smolv input uncompressing shader stage:" + String(shader_stage_names[stage])); + } + stage_spirv_data.push_back(spirv); + stage_type.push_back(ShaderStage(stage)); + + if (buf_size % 4 != 0) { + buf_size += 4 - (buf_size % 4); + } + + ERR_FAIL_COND_V(read_offset + buf_size > binsize, RID()); + + read_offset += buf_size; } + ERR_FAIL_COND_V(read_offset != binsize, RID()); + //all good, let's create modules _THREAD_SAFE_METHOD_ @@ -4652,17 +5125,19 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader.compute_local_size[0] = compute_local_size[0]; shader.compute_local_size[1] = compute_local_size[1]; shader.compute_local_size[2] = compute_local_size[2]; + shader.specialization_constants = specialization_constants; + shader.name = name; String error_text; bool success = true; - for (int i = 0; i < p_stages.size(); i++) { + for (int i = 0; i < stage_spirv_data.size(); i++) { VkShaderModuleCreateInfo shader_module_create_info; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; shader_module_create_info.pNext = nullptr; shader_module_create_info.flags = 0; - shader_module_create_info.codeSize = p_stages[i].spir_v.size(); - const uint8_t *r = p_stages[i].spir_v.ptr(); + shader_module_create_info.codeSize = stage_spirv_data[i].size(); + const uint8_t *r = stage_spirv_data[i].ptr(); shader_module_create_info.pCode = (const uint32_t *)r; @@ -4670,7 +5145,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages VkResult res = vkCreateShaderModule(device, &shader_module_create_info, nullptr, &module); if (res) { success = false; - error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(shader_stage_names[p_stages[i].shader_stage]); + error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(shader_stage_names[stage_type[i]]); break; } @@ -4686,7 +5161,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages shader_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stage.pNext = nullptr; shader_stage.flags = 0; - shader_stage.stage = shader_stage_bits[p_stages[i].shader_stage]; + shader_stage.stage = shader_stage_bits[stage_type[i]]; shader_stage.module = module; shader_stage.pName = "main"; shader_stage.pSpecializationInfo = nullptr; @@ -5134,7 +5609,7 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.sampler = *sampler; img_info.imageView = texture->view; - if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { + if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) { UniformSet::AttachableTexture attachable_texture; attachable_texture.bind = set_uniform.binding; attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j + 1]; @@ -5187,7 +5662,7 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, img_info.sampler = VK_NULL_HANDLE; img_info.imageView = texture->view; - if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)) { + if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) { UniformSet::AttachableTexture attachable_texture; attachable_texture.bind = set_uniform.binding; attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.ids[j]; @@ -5501,6 +5976,13 @@ bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) { return uniform_set_owner.owns(p_uniform_set); } +void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_set, UniformSetInvalidatedCallback p_callback, void *p_userdata) { + UniformSet *us = uniform_set_owner.getornull(p_uniform_set); + ERR_FAIL_COND(!us); + us->invalidated_callback = p_callback; + us->invalidated_callback_userdata = p_userdata; +} + Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier) { _THREAD_SAFE_METHOD_ @@ -5640,7 +6122,7 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { /**** RENDER PIPELINE ****/ /*************************/ -RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags, uint32_t p_for_render_pass) { +RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags, uint32_t p_for_render_pass, const Vector<PipelineSpecializationConstant> &p_specialization_constants) { _THREAD_SAFE_METHOD_ //needs a shader @@ -5740,7 +6222,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; tessellation_create_info.pNext = nullptr; tessellation_create_info.flags = 0; - ERR_FAIL_COND_V(p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize, RID()); + ERR_FAIL_COND_V(limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize), RID()); tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points; VkPipelineViewportStateCreateInfo viewport_state_create_info; @@ -5958,8 +6440,62 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma graphics_pipeline_create_info.pNext = nullptr; graphics_pipeline_create_info.flags = 0; - graphics_pipeline_create_info.stageCount = shader->pipeline_stages.size(); - graphics_pipeline_create_info.pStages = shader->pipeline_stages.ptr(); + Vector<VkPipelineShaderStageCreateInfo> pipeline_stages = shader->pipeline_stages; + Vector<VkSpecializationInfo> specialization_info; + Vector<Vector<VkSpecializationMapEntry>> specialization_map_entries; + Vector<uint32_t> specialization_constant_data; + + if (shader->specialization_constants.size()) { + specialization_constant_data.resize(shader->specialization_constants.size()); + uint32_t *data_ptr = specialization_constant_data.ptrw(); + specialization_info.resize(pipeline_stages.size()); + specialization_map_entries.resize(pipeline_stages.size()); + for (int i = 0; i < shader->specialization_constants.size(); i++) { + //see if overridden + const Shader::SpecializationConstant &sc = shader->specialization_constants[i]; + data_ptr[i] = sc.constant.int_value; //just copy the 32 bits + + for (int j = 0; j < p_specialization_constants.size(); j++) { + const PipelineSpecializationConstant &psc = p_specialization_constants[j]; + if (psc.constant_id == sc.constant.constant_id) { + ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type."); + data_ptr[i] = psc.int_value; + break; + } + } + + VkSpecializationMapEntry entry; + + entry.constantID = sc.constant.constant_id; + entry.offset = i * sizeof(uint32_t); + entry.size = sizeof(uint32_t); + + for (int j = 0; j < SHADER_STAGE_MAX; j++) { + if (sc.stage_flags & (1 << j)) { + VkShaderStageFlagBits stage = shader_stage_masks[j]; + for (int k = 0; k < pipeline_stages.size(); k++) { + if (pipeline_stages[k].stage == stage) { + specialization_map_entries.write[k].push_back(entry); + } + } + } + } + } + + for (int i = 0; i < pipeline_stages.size(); i++) { + if (specialization_map_entries[i].size()) { + specialization_info.write[i].dataSize = specialization_constant_data.size() * sizeof(uint32_t); + specialization_info.write[i].pData = data_ptr; + specialization_info.write[i].mapEntryCount = specialization_map_entries[i].size(); + specialization_info.write[i].pMapEntries = specialization_map_entries[i].ptr(); + pipeline_stages.write[i].pSpecializationInfo = specialization_info.ptr() + i; + } + } + } + + graphics_pipeline_create_info.stageCount = pipeline_stages.size(); + graphics_pipeline_create_info.pStages = pipeline_stages.ptr(); + graphics_pipeline_create_info.pVertexInputState = &pipeline_vertex_input_state_create_info; graphics_pipeline_create_info.pInputAssemblyState = &input_assembly_create_info; graphics_pipeline_create_info.pTessellationState = &tessellation_create_info; @@ -5978,7 +6514,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma RenderPipeline pipeline; VkResult err = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &graphics_pipeline_create_info, nullptr, &pipeline.pipeline); - ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + "."); + ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + " for shader '" + shader->name + "'."); pipeline.set_formats = shader->set_formats; pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage; @@ -6028,7 +6564,7 @@ bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) { /**** COMPUTE PIPELINE ****/ /**************************/ -RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { +RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants) { _THREAD_SAFE_METHOD_ //needs a shader @@ -6050,6 +6586,44 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) { compute_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; compute_pipeline_create_info.basePipelineIndex = 0; + VkSpecializationInfo specialization_info; + Vector<VkSpecializationMapEntry> specialization_map_entries; + Vector<uint32_t> specialization_constant_data; + + if (shader->specialization_constants.size()) { + specialization_constant_data.resize(shader->specialization_constants.size()); + uint32_t *data_ptr = specialization_constant_data.ptrw(); + for (int i = 0; i < shader->specialization_constants.size(); i++) { + //see if overridden + const Shader::SpecializationConstant &sc = shader->specialization_constants[i]; + data_ptr[i] = sc.constant.int_value; //just copy the 32 bits + + for (int j = 0; j < p_specialization_constants.size(); j++) { + const PipelineSpecializationConstant &psc = p_specialization_constants[j]; + if (psc.constant_id == sc.constant.constant_id) { + ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type."); + data_ptr[i] = sc.constant.int_value; + break; + } + } + + VkSpecializationMapEntry entry; + + entry.constantID = sc.constant.constant_id; + entry.offset = i * sizeof(uint32_t); + entry.size = sizeof(uint32_t); + + specialization_map_entries.push_back(entry); + } + + specialization_info.dataSize = specialization_constant_data.size() * sizeof(uint32_t); + specialization_info.pData = data_ptr; + specialization_info.mapEntryCount = specialization_map_entries.size(); + specialization_info.pMapEntries = specialization_map_entries.ptr(); + + compute_pipeline_create_info.stage.pSpecializationInfo = &specialization_info; + } + ComputePipeline pipeline; VkResult err = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &compute_pipeline_create_info, nullptr, &pipeline.pipeline); ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateComputePipelines failed with error " + itos(err) + "."); @@ -6960,6 +7534,10 @@ void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) { vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor); } +uint32_t RenderingDeviceVulkan::draw_list_get_current_pass() { + return draw_list_current_subpass; +} + RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_switch_to_next_pass() { ERR_FAIL_COND_V(draw_list == nullptr, INVALID_ID); ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, INVALID_FORMAT_ID); @@ -7010,7 +7588,7 @@ Error RenderingDeviceVulkan::_draw_list_allocate(const Rect2i &p_viewport, uint3 VkCommandPoolCreateInfo cmd_pool_info; cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmd_pool_info.pNext = nullptr; - cmd_pool_info.queueFamilyIndex = context->get_graphics_queue(); + cmd_pool_info.queueFamilyIndex = context->get_graphics_queue_family_index(); cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &split_draw_list_allocators.write[i].command_pool); @@ -7387,13 +7965,13 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, textures_to_storage[i]->used_in_compute = false; textures_to_storage[i]->used_in_raster = false; - textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_transfer = false; } else { src_access_flags = 0; textures_to_storage[i]->used_in_compute = false; textures_to_storage[i]->used_in_raster = false; - textures_to_storage[i]->used_in_compute = false; + textures_to_storage[i]->used_in_transfer = false; textures_to_storage[i]->used_in_frame = frames_drawn; } @@ -7840,6 +8418,10 @@ void RenderingDeviceVulkan::_free_internal(RID p_id) { } else if (uniform_set_owner.owns(p_id)) { UniformSet *uniform_set = uniform_set_owner.getornull(p_id); frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set); + if (uniform_set->invalidated_callback != nullptr) { + uniform_set->invalidated_callback(p_id, uniform_set->invalidated_callback_userdata); + } + uniform_set_owner.free(p_id); } else if (render_pipeline_owner.owns(p_id)) { RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id); @@ -8250,6 +8832,7 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de memset(&allocatorInfo, 0, sizeof(VmaAllocatorCreateInfo)); allocatorInfo.physicalDevice = p_context->get_physical_device(); allocatorInfo.device = device; + allocatorInfo.instance = p_context->get_instance(); vmaCreateAllocator(&allocatorInfo, &allocator); } @@ -8263,7 +8846,7 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_de VkCommandPoolCreateInfo cmd_pool_info; cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; cmd_pool_info.pNext = nullptr; - cmd_pool_info.queueFamilyIndex = p_context->get_graphics_queue(); + cmd_pool_info.queueFamilyIndex = p_context->get_graphics_queue_family_index(); cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &frames[i].command_pool); @@ -8377,13 +8960,14 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) { } else { WARN_PRINT(vformat("%d RIDs of type \"%s\" were leaked.", owned.size(), p_type)); } - for (List<RID>::Element *E = owned.front(); E; E = E->next()) { - free(E->get()); + for (const RID &E : owned) { + free(E); } } } void RenderingDeviceVulkan::capture_timestamp(const String &p_name) { + ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestap was: " + p_name); ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); //this should be optional for profiling, else it will slow things down @@ -8432,6 +9016,92 @@ void RenderingDeviceVulkan::capture_timestamp(const String &p_name) { frames[frame].timestamp_count++; } +uint64_t RenderingDeviceVulkan::get_driver_resource(DriverResource p_resource, RID p_rid, uint64_t p_index) { + _THREAD_SAFE_METHOD_ + + switch (p_resource) { + case DRIVER_RESOURCE_VULKAN_DEVICE: { + return (uint64_t)context->get_device(); + }; break; + case DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE: { + return (uint64_t)context->get_physical_device(); + }; break; + case DRIVER_RESOURCE_VULKAN_INSTANCE: { + return (uint64_t)context->get_instance(); + }; break; + case DRIVER_RESOURCE_VULKAN_QUEUE: { + return (uint64_t)context->get_graphics_queue(); + }; break; + case DRIVER_RESOURCE_VULKAN_QUEUE_FAMILY_INDEX: { + return context->get_graphics_queue_family_index(); + }; break; + case DRIVER_RESOURCE_VULKAN_IMAGE: { + Texture *tex = texture_owner.getornull(p_rid); + ERR_FAIL_NULL_V(tex, 0); + + return (uint64_t)tex->image; + }; break; + case DRIVER_RESOURCE_VULKAN_IMAGE_VIEW: { + Texture *tex = texture_owner.getornull(p_rid); + ERR_FAIL_NULL_V(tex, 0); + + return (uint64_t)tex->view; + }; break; + case DRIVER_RESOURCE_VULKAN_IMAGE_NATIVE_TEXTURE_FORMAT: { + Texture *tex = texture_owner.getornull(p_rid); + ERR_FAIL_NULL_V(tex, 0); + + return vulkan_formats[tex->format]; + }; break; + case DRIVER_RESOURCE_VULKAN_SAMPLER: { + VkSampler *sampler = sampler_owner.getornull(p_rid); + ERR_FAIL_NULL_V(sampler, 0); + + return uint64_t(*sampler); + }; break; + case DRIVER_RESOURCE_VULKAN_DESCRIPTOR_SET: { + UniformSet *uniform_set = uniform_set_owner.getornull(p_rid); + ERR_FAIL_NULL_V(uniform_set, 0); + + return uint64_t(uniform_set->descriptor_set); + }; break; + case DRIVER_RESOURCE_VULKAN_BUFFER: { + Buffer *buffer = nullptr; + if (vertex_buffer_owner.owns(p_rid)) { + buffer = vertex_buffer_owner.getornull(p_rid); + } else if (index_buffer_owner.owns(p_rid)) { + buffer = index_buffer_owner.getornull(p_rid); + } else if (uniform_buffer_owner.owns(p_rid)) { + buffer = uniform_buffer_owner.getornull(p_rid); + } else if (texture_buffer_owner.owns(p_rid)) { + buffer = &texture_buffer_owner.getornull(p_rid)->buffer; + } else if (storage_buffer_owner.owns(p_rid)) { + buffer = storage_buffer_owner.getornull(p_rid); + } + + ERR_FAIL_NULL_V(buffer, 0); + + return uint64_t(buffer->buffer); + }; break; + case DRIVER_RESOURCE_VULKAN_COMPUTE_PIPELINE: { + ComputePipeline *compute_pipeline = compute_pipeline_owner.getornull(p_rid); + ERR_FAIL_NULL_V(compute_pipeline, 0); + + return uint64_t(compute_pipeline->pipeline); + }; break; + case DRIVER_RESOURCE_VULKAN_RENDER_PIPELINE: { + RenderPipeline *render_pipeline = render_pipeline_owner.getornull(p_rid); + ERR_FAIL_NULL_V(render_pipeline, 0); + + return uint64_t(render_pipeline->pipeline); + }; break; + default: { + // not supported for this driver + return 0; + }; break; + } +} + uint32_t RenderingDeviceVulkan::get_captured_timestamps_count() const { return frames[frame].timestamp_result_count; } @@ -8599,13 +9269,13 @@ void RenderingDeviceVulkan::finalize() { List<RID>::Element *N = E->next(); if (texture_is_shared(E->get())) { free(E->get()); - owned.erase(E->get()); + owned.erase(E); } E = N; } //free non shared second, this will avoid an error trying to free unexisting textures due to dependencies. - for (List<RID>::Element *E = owned.front(); E; E = E->next()) { - free(E->get()); + for (const RID &E : owned) { + free(E); } } } diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 1f86fe9e48..cf0b725cfc 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -44,7 +44,11 @@ #endif #include "vk_mem_alloc.h" +#ifdef USE_VOLK +#include <volk.h> +#else #include <vulkan/vulkan.h> +#endif class VulkanContext; @@ -156,6 +160,7 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t texture_upload_region_size_px = 0; Vector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d = false); + Error _texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, uint32_t p_post_barrier, bool p_use_setup_queue); /*****************/ /**** SAMPLER ****/ @@ -622,12 +627,19 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t compute_local_size[3] = { 0, 0, 0 }; + struct SpecializationConstant { + PipelineSpecializationConstant constant; + uint32_t stage_flags = 0; + }; + bool is_compute = false; int max_output = 0; Vector<Set> sets; Vector<uint32_t> set_formats; Vector<VkPipelineShaderStageCreateInfo> pipeline_stages; + Vector<SpecializationConstant> specialization_constants; VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; + String name; //used for debug }; String _shader_uniform_debug(RID p_shader, int p_set = -1); @@ -645,8 +657,8 @@ class RenderingDeviceVulkan : public RenderingDevice { // Basically, you can mix and match pools as you // like, but you'll run into fragmentation issues. // Because of this, the recommended approach is to - // create a a pool for every descriptor set type, - // as this prevents fragmentation. + // create a pool for every descriptor set type, as + // this prevents fragmentation. // // This is implemented here as a having a list of // pools (each can contain up to 64 sets) for each @@ -731,6 +743,8 @@ class RenderingDeviceVulkan : public RenderingDevice { LocalVector<AttachableTexture> attachable_textures; //used for validation Vector<Texture *> mutable_sampled_textures; //used for layout change Vector<Texture *> mutable_storage_textures; //used for layout change + UniformSetInvalidatedCallback invalidated_callback = nullptr; + void *invalidated_callback_userdata = nullptr; }; RID_Owner<UniformSet, true> uniform_set_owner; @@ -799,7 +813,7 @@ class RenderingDeviceVulkan : public RenderingDevice { // When using split command lists, this is // implemented internally using secondary command // buffers. As they can be created in threads, - // each needs it's own command pool. + // each needs its own command pool. struct SplitDrawListAllocator { VkCommandPool command_pool = VK_NULL_HANDLE; @@ -1030,6 +1044,7 @@ public: virtual bool texture_is_format_supported_for_usage(DataFormat p_format, uint32_t p_usage) const; virtual bool texture_is_shared(RID p_texture); virtual bool texture_is_valid(RID p_texture); + virtual Size2i texture_size(RID p_texture); virtual Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, uint32_t p_post_barrier = BARRIER_MASK_ALL); virtual Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, uint32_t p_post_barrier = BARRIER_MASK_ALL); @@ -1074,7 +1089,11 @@ public: /**** SHADER ****/ /****************/ - virtual RID shader_create(const Vector<ShaderStageData> &p_stages); + virtual String shader_get_binary_cache_key() const; + virtual Vector<uint8_t> shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name = ""); + + virtual RID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary); + virtual uint32_t shader_get_vertex_input_attribute_mask(RID p_shader); /*****************/ @@ -1087,6 +1106,7 @@ public: virtual RID uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set); virtual bool uniform_set_is_valid(RID p_uniform_set); + virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, UniformSetInvalidatedCallback p_callback, void *p_userdata); virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, uint32_t p_post_barrier = BARRIER_MASK_ALL); //works for any buffer virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, uint32_t p_post_barrier = BARRIER_MASK_ALL); @@ -1096,14 +1116,14 @@ public: /**** RENDER PIPELINE ****/ /*************************/ - virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0); + virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0, const Vector<PipelineSpecializationConstant> &p_specialization_constants = Vector<PipelineSpecializationConstant>()); virtual bool render_pipeline_is_valid(RID p_pipeline); /**************************/ /**** COMPUTE PIPELINE ****/ /**************************/ - virtual RID compute_pipeline_create(RID p_shader); + virtual RID compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants = Vector<PipelineSpecializationConstant>()); virtual bool compute_pipeline_is_valid(RID p_pipeline); /****************/ @@ -1135,6 +1155,7 @@ public: virtual void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); virtual void draw_list_disable_scissor(DrawListID p_list); + virtual uint32_t draw_list_get_current_pass(); virtual DrawListID draw_list_switch_to_next_pass(); virtual Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); @@ -1206,6 +1227,8 @@ public: virtual String get_device_name() const; virtual String get_device_pipeline_cache_uuid() const; + virtual uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0); + RenderingDeviceVulkan(); ~RenderingDeviceVulkan(); }; diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 74bd938822..bb0123e536 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -275,22 +275,21 @@ Error VulkanContext::_obtain_vulkan_version() { if (res == VK_SUCCESS) { vulkan_major = VK_VERSION_MAJOR(api_version); vulkan_minor = VK_VERSION_MINOR(api_version); - uint32_t vulkan_patch = VK_VERSION_PATCH(api_version); - - print_line("Vulkan API " + itos(vulkan_major) + "." + itos(vulkan_minor) + "." + itos(vulkan_patch)); + vulkan_patch = VK_VERSION_PATCH(api_version); } else { // according to the documentation this shouldn't fail with anything except a memory allocation error // in which case we're in deep trouble anyway ERR_FAIL_V(ERR_CANT_CREATE); } } else { - print_line("vkEnumerateInstanceVersion not available, assuming Vulkan 1.0"); + print_line("vkEnumerateInstanceVersion not available, assuming Vulkan 1.0."); } // we don't go above 1.2 if ((vulkan_major > 1) || (vulkan_major == 1 && vulkan_minor > 2)) { vulkan_major = 1; vulkan_minor = 2; + vulkan_patch = 0; } return OK; @@ -577,26 +576,22 @@ Error VulkanContext::_check_capabilities() { multiview_capabilities.max_view_count = multiviewProperties.maxMultiviewViewCount; multiview_capabilities.max_instance_count = multiviewProperties.maxMultiviewInstanceIndex; -#ifdef DEBUG_ENABLED - print_line("- Vulkan multiview supported:"); - print_line(" max view count: " + itos(multiview_capabilities.max_view_count)); - print_line(" max instances: " + itos(multiview_capabilities.max_instance_count)); + print_verbose("- Vulkan multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); } else { - print_line("- Vulkan multiview not supported"); -#endif + print_verbose("- Vulkan multiview not supported"); } -#ifdef DEBUG_ENABLED - print_line("- Vulkan subgroup:"); - print_line(" size: " + itos(subgroup_capabilities.size)); - print_line(" stages: " + subgroup_capabilities.supported_stages_desc()); - print_line(" supported ops: " + subgroup_capabilities.supported_operations_desc()); + print_verbose("- Vulkan subgroup:"); + print_verbose(" size: " + itos(subgroup_capabilities.size)); + print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc()); + print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc()); if (subgroup_capabilities.quadOperationsInAllStages) { - print_line(" quad operations in all stages"); + print_verbose(" quad operations in all stages"); } } else { - print_line("- Couldn't call vkGetPhysicalDeviceProperties2"); -#endif + print_verbose("- Couldn't call vkGetPhysicalDeviceProperties2"); } return OK; @@ -685,6 +680,10 @@ Error VulkanContext::_create_physical_device() { inst_initialized = true; +#ifdef USE_VOLK + volkLoadInstance(inst); +#endif + /* Make initial call to query gpu_count, then second call for gpu info*/ err = vkEnumeratePhysicalDevices(inst, &gpu_count, nullptr); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -737,10 +736,11 @@ Error VulkanContext::_create_physical_device() { } vendor_names[] = { { 0x1002, "AMD" }, { 0x1010, "ImgTec" }, + { 0x106B, "Apple" }, { 0x10DE, "NVIDIA" }, { 0x13B5, "ARM" }, { 0x5143, "Qualcomm" }, - { 0x8086, "INTEL" }, + { 0x8086, "Intel" }, { 0, nullptr }, }; device_name = gpu_props.deviceName; @@ -757,9 +757,11 @@ Error VulkanContext::_create_physical_device() { vendor_idx++; } } -#ifdef DEBUG_ENABLED - print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name); -#endif + + print_line( + "Vulkan API " + itos(vulkan_major) + "." + itos(vulkan_minor) + "." + itos(vulkan_patch) + + " - " + "Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name); + device_api_version = gpu_props.apiVersion; err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr); @@ -975,37 +977,35 @@ Error VulkanContext::_create_device() { sdevice.queueCreateInfoCount = 2; } -#ifdef VK_VERSION_1_2 VkPhysicalDeviceVulkan11Features vulkan11features; - - vulkan11features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - vulkan11features.pNext = nullptr; - // !BAS! Need to figure out which ones of these we want enabled... - vulkan11features.storageBuffer16BitAccess = 0; - vulkan11features.uniformAndStorageBuffer16BitAccess = 0; - vulkan11features.storagePushConstant16 = 0; - vulkan11features.storageInputOutput16 = 0; - vulkan11features.multiview = multiview_capabilities.is_supported; - vulkan11features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; - vulkan11features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; - vulkan11features.variablePointersStorageBuffer = 0; - vulkan11features.variablePointers = 0; - vulkan11features.protectedMemory = 0; - vulkan11features.samplerYcbcrConversion = 0; - vulkan11features.shaderDrawParameters = 0; - - sdevice.pNext = &vulkan11features; -#elif VK_VERSION_1_1 VkPhysicalDeviceMultiviewFeatures multiview_features; + if (vulkan_major > 1 || vulkan_minor >= 2) { + vulkan11features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + vulkan11features.pNext = nullptr; + // !BAS! Need to figure out which ones of these we want enabled... + vulkan11features.storageBuffer16BitAccess = 0; + vulkan11features.uniformAndStorageBuffer16BitAccess = 0; + vulkan11features.storagePushConstant16 = 0; + vulkan11features.storageInputOutput16 = 0; + vulkan11features.multiview = multiview_capabilities.is_supported; + vulkan11features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; + vulkan11features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; + vulkan11features.variablePointersStorageBuffer = 0; + vulkan11features.variablePointers = 0; + vulkan11features.protectedMemory = 0; + vulkan11features.samplerYcbcrConversion = 0; + vulkan11features.shaderDrawParameters = 0; + + sdevice.pNext = &vulkan11features; + } else if (vulkan_major == 1 && vulkan_minor == 1) { + multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES; + multiview_features.pNext = nullptr; + multiview_features.multiview = multiview_capabilities.is_supported; + multiview_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; + multiview_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; - multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES; - multiview_features.pNext = nullptr; - multiview_features.multiview = multiview_capabilities.is_supported; - multiview_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported; - multiview_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported; - - sdevice.pNext = &multiview_features; -#endif + sdevice.pNext = &multiview_features; + } err = vkCreateDevice(gpu, &sdevice, nullptr, &device); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); @@ -1189,7 +1189,7 @@ bool VulkanContext::_use_validation_layers() { return Engine::get_singleton()->is_validation_layers_enabled(); } -Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, VkSurfaceKHR p_surface, int p_width, int p_height) { +Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, VkSurfaceKHR p_surface, int p_width, int p_height) { ERR_FAIL_COND_V(windows.has(p_window_id), ERR_INVALID_PARAMETER); if (!queues_initialized) { @@ -1217,6 +1217,7 @@ Error VulkanContext::_window_create(DisplayServer::WindowID p_window_id, VkSurfa window.surface = p_surface; window.width = p_width; window.height = p_height; + window.vsync_mode = p_vsync_mode; Error err = _update_swap_chain(&window); ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); @@ -1360,7 +1361,6 @@ Error VulkanContext::_update_swap_chain(Window *window) { } // The FIFO present mode is guaranteed by the spec to be supported // and to have no tearing. It's a great default present mode to use. - VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR; // There are times when you may wish to use another present mode. The // following code shows how to select them, and the comments provide some @@ -1389,16 +1389,41 @@ Error VulkanContext::_update_swap_chain(Window *window) { // the application wants the late image to be immediately displayed, even // though that may mean some tearing. - if (window->presentMode != swapchainPresentMode) { - for (size_t i = 0; i < presentModeCount; ++i) { - if (presentModes[i] == window->presentMode) { - swapchainPresentMode = window->presentMode; - break; - } + VkPresentModeKHR requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; + switch (window->vsync_mode) { + case DisplayServer::VSYNC_MAILBOX: + requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_MAILBOX_KHR; + break; + case DisplayServer::VSYNC_ADAPTIVE: + requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_RELAXED_KHR; + break; + case DisplayServer::VSYNC_ENABLED: + requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR; + break; + case DisplayServer::VSYNC_DISABLED: + requested_present_mode = VkPresentModeKHR::VK_PRESENT_MODE_IMMEDIATE_KHR; + break; + } + + // Check if the requested mode is available. + bool present_mode_available = false; + for (uint32_t i = 0; i < presentModeCount; i++) { + if (presentModes[i] == requested_present_mode) { + present_mode_available = true; } } + + // Set the windows present mode if it is available, otherwise FIFO is used (guaranteed supported). + if (present_mode_available) { + window->presentMode = requested_present_mode; + } else { + WARN_PRINT("Requested VSync mode is not available!"); + window->vsync_mode = DisplayServer::VSYNC_ENABLED; //Set to default + } + + print_verbose("Using present mode: " + String(string_VkPresentModeKHR(window->presentMode))); + free(presentModes); - ERR_FAIL_COND_V_MSG(swapchainPresentMode != window->presentMode, ERR_CANT_CREATE, "Present mode specified is not supported\n"); // Determine the number of VkImages to use in the swap chain. // Application desires to acquire 3 images at a time for triple @@ -1455,7 +1480,7 @@ Error VulkanContext::_update_swap_chain(Window *window) { /*pQueueFamilyIndices*/ nullptr, /*preTransform*/ (VkSurfaceTransformFlagBitsKHR)preTransform, /*compositeAlpha*/ compositeAlpha, - /*presentMode*/ swapchainPresentMode, + /*presentMode*/ window->presentMode, /*clipped*/ true, /*oldSwapchain*/ VK_NULL_HANDLE, }; @@ -1647,6 +1672,11 @@ Error VulkanContext::_update_swap_chain(Window *window) { } Error VulkanContext::initialize() { +#ifdef USE_VOLK + if (volkInitialize() != VK_SUCCESS) { + return FAILED; + } +#endif Error err = _create_physical_device(); if (err) { return err; @@ -1999,7 +2029,11 @@ int VulkanContext::get_swapchain_image_count() const { return swapchainImageCount; } -uint32_t VulkanContext::get_graphics_queue() const { +VkQueue VulkanContext::get_graphics_queue() const { + return graphics_queue; +} + +uint32_t VulkanContext::get_graphics_queue_family_index() const { return graphics_queue_family_index; } @@ -2162,6 +2196,17 @@ String VulkanContext::get_device_pipeline_cache_uuid() const { return pipeline_cache_id; } +DisplayServer::VSyncMode VulkanContext::get_vsync_mode(DisplayServer::WindowID p_window) const { + ERR_FAIL_COND_V_MSG(!windows.has(p_window), DisplayServer::VSYNC_ENABLED, "Could not get VSync mode for window with WindowID " + itos(p_window) + " because it does not exist."); + return windows[p_window].vsync_mode; +} + +void VulkanContext::set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode) { + ERR_FAIL_COND_MSG(!windows.has(p_window), "Could not set VSync mode for window with WindowID " + itos(p_window) + " because it does not exist."); + windows[p_window].vsync_mode = p_mode; + _update_swap_chain(&windows[p_window]); +} + VulkanContext::VulkanContext() { command_buffer_queue.resize(1); // First one is always the setup command. command_buffer_queue.write[0] = nullptr; diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index 8f1005d07f..ae7c697be8 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -38,7 +38,11 @@ #include "core/templates/rid_owner.h" #include "servers/display_server.h" +#ifdef USE_VOLK +#include <volk.h> +#else #include <vulkan/vulkan.h> +#endif class VulkanContext { public: @@ -81,6 +85,7 @@ private: // Vulkan 1.0 doesn't return version info so we assume this by default until we know otherwise uint32_t vulkan_major = 1; uint32_t vulkan_minor = 0; + uint32_t vulkan_patch = 0; SubgroupCapabilities subgroup_capabilities; MultiviewCapabilities multiview_capabilities; @@ -124,6 +129,7 @@ private: uint32_t current_buffer = 0; int width = 0; int height = 0; + DisplayServer::VSyncMode vsync_mode = DisplayServer::VSYNC_ENABLED; VkCommandPool present_cmd_pool = VK_NULL_HANDLE; // For separate present queue. VkRenderPass render_pass = VK_NULL_HANDLE; }; @@ -222,16 +228,12 @@ private: protected: virtual const char *_get_platform_surface_extension() const = 0; - virtual Error _window_create(DisplayServer::WindowID p_window_id, VkSurfaceKHR p_surface, int p_width, int p_height); + virtual Error _window_create(DisplayServer::WindowID p_window_id, DisplayServer::VSyncMode p_vsync_mode, VkSurfaceKHR p_surface, int p_width, int p_height); virtual bool _use_validation_layers(); Error _get_preferred_validation_layers(uint32_t *count, const char *const **names); - VkInstance _get_instance() { - return inst; - } - public: uint32_t get_vulkan_major() const { return vulkan_major; }; uint32_t get_vulkan_minor() const { return vulkan_minor; }; @@ -240,8 +242,10 @@ public: VkDevice get_device(); VkPhysicalDevice get_physical_device(); + VkInstance get_instance() { return inst; } int get_swapchain_image_count() const; - uint32_t get_graphics_queue() const; + VkQueue get_graphics_queue() const; + uint32_t get_graphics_queue_family_index() const; void window_resize(DisplayServer::WindowID p_window_id, int p_width, int p_height); int window_get_width(DisplayServer::WindowID p_window = 0); @@ -276,6 +280,9 @@ public: String get_device_name() const; String get_device_pipeline_cache_uuid() const; + void set_vsync_mode(DisplayServer::WindowID p_window, DisplayServer::VSyncMode p_mode); + DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const; + VulkanContext(); virtual ~VulkanContext(); }; diff --git a/drivers/wasapi/audio_driver_wasapi.cpp b/drivers/wasapi/audio_driver_wasapi.cpp index 43c8722b06..276fda2a8f 100644 --- a/drivers/wasapi/audio_driver_wasapi.cpp +++ b/drivers/wasapi/audio_driver_wasapi.cpp @@ -35,8 +35,60 @@ #include "core/config/project_settings.h" #include "core/os/os.h" +#include <stdint.h> // INT32_MAX + #include <functiondiscoverykeys.h> +// Define IAudioClient3 if not already defined by MinGW headers +#if defined __MINGW32__ || defined __MINGW64__ + +#ifndef __IAudioClient3_FWD_DEFINED__ +#define __IAudioClient3_FWD_DEFINED__ + +typedef interface IAudioClient3 IAudioClient3; + +#endif // __IAudioClient3_FWD_DEFINED__ + +#ifndef __IAudioClient3_INTERFACE_DEFINED__ +#define __IAudioClient3_INTERFACE_DEFINED__ + +MIDL_INTERFACE("7ED4EE07-8E67-4CD4-8C1A-2B7A5987AD42") +IAudioClient3 : public IAudioClient2 { +public: + virtual HRESULT STDMETHODCALLTYPE GetSharedModeEnginePeriod( + /* [annotation][in] */ + _In_ const WAVEFORMATEX *pFormat, + /* [annotation][out] */ + _Out_ UINT32 *pDefaultPeriodInFrames, + /* [annotation][out] */ + _Out_ UINT32 *pFundamentalPeriodInFrames, + /* [annotation][out] */ + _Out_ UINT32 *pMinPeriodInFrames, + /* [annotation][out] */ + _Out_ UINT32 *pMaxPeriodInFrames) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetCurrentSharedModeEnginePeriod( + /* [unique][annotation][out] */ + _Out_ WAVEFORMATEX * *ppFormat, + /* [annotation][out] */ + _Out_ UINT32 * pCurrentPeriodInFrames) = 0; + + virtual HRESULT STDMETHODCALLTYPE InitializeSharedAudioStream( + /* [annotation][in] */ + _In_ DWORD StreamFlags, + /* [annotation][in] */ + _In_ UINT32 PeriodInFrames, + /* [annotation][in] */ + _In_ const WAVEFORMATEX *pFormat, + /* [annotation][in] */ + _In_opt_ LPCGUID AudioSessionGuid) = 0; +}; +__CRT_UUID_DECL(IAudioClient3, 0x7ED4EE07, 0x8E67, 0x4CD4, 0x8C, 0x1A, 0x2B, 0x7A, 0x59, 0x87, 0xAD, 0x42) + +#endif // __IAudioClient3_INTERFACE_DEFINED__ + +#endif // __MINGW32__ || __MINGW64__ + #ifndef PKEY_Device_FriendlyName #undef DEFINE_PROPERTYKEY @@ -51,6 +103,7 @@ DEFINE_PROPERTYKEY(PKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0 const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator); const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator); const IID IID_IAudioClient = __uuidof(IAudioClient); +const IID IID_IAudioClient3 = __uuidof(IAudioClient3); const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient); const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient); @@ -221,7 +274,22 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c ERR_PRINT("WASAPI: RegisterEndpointNotificationCallback error"); } - hr = device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, (void **)&p_device->audio_client); + bool using_audio_client_3 = !p_capture; // IID_IAudioClient3 is only used for adjustable output latency (not input) + if (using_audio_client_3) { + hr = device->Activate(IID_IAudioClient3, CLSCTX_ALL, nullptr, (void **)&p_device->audio_client); + if (hr != S_OK) { + // IID_IAudioClient3 will never activate on OS versions before Windows 10. + // Older Windows versions should fall back gracefully. + using_audio_client_3 = false; + print_verbose("WASAPI: Couldn't activate device with IAudioClient3 interface, falling back to IAudioClient interface"); + } else { + print_verbose("WASAPI: Activated device using IAudioClient3 interface"); + } + } + if (!using_audio_client_3) { + hr = device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, (void **)&p_device->audio_client); + } + SAFE_RELEASE(device) if (reinit) { @@ -232,6 +300,16 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN); } + if (using_audio_client_3) { + AudioClientProperties audioProps; + audioProps.cbSize = sizeof(AudioClientProperties); + audioProps.bIsOffload = FALSE; + audioProps.eCategory = AudioCategory_GameEffects; + + hr = ((IAudioClient3 *)p_device->audio_client)->SetClientProperties(&audioProps); + ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: SetClientProperties failed with error 0x" + String::num_uint64(hr, 16) + "."); + } + hr = p_device->audio_client->GetMixFormat(&pwfex); ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN); @@ -285,15 +363,55 @@ Error AudioDriverWASAPI::audio_device_init(AudioDeviceWASAPI *p_device, bool p_c } } - DWORD streamflags = 0; - if ((DWORD)mix_rate != pwfex->nSamplesPerSec) { - streamflags |= AUDCLNT_STREAMFLAGS_RATEADJUST; - pwfex->nSamplesPerSec = mix_rate; - pwfex->nAvgBytesPerSec = pwfex->nSamplesPerSec * pwfex->nChannels * (pwfex->wBitsPerSample / 8); - } + if (!using_audio_client_3) { + DWORD streamflags = 0; + if ((DWORD)mix_rate != pwfex->nSamplesPerSec) { + streamflags |= AUDCLNT_STREAMFLAGS_RATEADJUST; + pwfex->nSamplesPerSec = mix_rate; + pwfex->nAvgBytesPerSec = pwfex->nSamplesPerSec * pwfex->nChannels * (pwfex->wBitsPerSample / 8); + } + hr = p_device->audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, streamflags, p_capture ? REFTIMES_PER_SEC : 0, 0, pwfex, nullptr); + ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: Initialize failed with error 0x" + String::num_uint64(hr, 16) + "."); + UINT32 max_frames; + HRESULT hr = p_device->audio_client->GetBufferSize(&max_frames); + ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN); - hr = p_device->audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, streamflags, p_capture ? REFTIMES_PER_SEC : 0, 0, pwfex, nullptr); - ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: Initialize failed with error 0x" + String::num_uint64(hr, 16) + "."); + // Due to WASAPI Shared Mode we have no control of the buffer size + buffer_frames = max_frames; + } else { + IAudioClient3 *device_audio_client_3 = (IAudioClient3 *)p_device->audio_client; + + // AUDCLNT_STREAMFLAGS_RATEADJUST is an invalid flag with IAudioClient3, therefore we have to use + // the closest supported mix rate supported by the audio driver. + mix_rate = pwfex->nSamplesPerSec; + print_verbose("WASAPI: mix_rate = " + itos(mix_rate)); + + UINT32 default_period_frames, fundamental_period_frames, min_period_frames, max_period_frames; + hr = device_audio_client_3->GetSharedModeEnginePeriod( + pwfex, + &default_period_frames, + &fundamental_period_frames, + &min_period_frames, + &max_period_frames); + ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: GetSharedModeEnginePeriod failed with error 0x" + String::num_uint64(hr, 16) + "."); + + // Period frames must be an integral multiple of fundamental_period_frames or IAudioClient3 initialization will fail, + // so we need to select the closest multiple to the user-specified latency. + UINT32 desired_period_frames = target_latency_ms * mix_rate / 1000; + UINT32 period_frames = (desired_period_frames / fundamental_period_frames) * fundamental_period_frames; + if (ABS((int64_t)period_frames - (int64_t)desired_period_frames) > ABS((int64_t)(period_frames + fundamental_period_frames) - (int64_t)desired_period_frames)) { + period_frames = period_frames + fundamental_period_frames; + } + period_frames = CLAMP(period_frames, min_period_frames, max_period_frames); + print_verbose("WASAPI: fundamental_period_frames = " + itos(fundamental_period_frames)); + print_verbose("WASAPI: min_period_frames = " + itos(min_period_frames)); + print_verbose("WASAPI: max_period_frames = " + itos(max_period_frames)); + print_verbose("WASAPI: selected a period frame size of " + itos(period_frames)); + buffer_frames = period_frames; + + hr = device_audio_client_3->InitializeSharedAudioStream(0, period_frames, pwfex, nullptr); + ERR_FAIL_COND_V_MSG(hr != S_OK, ERR_CANT_OPEN, "WASAPI: InitializeSharedAudioStream failed with error 0x" + String::num_uint64(hr, 16) + "."); + } if (p_capture) { hr = p_device->audio_client->GetService(IID_IAudioCaptureClient, (void **)&p_device->capture_client); @@ -328,13 +446,6 @@ Error AudioDriverWASAPI::init_render_device(bool reinit) { break; } - UINT32 max_frames; - HRESULT hr = audio_output.audio_client->GetBufferSize(&max_frames); - ERR_FAIL_COND_V(hr != S_OK, ERR_CANT_OPEN); - - // Due to WASAPI Shared Mode we have no control of the buffer size - buffer_frames = max_frames; - // Sample rate is independent of channels (ref: https://stackoverflow.com/questions/11048825/audio-sample-frequency-rely-on-channels) samples_in.resize(buffer_frames * channels); @@ -367,7 +478,6 @@ Error AudioDriverWASAPI::audio_device_finish(AudioDeviceWASAPI *p_device) { if (p_device->audio_client) { p_device->audio_client->Stop(); } - p_device->active = false; } @@ -389,6 +499,8 @@ Error AudioDriverWASAPI::finish_capture_device() { Error AudioDriverWASAPI::init() { mix_rate = GLOBAL_GET("audio/driver/mix_rate"); + target_latency_ms = GLOBAL_GET("audio/driver/output_latency"); + Error err = init_render_device(); if (err != OK) { ERR_PRINT("WASAPI: init_render_device error"); diff --git a/drivers/wasapi/audio_driver_wasapi.h b/drivers/wasapi/audio_driver_wasapi.h index b9b325f0fb..312b6a6781 100644 --- a/drivers/wasapi/audio_driver_wasapi.h +++ b/drivers/wasapi/audio_driver_wasapi.h @@ -71,6 +71,7 @@ class AudioDriverWASAPI : public AudioDriver { unsigned int channels = 0; int mix_rate = 0; int buffer_frames = 0; + int target_latency_ms = 0; bool thread_exited = false; mutable bool exit_thread = false; @@ -114,5 +115,5 @@ public: AudioDriverWASAPI(); }; +#endif // WASAPI_ENABLED #endif // AUDIO_DRIVER_WASAPI_H -#endif diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 325bae5b56..f7a5f7279e 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -155,7 +155,7 @@ Error DirAccessWindows::make_dir(String p_dir) { GLOBAL_LOCK_FUNCTION p_dir = fix_path(p_dir); - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = current_dir.plus_file(p_dir); } @@ -227,7 +227,7 @@ bool DirAccessWindows::file_exists(String p_file) { bool DirAccessWindows::dir_exists(String p_dir) { GLOBAL_LOCK_FUNCTION - if (p_dir.is_rel_path()) { + if (p_dir.is_relative_path()) { p_dir = get_current_dir().plus_file(p_dir); } @@ -242,13 +242,13 @@ bool DirAccessWindows::dir_exists(String p_dir) { } Error DirAccessWindows::rename(String p_path, String p_new_path) { - if (p_path.is_rel_path()) { + if (p_path.is_relative_path()) { p_path = get_current_dir().plus_file(p_path); } p_path = fix_path(p_path); - if (p_new_path.is_rel_path()) { + if (p_new_path.is_relative_path()) { p_new_path = get_current_dir().plus_file(p_new_path); } @@ -281,7 +281,7 @@ Error DirAccessWindows::rename(String p_path, String p_new_path) { } Error DirAccessWindows::remove(String p_path) { - if (p_path.is_rel_path()) { + if (p_path.is_relative_path()) { p_path = get_current_dir().plus_file(p_path); } diff --git a/drivers/windows/file_access_windows.cpp b/drivers/windows/file_access_windows.cpp index d6deda7b5d..775c999b15 100644 --- a/drivers/windows/file_access_windows.cpp +++ b/drivers/windows/file_access_windows.cpp @@ -35,6 +35,7 @@ #include "core/os/os.h" #include "core/string/print_string.h" +#include <share.h> // _SH_DENYNO #include <shlwapi.h> #include <windows.h> @@ -77,8 +78,8 @@ Error FileAccessWindows::_open(const String &p_path, int p_mode_flags) { return ERR_INVALID_PARAMETER; } - /* pretty much every implementation that uses fopen as primary - backend supports utf8 encoding */ + /* Pretty much every implementation that uses fopen as primary + backend supports utf8 encoding. */ struct _stat st; if (_wstat((LPCWSTR)(path.utf16().get_data()), &st) == 0) { @@ -108,15 +109,15 @@ Error FileAccessWindows::_open(const String &p_path, int p_mode_flags) { } #endif - if (is_backup_save_enabled() && p_mode_flags & WRITE && !(p_mode_flags & READ)) { + if (is_backup_save_enabled() && p_mode_flags == WRITE) { save_path = path; path = path + ".tmp"; } - errno_t errcode = _wfopen_s(&f, (LPCWSTR)(path.utf16().get_data()), mode_string); + f = _wfsopen((LPCWSTR)(path.utf16().get_data()), mode_string, _SH_DENYNO); if (f == nullptr) { - switch (errcode) { + switch (errno) { case ENOENT: { last_error = ERR_FILE_NOT_FOUND; } break; @@ -157,10 +158,10 @@ void FileAccessWindows::close() { #else if (!PathFileExistsW((LPCWSTR)(save_path.utf16().get_data()))) { #endif - //creating new file + // Creating new file rename_error = _wrename((LPCWSTR)((save_path + ".tmp").utf16().get_data()), (LPCWSTR)(save_path.utf16().get_data())) != 0; } else { - //atomic replace for existing file + // Atomic replace for existing file rename_error = !ReplaceFileW((LPCWSTR)(save_path.utf16().get_data()), (LPCWSTR)((save_path + ".tmp").utf16().get_data()), nullptr, 2 | 4, nullptr, nullptr); } if (rename_error) { @@ -205,6 +206,7 @@ void FileAccessWindows::seek(uint64_t p_position) { void FileAccessWindows::seek_end(int64_t p_position) { ERR_FAIL_COND(!f); + if (_fseeki64(f, p_position, SEEK_END)) { check_errors(); } @@ -237,6 +239,7 @@ bool FileAccessWindows::eof_reached() const { uint8_t FileAccessWindows::get_8() const { ERR_FAIL_COND_V(!f, 0); + if (flags == READ_WRITE || flags == WRITE_READ) { if (prev_op == WRITE) { fflush(f); @@ -273,6 +276,7 @@ Error FileAccessWindows::get_error() const { void FileAccessWindows::flush() { ERR_FAIL_COND(!f); + fflush(f); if (prev_op == WRITE) { prev_op = 0; @@ -281,6 +285,7 @@ void FileAccessWindows::flush() { void FileAccessWindows::store_8(uint8_t p_dest) { ERR_FAIL_COND(!f); + if (flags == READ_WRITE || flags == WRITE_READ) { if (prev_op == READ) { if (last_error != ERR_FILE_EOF) { @@ -295,6 +300,7 @@ void FileAccessWindows::store_8(uint8_t p_dest) { void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) { ERR_FAIL_COND(!f); ERR_FAIL_COND(!p_src && p_length > 0); + if (flags == READ_WRITE || flags == WRITE_READ) { if (prev_op == READ) { if (last_error != ERR_FILE_EOF) { @@ -307,10 +313,8 @@ void FileAccessWindows::store_buffer(const uint8_t *p_src, uint64_t p_length) { } bool FileAccessWindows::file_exists(const String &p_name) { - FILE *g; - //printf("opening file %s\n", p_fname.utf8().get_data()); String filename = fix_path(p_name); - _wfopen_s(&g, (LPCWSTR)(filename.utf16().get_data()), L"rb"); + FILE *g = _wfsopen((LPCWSTR)(filename.utf16().get_data()), L"rb", _SH_DENYNO); if (g == nullptr) { return false; } else { |