diff options
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.cpp | 28 | ||||
-rw-r--r-- | drivers/vulkan/rendering_device_vulkan.h | 2 | ||||
-rw-r--r-- | thirdparty/README.md | 2 | ||||
-rw-r--r-- | thirdparty/vulkan/patches/VMA-use-volk.patch | 2 | ||||
-rw-r--r-- | thirdparty/vulkan/vk_mem_alloc.h | 1744 |
5 files changed, 1193 insertions, 585 deletions
diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 62de01e8bb..84ca7dbfc2 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -1318,7 +1318,7 @@ const VkImageType RenderingDeviceVulkan::vulkan_image_type[RenderingDevice::TEXT /**** BUFFER MANAGEMENT ****/ /***************************/ -Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mapping) { +Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mem_usage, VmaAllocationCreateFlags p_mem_flags) { VkBufferCreateInfo bufferInfo; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.pNext = nullptr; @@ -1330,8 +1330,8 @@ Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, bufferInfo.pQueueFamilyIndices = nullptr; VmaAllocationCreateInfo allocInfo; - allocInfo.flags = 0; - allocInfo.usage = p_mapping; + allocInfo.flags = p_mem_flags; + allocInfo.usage = p_mem_usage; allocInfo.requiredFlags = 0; allocInfo.preferredFlags = 0; allocInfo.memoryTypeBits = 0; @@ -1380,8 +1380,8 @@ Error RenderingDeviceVulkan::_insert_staging_block() { bufferInfo.pQueueFamilyIndices = nullptr; VmaAllocationCreateInfo allocInfo; - allocInfo.flags = 0; - allocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + allocInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; allocInfo.requiredFlags = 0; allocInfo.preferredFlags = 0; allocInfo.memoryTypeBits = 0; @@ -1847,9 +1847,9 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T uint32_t image_size = get_image_format_required_size(p_format.format, p_format.width, p_format.height, p_format.depth, p_format.mipmaps, &width, &height); VmaAllocationCreateInfo allocInfo; - allocInfo.flags = 0; + allocInfo.flags = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT : 0; allocInfo.pool = nullptr; - allocInfo.usage = p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT ? VMA_MEMORY_USAGE_CPU_ONLY : VMA_MEMORY_USAGE_GPU_ONLY; + allocInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocInfo.requiredFlags = 0; allocInfo.preferredFlags = 0; allocInfo.memoryTypeBits = 0; @@ -2703,7 +2703,7 @@ Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t //allocate buffer VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; //makes more sense to retrieve Buffer tmp_buffer; - _buffer_allocate(&tmp_buffer, buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY); + _buffer_allocate(&tmp_buffer, buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST, VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT); { //Source image barrier VkImageMemoryBarrier image_memory_barrier; @@ -4097,7 +4097,7 @@ RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vec usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; } Buffer buffer; - _buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_GPU_ONLY); + _buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); if (p_data.size()) { uint64_t data_size = p_data.size(); const uint8_t *r = p_data.ptr(); @@ -4259,7 +4259,7 @@ RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBuff #else index_buffer.max_index = 0xFFFFFFFF; #endif - _buffer_allocate(&index_buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + _buffer_allocate(&index_buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); if (p_data.size()) { uint64_t data_size = p_data.size(); const uint8_t *r = p_data.ptr(); @@ -5371,7 +5371,7 @@ RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Ve "Creating buffers with data is forbidden during creation of a draw list"); Buffer buffer; - Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + Error err = _buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); ERR_FAIL_COND_V(err != OK, RID()); if (p_data.size()) { uint64_t data_size = p_data.size(); @@ -5397,7 +5397,7 @@ RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Ve if (p_usage & STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT) { flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; } - Error err = _buffer_allocate(&buffer, p_size_bytes, flags, VMA_MEMORY_USAGE_GPU_ONLY); + Error err = _buffer_allocate(&buffer, p_size_bytes, flags, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); ERR_FAIL_COND_V(err != OK, RID()); if (p_data.size()) { @@ -5423,7 +5423,7 @@ RID RenderingDeviceVulkan::texture_buffer_create(uint32_t p_size_elements, DataF ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != size_bytes, RID()); TextureBuffer texture_buffer; - Error err = _buffer_allocate(&texture_buffer.buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + Error err = _buffer_allocate(&texture_buffer.buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0); ERR_FAIL_COND_V(err != OK, RID()); if (p_data.size()) { @@ -6170,7 +6170,7 @@ Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) { VkCommandBuffer command_buffer = frames[frame].setup_command_buffer; Buffer tmp_buffer; - _buffer_allocate(&tmp_buffer, buffer->size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_CPU_ONLY); + _buffer_allocate(&tmp_buffer, buffer->size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST, VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT); VkBufferCopy region; region.srcOffset = 0; region.dstOffset = 0; diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index a4d5af91a4..7d9bd19309 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -219,7 +219,7 @@ class RenderingDeviceVulkan : public RenderingDevice { } }; - Error _buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mapping); + Error _buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mem_usage, VmaAllocationCreateFlags p_mem_flags); Error _buffer_free(Buffer *p_buffer); Error _buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer = false, uint32_t p_required_align = 32); diff --git a/thirdparty/README.md b/thirdparty/README.md index ced1165d69..73ed69beaf 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -688,7 +688,7 @@ Files extracted from upstream source: SDK release: https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/master/layers/generated/vk_enum_string_helper.h `vk_mem_alloc.h` is taken from https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator -Version: 3.0.0-development (2022-02-24), commit `dc3f6bb9159df22ceed69c7765ddfb4fbb1b6ed0` +Version: 3.0.1-development (2022-03-28), commit `5b598e0a359381d7e2a94149210a1b7642024ae5` `vk_mem_alloc.cpp` is a Godot file and should be preserved on updates. Patches in the `patches` directory should be re-applied after updates. diff --git a/thirdparty/vulkan/patches/VMA-use-volk.patch b/thirdparty/vulkan/patches/VMA-use-volk.patch index 1b6e0f04b8..eebe0c1bc3 100644 --- a/thirdparty/vulkan/patches/VMA-use-volk.patch +++ b/thirdparty/vulkan/patches/VMA-use-volk.patch @@ -1,5 +1,5 @@ diff --git a/thirdparty/vulkan/vk_mem_alloc.h b/thirdparty/vulkan/vk_mem_alloc.h -index 52b403bede..7c450be211 100644 +index 44affc5ca4..d96f2dacc0 100644 --- a/thirdparty/vulkan/vk_mem_alloc.h +++ b/thirdparty/vulkan/vk_mem_alloc.h @@ -127,7 +127,11 @@ extern "C" { diff --git a/thirdparty/vulkan/vk_mem_alloc.h b/thirdparty/vulkan/vk_mem_alloc.h index 6618f1d1f0..d96f2dacc0 100644 --- a/thirdparty/vulkan/vk_mem_alloc.h +++ b/thirdparty/vulkan/vk_mem_alloc.h @@ -25,7 +25,7 @@ /** \mainpage Vulkan Memory Allocator -<b>Version 3.0.0-development</b> +<b>Version 3.0.1-development (2022-03-28)</b> Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. \n License: MIT @@ -84,11 +84,14 @@ License: MIT - [Custom host memory allocator](@ref custom_memory_allocator) - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) -- \subpage vk_khr_dedicated_allocation -- \subpage enabling_buffer_device_address -- \subpage vk_amd_device_coherent_memory +- <b>Extension support</b> + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) - [Allocation algorithm](@ref general_considerations_allocation_algorithm) - [Features not supported](@ref general_considerations_features_not_supported) @@ -427,6 +430,7 @@ typedef enum VmaAllocatorCreateFlagBits VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. typedef VkFlags VmaAllocatorCreateFlags; /** @} */ @@ -534,8 +538,7 @@ typedef enum VmaAllocationCreateFlagBits You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. - - If VmaAllocationCreateInfo::pool is not null, this flag is implied and ignored. */ + */ VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. @@ -548,13 +551,11 @@ typedef enum VmaAllocationCreateFlagBits support it (e.g. Intel GPU). */ VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, - /// \deprecated Removed. Do not use. - VMA_ALLOCATION_CREATE_RESERVED_1_BIT = 0x00000008, - /// \deprecated Removed. Do not use. - VMA_ALLOCATION_CREATE_RESERVED_2_BIT = 0x00000010, - /** Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a null-terminated string. Instead of copying pointer value, a local copy of the - string is made and stored in allocation's `pUserData`. The string is automatically + string is made and stored in allocation's `pName`. The string is automatically freed together with the allocation. It is also used in vmaBuildStatsString(). */ VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, @@ -567,6 +568,10 @@ typedef enum VmaAllocationCreateFlagBits It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). Otherwise it is ignored. + + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. */ VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, /** Create allocation only if additional device memory required for it, if any, won't exceed @@ -591,7 +596,7 @@ typedef enum VmaAllocationCreateFlagBits never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. \warning Violating this declaration may work correctly, but will likely be very slow. - Watch out for implicit reads introduces by doing e.g. `pMappedData[i] += x;` + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. */ VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, @@ -604,7 +609,7 @@ typedef enum VmaAllocationCreateFlagBits This includes allocations created in \ref custom_memory_pools. Declares that mapped memory can be read, written, and accessed in random order, - so a `HOST_CACHED` memory type is preferred. + so a `HOST_CACHED` memory type is required. */ VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, /** @@ -648,6 +653,7 @@ typedef enum VmaAllocationCreateFlagBits VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. typedef VkFlags VmaAllocationCreateFlags; /// Flags to be passed as VmaPoolCreateInfo::flags. @@ -726,16 +732,17 @@ typedef enum VmaDefragmentationFlagBits VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaDefragmentationFlagBits; +/// See #VmaDefragmentationFlagBits. typedef VkFlags VmaDefragmentationFlags; -/// Operation performed on single defragmentation move. +/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove. typedef enum VmaDefragmentationMoveOperation { - /// Buffer/image has been recreated at `dstMemory` + `dstOffset`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass(). + /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass(). VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, - /// Set this value if you cannot move the allocation. New place reserved `dstMemory` + `dstOffset` will be freed. `srcAllocation` will remain unchanged. + /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged. VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, - /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved `dstMemory` + `dstOffset` will be freed, along with `srcAllocation`. + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed. VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, } VmaDefragmentationMoveOperation; @@ -789,7 +796,7 @@ typedef enum VmaVirtualAllocationCreateFlagBits /** Allocation strategy that chooses always the lowest offset in available space. This is not the most efficient strategy but achieves highly packed data. */ - VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_PACKED_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT , + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. @@ -1197,7 +1204,7 @@ typedef struct VmaBudget Fetched from system using VK_EXT_memory_budget extension if enabled. It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors - external to the program, like other programs also consuming system resources. + external to the program, decided by the operating system. Difference `budget - usage` is the amount of additional memory that can probably be allocated without problems. Exceeding the budget may result in various problems. */ @@ -1211,6 +1218,10 @@ typedef struct VmaBudget @{ */ +/** \brief Parameters of new #VmaAllocation. + +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. +*/ typedef struct VmaAllocationCreateInfo { /// Use #VmaAllocationCreateFlagBits enum. @@ -1337,7 +1348,7 @@ typedef struct VmaAllocationInfo Same memory object can be shared by multiple allocations. - It can change after call to vmaDefragment() if this allocation is passed to the function. + It can change after the allocation is moved during \ref defragmentation. */ VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. @@ -1347,7 +1358,7 @@ typedef struct VmaAllocationInfo not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation and apply this offset automatically. - It can change after call to vmaDefragment() if this allocation is passed to the function. + It can change after the allocation is moved during \ref defragmentation. */ VkDeviceSize offset; /** \brief Size of this allocation, in bytes. @@ -1367,7 +1378,7 @@ typedef struct VmaAllocationInfo created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. It can change after call to vmaMapMemory(), vmaUnmapMemory(). - It can also change after call to vmaDefragment() if this allocation is passed to the function. + It can also change after the allocation is moved during \ref defragmentation. */ void* VMA_NULLABLE pMappedData; /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). @@ -1375,6 +1386,14 @@ typedef struct VmaAllocationInfo It can change after call to vmaSetAllocationUserData() for this allocation. */ void* VMA_NULLABLE pUserData; + /** \brief Custom allocation name that was set with vmaSetAllocationName(). + + It can change after call to vmaSetAllocationName() for this allocation. + + Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with + additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. + */ + const char* VMA_NULLABLE pName; } VmaAllocationInfo; /** \brief Parameters for defragmentation. @@ -1409,12 +1428,13 @@ typedef struct VmaDefragmentationMove VmaDefragmentationMoveOperation operation; /// Allocation that should be moved. VmaAllocation VMA_NOT_NULL srcAllocation; - /// Destination memory block where the allocation should be moved. - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE dstMemory; - /// Destination offset where the allocation should be moved. - VkDeviceSize dstOffset; - /// Internal data used by VMA. Do not use or modify! - void* VMA_NOT_NULL internalData; + /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. + + \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, + to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). + vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. + */ + VmaAllocation VMA_NOT_NULL dstTmpAllocation; } VmaDefragmentationMove; /** \brief Parameters for incremental defragmentation steps. @@ -1825,14 +1845,19 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); -/** +/** \brief Allocates memory suitable for given `VkBuffer`. + \param allocator \param buffer \param pCreateInfo \param[out] pAllocation Handle to allocated memory. \param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). -You should free the memory using vmaFreeMemory(). +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateBuffer(). + +You must free the allocation using vmaFreeMemory() when no longer needed. */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocator VMA_NOT_NULL allocator, @@ -1841,7 +1866,20 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); -/// Function similar to vmaAllocateMemoryForBuffer(). +/** \brief Allocates memory suitable for given `VkImage`. + +\param allocator +\param image +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindImageMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateImage(). + +You must free the allocation using vmaFreeMemory() when no longer needed. +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( VmaAllocator VMA_NOT_NULL allocator, VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, @@ -1889,15 +1927,8 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( /** \brief Sets pUserData in given allocation to new value. -If the allocation was created with VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT, -pUserData must be either null, or pointer to a null-terminated string. The function -makes local copy of the string and sets it as allocation's `pUserData`. String -passed as pUserData doesn't need to be valid for whole lifetime of the allocation - -you can free it after this call. String previously pointed by allocation's -pUserData is freed from memory. - -If the flag was not used, the value of pointer `pUserData` is just copied to -allocation's `pUserData`. It is opaque, so you can use it however you want - e.g. +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. as a pointer, ordinal number or some handle to you own data. */ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( @@ -1905,6 +1936,19 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VmaAllocation VMA_NOT_NULL allocation, void* VMA_NULLABLE pUserData); +/** \brief Sets pName in given allocation to new value. + +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName); + /** \brief Given an allocation, returns Property Flags of its memory type. @@ -2092,6 +2136,9 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( \param allocator Allocator object. \param pInfo Structure filled with parameters of defragmentation. \param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. For more information about defragmentation, see documentation chapter: [Defragmentation](@ref defragmentation). @@ -2109,7 +2156,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( Use this function to finish defragmentation started by vmaBeginDefragmentation(). */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentation( +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( VmaAllocator VMA_NOT_NULL allocator, VmaDefragmentationContext VMA_NOT_NULL context, VmaDefragmentationStats* VMA_NULLABLE pStats); @@ -2226,7 +2273,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, const void* VMA_NULLABLE pNext); -/** +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + \param allocator \param pBufferCreateInfo \param pAllocationCreateInfo @@ -2241,7 +2289,7 @@ This function automatically: -# Binds the buffer with the memory. If any of these operations fail, buffer and allocation are not created, -returned value is negative error code, *pBuffer and *pAllocation are null. +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. If the function succeeded, you must destroy both buffer and allocation when you no longer need them using either convenience function vmaDestroyBuffer() or @@ -2282,6 +2330,31 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + /** \brief Destroys Vulkan buffer and frees allocated memory. This is just a convenience function equivalent to: @@ -2307,6 +2380,13 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); +/// Function similar to vmaCreateAliasingBuffer(). +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + /** \brief Destroys Vulkan image and frees allocated memory. This is just a convenience function equivalent to: @@ -2493,6 +2573,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( #include <cstdlib> #include <cstring> #include <utility> +#include <type_traits> #ifdef _MSC_VER #include <intrin.h> // For functions like __popcnt, _BitScanForward etc. @@ -2689,6 +2770,11 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) #endif #endif +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + #ifndef VMA_BITSCAN_LSB // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) @@ -3077,22 +3163,29 @@ class VmaAllocationObjectAllocator; #endif // _VMA_FORWARD_DECLARATIONS -#ifndef _VMA_FUNCTIONS -// Returns number of bits set to 1 in (v). -static inline uint32_t VmaCountBitsSet(uint32_t v) -{ -#ifdef _MSC_VER +#ifndef _VMA_FUNCTIONS + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use instrinsics like: + +Visual Studio: return __popcnt(v); -#elif defined __GNUC__ || defined __clang__ +GCC, Clang: return static_cast<uint32_t>(__builtin_popcount(v)); -#else + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. +*/ +static inline uint32_t VmaCountBitsSet(uint32_t v) +{ uint32_t c = v - ((v >> 1) & 0x55555555); c = ((c >> 2) & 0x33333333) + (c & 0x33333333); c = ((c >> 4) + c) & 0x0F0F0F0F; c = ((c >> 8) + c) & 0x00FF00FF; c = ((c >> 16) + c) & 0x0000FFFF; return c; -#endif } static inline uint8_t VmaBitScanLSB(uint64_t mask) @@ -3631,7 +3724,11 @@ static bool FindMemoryPreferences( // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory if(deviceAccess) { - outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + // ...unless there is a clear preference from the user not to do so. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } // No direct GPU access, no CPU access, just transfers. // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or @@ -5399,6 +5496,7 @@ public: // Posts next part of an open string. The number is converted to decimal characters. void ContinueString(uint32_t n); void ContinueString(uint64_t n); + void ContinueString_Size(size_t n); // Posts next part of an open string. Pointer value is converted to characters // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 void ContinueString_Pointer(const void* ptr); @@ -5408,6 +5506,7 @@ public: // Writes a number value. void WriteNumber(uint32_t n); void WriteNumber(uint64_t n); + void WriteSize(size_t n); // Writes a boolean value - false or true. void WriteBool(bool b); // Writes a null value. @@ -5432,6 +5531,11 @@ private: VmaVector< StackItem, VmaStlAllocator<StackItem> > m_Stack; bool m_InsideString; + // Write size_t for less than 64bits + void WriteSize(size_t n, std::integral_constant<bool, false>) { m_SB.AddNumber(static_cast<uint32_t>(n)); } + // Write size_t for 64bits + void WriteSize(size_t n, std::integral_constant<bool, true>) { m_SB.AddNumber(static_cast<uint64_t>(n)); } + void BeginValue(bool isString); void WriteIndent(bool oneLess = false); }; @@ -5574,6 +5678,14 @@ void VmaJsonWriter::ContinueString(uint64_t n) m_SB.AddNumber(n); } +void VmaJsonWriter::ContinueString_Size(size_t n) +{ + VMA_ASSERT(m_InsideString); + // Fix for AppleClang incorrect type casting + // TODO: Change to if constexpr when C++17 used as minimal standard + WriteSize(n, std::is_same<size_t, uint64_t>{}); +} + void VmaJsonWriter::ContinueString_Pointer(const void* ptr) { VMA_ASSERT(m_InsideString); @@ -5605,6 +5717,15 @@ void VmaJsonWriter::WriteNumber(uint64_t n) m_SB.AddNumber(n); } +void VmaJsonWriter::WriteSize(size_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + // Fix for AppleClang incorrect type casting + // TODO: Change to if constexpr when C++17 used as minimal standard + WriteSize(n, std::is_same<size_t, uint64_t>{}); +} + void VmaJsonWriter::WriteBool(bool b) { VMA_ASSERT(!m_InsideString); @@ -5673,41 +5794,29 @@ static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedSta json.WriteString("BlockCount"); json.WriteNumber(stat.statistics.blockCount); - - json.WriteString("AllocationCount"); - json.WriteNumber(stat.statistics.allocationCount); - - json.WriteString("UnusedRangeCount"); - json.WriteNumber(stat.unusedRangeCount); - json.WriteString("BlockBytes"); json.WriteNumber(stat.statistics.blockBytes); - + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); json.WriteString("AllocationBytes"); json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); + json.WriteNumber(stat.unusedRangeCount); if (stat.statistics.allocationCount > 1) { - json.WriteString("AllocationSize"); - json.BeginObject(true); - json.WriteString("Min"); + json.WriteString("AllocationSizeMin"); json.WriteNumber(stat.allocationSizeMin); - json.WriteString("Max"); + json.WriteString("AllocationSizeMax"); json.WriteNumber(stat.allocationSizeMax); - json.EndObject(); } - if (stat.unusedRangeCount > 1) { - json.WriteString("UnusedRangeSize"); - json.BeginObject(true); - json.WriteString("Min"); + json.WriteString("UnusedRangeSizeMin"); json.WriteNumber(stat.unusedRangeSizeMin); - json.WriteString("Max"); + json.WriteString("UnusedRangeSizeMax"); json.WriteNumber(stat.unusedRangeSizeMax); - json.EndObject(); } - json.EndObject(); } #endif // _VMA_JSON_WRITER @@ -5799,9 +5908,14 @@ private: void PostMinorCounter() { if(m_MinorCounter < m_MajorCounter) + { ++m_MinorCounter; + } else if(m_MajorCounter > 0) - --m_MajorCounter, --m_MinorCounter; + { + --m_MajorCounter; + --m_MinorCounter; + } } }; @@ -5899,9 +6013,8 @@ struct VmaAllocation_T enum FLAGS { - FLAG_USER_DATA_STRING = 0x01, - FLAG_PERSISTENT_MAP = 0x02, - FLAG_MAPPING_ALLOWED = 0x04, + FLAG_PERSISTENT_MAP = 0x01, + FLAG_MAPPING_ALLOWED = 0x02, }; public: @@ -5913,7 +6026,7 @@ public: }; // This struct is allocated using VmaPoolAllocator. - VmaAllocation_T(bool userDataString, bool mappingAllowed); + VmaAllocation_T(bool mappingAllowed); ~VmaAllocation_T(); void InitBlockAllocation( @@ -5936,8 +6049,8 @@ public: ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } VkDeviceSize GetAlignment() const { return m_Alignment; } VkDeviceSize GetSize() const { return m_Size; } - bool IsUserDataString() const { return (m_Flags & FLAG_USER_DATA_STRING) != 0; } void* GetUserData() const { return m_pUserData; } + const char* GetName() const { return m_pName; } VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } @@ -5945,8 +6058,10 @@ public: bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } - void SetUserData(VmaAllocator hAllocator, void* pUserData); - void SwapBlockAllocation(VmaAllocation allocation); + void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } + void SetName(VmaAllocator hAllocator, const char* pName); + void FreeName(VmaAllocator hAllocator); + uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); VmaAllocHandle GetAllocHandle() const; VkDeviceSize GetOffset() const; VmaPool GetParentPool() const; @@ -5992,6 +6107,7 @@ private: VkDeviceSize m_Alignment; VkDeviceSize m_Size; void* m_pUserData; + char* m_pName; uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType @@ -6001,8 +6117,6 @@ private: #if VMA_STATS_STRING_ENABLED uint32_t m_BufferImageUsage; // 0 if unknown. #endif - - void FreeUserDataString(VmaAllocator hAllocator); }; #endif // _VMA_ALLOCATION_T @@ -6239,6 +6353,7 @@ public: // Validates all data structures inside this object. If not valid, returns false. virtual bool Validate() const = 0; virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; virtual VkDeviceSize GetSumFreeSize() const = 0; // Returns true if this block is empty - contains only single free suballocation. virtual bool IsEmpty() const = 0; @@ -6248,14 +6363,14 @@ public: virtual VmaAllocHandle GetAllocationListBegin() const = 0; virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; + virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; // Shouldn't modify blockCount. virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; #if VMA_STATS_STRING_ENABLED - // mapRefCount == UINT32_MAX means unspecified. - virtual void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const = 0; + virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; #endif // Tries to find a place for suballocation with given parameters inside this block. @@ -6299,8 +6414,7 @@ protected: void PrintDetailedMap_Begin(class VmaJsonWriter& json, VkDeviceSize unusedBytes, size_t allocationCount, - size_t unusedRangeCount, - uint32_t mapRefCount) const; + size_t unusedRangeCount) const; void PrintDetailedMap_Allocation(class VmaJsonWriter& json, VkDeviceSize offset, VkDeviceSize size, void* userData) const; void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, @@ -6336,35 +6450,17 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size VmaAllocation allocation = reinterpret_cast<VmaAllocation>(userData); userData = allocation->GetUserData(); + const char* name = allocation->GetName(); #if VMA_STATS_STRING_ENABLED - if (userData != VMA_NULL && allocation->IsUserDataString()) - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %s; Type: %s; Usage: %u", - offset, size, reinterpret_cast<const char*>(userData), - VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], - allocation->GetBufferImageUsage()); - } - else - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Type: %s; Usage: %u", - offset, size, userData, - VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], - allocation->GetBufferImageUsage()); - } + VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %s; Usage: %u", + offset, size, userData, name ? name : "vma_empty", + VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], + allocation->GetBufferImageUsage()); #else - if (userData != VMA_NULL && allocation->IsUserDataString()) - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %s; Type: %u", - offset, size, reinterpret_cast<const char*>(userData), - (uint32_t)allocation->GetSuballocationType()); - } - else - { - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Type: %u", - offset, size, userData, - (uint32_t)allocation->GetSuballocationType()); - } + VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %u", + offset, size, userData, name ? name : "vma_empty", + (uint32_t)allocation->GetSuballocationType()); #endif // VMA_STATS_STRING_ENABLED } @@ -6372,27 +6468,19 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size #if VMA_STATS_STRING_ENABLED void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, - VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount, uint32_t mapRefCount) const + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const { - json.BeginObject(); - json.WriteString("TotalBytes"); json.WriteNumber(GetSize()); json.WriteString("UnusedBytes"); - json.WriteNumber(unusedBytes); + json.WriteSize(unusedBytes); json.WriteString("Allocations"); - json.WriteNumber((uint64_t)allocationCount); + json.WriteSize(allocationCount); json.WriteString("UnusedRanges"); - json.WriteNumber((uint64_t)unusedRangeCount); - - if(mapRefCount != UINT32_MAX) - { - json.WriteString("MapRefCount"); - json.WriteNumber(mapRefCount); - } + json.WriteSize(unusedRangeCount); json.WriteString("Suballocations"); json.BeginArray(); @@ -6408,15 +6496,11 @@ void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, if (IsVirtual()) { - json.WriteString("Type"); - json.WriteString("VirtualAllocation"); - json.WriteString("Size"); json.WriteNumber(size); - - if (userData != VMA_NULL) + if (userData) { - json.WriteString("UserData"); + json.WriteString("CustomData"); json.BeginString(); json.ContinueString_Pointer(userData); json.EndString(); @@ -6450,7 +6534,6 @@ void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const { json.EndArray(); - json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_BLOCK_METADATA_FUNCTIONS @@ -7549,12 +7632,13 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif bool CreateAllocationRequest( @@ -7577,6 +7661,7 @@ public: void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; VmaAllocHandle GetAllocationListBegin() const override; VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -7815,6 +7900,13 @@ size_t VmaBlockMetadata_Linear::GetAllocationCount() const AccessSuballocations2nd().size() - m_2ndNullItemsCount; } +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { const VkDeviceSize size = GetSize(); @@ -8133,7 +8225,7 @@ void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -8295,7 +8387,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32 } const VkDeviceSize unusedBytes = size - usedBytes; - PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount, mapRefCount); + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); // SECOND PASS lastOffset = 0; @@ -8686,6 +8778,13 @@ VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAll return VK_NULL_HANDLE; } +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + void VmaBlockMetadata_Linear::Clear() { m_SumFreeSize = GetSize(); @@ -9884,6 +9983,7 @@ public: virtual ~VmaBlockMetadata_TLSF(); size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } bool IsEmpty() const override { return m_NullBlock->offset == 0; } VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }; @@ -9895,7 +9995,7 @@ public: void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; + void PrintDetailedMap(class VmaJsonWriter& json) const override; #endif bool CreateAllocationRequest( @@ -9917,6 +10017,7 @@ public: void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; VmaAllocHandle GetAllocationListBegin() const override; VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -10167,7 +10268,7 @@ void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const { size_t blockCount = m_AllocCount + m_BlocksFreeCount; VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); @@ -10184,12 +10285,10 @@ void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t VmaClearDetailedStatistics(stats); AddDetailedStatistics(stats); - PrintDetailedMap_Begin( - json, + PrintDetailedMap_Begin(json, stats.statistics.blockBytes - stats.statistics.allocationBytes, stats.statistics.allocationCount, - stats.unusedRangeCount, - mapRefCount); + stats.unusedRangeCount); for (; i < blockCount; ++i) { @@ -10593,6 +10692,16 @@ VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc return VK_NULL_HANDLE; } +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + Block* block = (Block*)alloc; + VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? block->prevPhysical->size : 0; + return 0; +} + void VmaBlockMetadata_TLSF::Clear() { m_AllocCount = 0; @@ -10837,6 +10946,7 @@ public: size_t GetBlockCount() const { return m_Blocks.size(); } // To be used only while the m_Mutex is locked. Used during defragmentation. VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } + VMA_RW_MUTEX &GetMutex() { return m_Mutex; } VkResult CreateMinBlocks(); void AddStatistics(VmaStatistics& inoutStats); @@ -10852,7 +10962,7 @@ public: size_t allocationCount, VmaAllocation* pAllocations); - void Free(const VmaAllocation hAllocation, bool incrementalSort = true); + void Free(const VmaAllocation hAllocation); #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); @@ -10878,6 +10988,9 @@ private: // Incrementally sorted by sumFreeSize, ascending. VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks; uint32_t m_NextBlockId; + bool m_IncrementalSort = true; + + void SetIncrementalSort(bool val) { m_IncrementalSort = val; } VkDeviceSize CalcMaxBlockSize() const; // Finds and removes given block from vector. @@ -10928,17 +11041,26 @@ public: const VmaDefragmentationInfo& info); ~VmaDefragmentationContext_T(); - void GetStats(VmaDefragmentationStats& outStats) { outStats = m_Stats; } + void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); private: - struct ImmovableBlock + // Max number of allocations to ignore due to size constraints before ending single pass + static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; + enum class CounterStatus { Pass, Ignore, End }; + + struct FragmentedBlock { - uint32_t vectorIndex; + uint32_t data; VmaDeviceMemoryBlock* block; }; + struct StateBalanced + { + VkDeviceSize avgFreeSize = 0; + VkDeviceSize avgAllocSize = UINT64_MAX; + }; struct StateExtensive { enum class Operation : uint8_t @@ -10966,25 +11088,29 @@ private: VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + uint8_t m_IgnoredAllocs = 0; uint32_t m_Algorithm; uint32_t m_BlockVectorCount; VmaBlockVector* m_PoolBlockVector; VmaBlockVector** m_pBlockVectors; size_t m_ImmovableBlockCount = 0; - VmaDefragmentationStats m_Stats = { 0 }; + VmaDefragmentationStats m_GlobalStats = { 0 }; + VmaDefragmentationStats m_PassStats = { 0 }; void* m_AlgorithmState = VMA_NULL; static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata); - bool IncrementCounters(uint32_t& allocations, VkDeviceSize bytes); + CounterStatus CheckCounters(VkDeviceSize bytes); + bool IncrementCounters(VkDeviceSize bytes); bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); bool ComputeDefragmentation_Fast(VmaBlockVector& vector); - bool ComputeDefragmentation_Balanced(VmaBlockVector& vector); + bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); bool ComputeDefragmentation_Full(VmaBlockVector& vector); bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); bool MoveDataToFreeBlocks(VmaSuballocationType currentType, VmaBlockVector& vector, size_t firstFreeBlock, bool& texturePresent, bool& bufferPresent, bool& otherPresent); @@ -11259,8 +11385,9 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) if (detailedMap) { json.WriteString("Details"); - m_Metadata->PrintDetailedMap(json, - UINT32_MAX); // mapRefCount + json.BeginObject(); + m_Metadata->PrintDetailedMap(json); + json.EndObject(); } json.EndObject(); @@ -11868,18 +11995,17 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS -VmaAllocation_T::VmaAllocation_T(bool userDataString, bool mappingAllowed) +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) : m_Alignment{ 1 }, m_Size{ 0 }, m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, m_MemoryTypeIndex{ 0 }, m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, m_MapCount{ 0 }, m_Flags{ 0 } { - if(userDataString) - m_Flags |= (uint8_t)FLAG_USER_DATA_STRING; if(mappingAllowed) m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; @@ -11893,7 +12019,7 @@ VmaAllocation_T::~VmaAllocation_T() VMA_ASSERT(m_MapCount == 0 && "Allocation was not unmapped before destruction."); // Check if owned string was freed. - VMA_ASSERT((IsUserDataString() && m_pUserData == VMA_NULL) || !IsUserDataString()); + VMA_ASSERT(m_pName == VMA_NULL); } void VmaAllocation_T::InitBlockAllocation( @@ -11948,31 +12074,25 @@ void VmaAllocation_T::InitDedicatedAllocation( m_DedicatedAllocation.m_Next = VMA_NULL; } -void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) { - if (IsUserDataString()) - { - VMA_ASSERT(pUserData == VMA_NULL || pUserData != m_pUserData); + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); - FreeUserDataString(hAllocator); + FreeName(hAllocator); - if (pUserData != VMA_NULL) - { - m_pUserData = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), (const char*)pUserData); - } - } - else - { - m_pUserData = pUserData; - } + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); } -void VmaAllocation_T::SwapBlockAllocation(VmaAllocation allocation) +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) { VMA_ASSERT(allocation != VMA_NULL); VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); VMA_SWAP(m_BlockAllocation, allocation->m_BlockAllocation); m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); @@ -11980,6 +12100,7 @@ void VmaAllocation_T::SwapBlockAllocation(VmaAllocation allocation) #if VMA_STATS_STRING_ENABLED VMA_SWAP(m_BufferImageUsage, allocation->m_BufferImageUsage); #endif + return m_MapCount; } VmaAllocHandle VmaAllocation_T::GetAllocHandle() const @@ -12165,35 +12286,31 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const json.WriteString("Size"); json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage); if (m_pUserData != VMA_NULL) { - json.WriteString("UserData"); - if (IsUserDataString()) - { - json.WriteString((const char*)m_pUserData); - } - else - { - json.BeginString(); - json.ContinueString_Pointer(m_pUserData); - json.EndString(); - } + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(m_pUserData); + json.EndString(); } - - if (m_BufferImageUsage != 0) + if (m_pName != VMA_NULL) { - json.WriteString("Usage"); - json.WriteNumber(m_BufferImageUsage); + json.WriteString("Name"); + json.WriteString(m_pName); } } #endif // VMA_STATS_STRING_ENABLED -void VmaAllocation_T::FreeUserDataString(VmaAllocator hAllocator) +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) { - VMA_ASSERT(IsUserDataString()); - VmaFreeString(hAllocator->GetAllocationCallbacks(), (char*)m_pUserData); - m_pUserData = VMA_NULL; + if(m_pName) + { + VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); + m_pName = VMA_NULL; + } } #endif // _VMA_ALLOCATION_T_FUNCTIONS @@ -12542,9 +12659,7 @@ VkResult VmaBlockVector::AllocatePage( return VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void VmaBlockVector::Free( - const VmaAllocation hAllocation, - bool incrementalSort) +void VmaBlockVector::Free(const VmaAllocation hAllocation) { VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; @@ -12604,8 +12719,7 @@ void VmaBlockVector::Free( } } - if (incrementalSort) - IncrementallySortBlocks(); + IncrementallySortBlocks(); } // Destruction of a free block. Deferred until this point, outside of mutex @@ -12650,6 +12764,8 @@ void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) void VmaBlockVector::IncrementallySortBlocks() { + if (!m_IncrementalSort) + return; if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) { // Bubble sort only until first swap. @@ -12724,7 +12840,7 @@ VkResult VmaBlockVector::CommitAllocationRequest( } } - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); (*pAllocation)->InitBlockAllocation( pBlock, @@ -12735,7 +12851,10 @@ VkResult VmaBlockVector::CommitAllocationRequest( suballocType, mapped); VMA_HEAVY_ASSERT(pBlock->Validate()); - (*pAllocation)->SetUserData(m_hAllocator, pUserData); + if (isUserDataString) + (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); + else + (*pAllocation)->SetUserData(m_hAllocator, pUserData); m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) { @@ -12770,6 +12889,7 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; if (m_hAllocator->m_UseExtMemoryPriority) { + VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); priorityInfo.priority = m_Priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } @@ -12833,50 +12953,7 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) { VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - if (IsCustomPool()) - { - const char* poolName = m_hParentPool->GetName(); - if (poolName != VMA_NULL && poolName[0] != '\0') - { - json.WriteString("Name"); - json.WriteString(poolName); - } - - json.WriteString("MemoryTypeIndex"); - json.WriteNumber(m_MemoryTypeIndex); - - json.WriteString("BlockSize"); - json.WriteNumber(m_PreferredBlockSize); - - json.WriteString("BlockCount"); - json.BeginObject(true); - if (m_MinBlockCount > 0) - { - json.WriteString("Min"); - json.WriteNumber((uint64_t)m_MinBlockCount); - } - if (m_MaxBlockCount < SIZE_MAX) - { - json.WriteString("Max"); - json.WriteNumber((uint64_t)m_MaxBlockCount); - } - json.WriteString("Cur"); - json.WriteNumber((uint64_t)m_Blocks.size()); - json.EndObject(); - - if (m_Algorithm != 0) - { - json.WriteString("Algorithm"); - json.WriteString(VmaAlgorithmToStr(m_Algorithm)); - } - } - else - { - json.WriteString("PreferredBlockSize"); - json.WriteNumber(m_PreferredBlockSize); - } - json.WriteString("Blocks"); json.BeginObject(); for (size_t i = 0; i < m_Blocks.size(); ++i) { @@ -12884,7 +12961,12 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) json.ContinueString(m_Blocks[i]->GetId()); json.EndString(); - m_Blocks[i]->m_pMetadata->PrintDetailedMap(json, m_Blocks[i]->GetMapRefCount()); + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); } json.EndObject(); } @@ -12929,6 +13011,7 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T( m_BlockVectorCount = 1; m_PoolBlockVector = &info.pool->m_BlockVector; m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); m_PoolBlockVector->SortByFreeSize(); } else @@ -12940,12 +13023,22 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T( { VmaBlockVector* vector = m_pBlockVectors[i]; if (vector != VMA_NULL) + { + vector->SetIncrementalSort(false); vector->SortByFreeSize(); + } } } switch (m_Algorithm) { + case 0: // Default algorithm + m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + { + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + } case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: { if (hAllocator->GetBufferImageGranularity() > 1) @@ -12959,10 +13052,27 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T( VmaDefragmentationContext_T::~VmaDefragmentationContext_T() { + if (m_PoolBlockVector != VMA_NULL) + { + m_PoolBlockVector->SetIncrementalSort(true); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SetIncrementalSort(true); + } + } + if (m_AlgorithmState) { switch (m_Algorithm) { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount); + break; case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount); break; @@ -12976,6 +13086,8 @@ VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPass { if (m_PoolBlockVector != VMA_NULL) { + VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + if (m_PoolBlockVector->GetBlockCount() > 1) ComputeDefragmentation(*m_PoolBlockVector, 0); else if (m_PoolBlockVector->GetBlockCount() == 1) @@ -12987,6 +13099,8 @@ VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPass { if (m_pBlockVectors[i] != VMA_NULL) { + VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + if (m_pBlockVectors[i]->GetBlockCount() > 1) { if (ComputeDefragmentation(*m_pBlockVectors[i], i)) @@ -13017,7 +13131,11 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true); VkResult result = VK_SUCCESS; - VmaVector<ImmovableBlock, VmaStlAllocator<ImmovableBlock>> immovableBlocks(VmaStlAllocator<ImmovableBlock>(m_MoveAllocator.m_pCallbacks)); + VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks); + VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator); + VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator); + + VmaAllocator allocator = VMA_NULL; for (uint32_t i = 0; i < moveInfo.moveCount; ++i) { VmaDefragmentationMove& move = moveInfo.pMoves[i]; @@ -13037,29 +13155,54 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo vector = m_pBlockVectors[vectorIndex]; VMA_ASSERT(vector != VMA_NULL); } - - VmaAllocation dst = reinterpret_cast<VmaAllocation>(move.internalData); + switch (move.operation) { case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: { - move.srcAllocation->SwapBlockAllocation(dst); - prevCount = vector->GetBlockCount(); - freedBlockSize = dst->GetBlock()->m_pMetadata->GetSize(); - vector->Free(dst, false); - currentCount = vector->GetBlockCount(); + uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); + if (mapCount > 0) + { + allocator = vector->m_hAllocator; + VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (FragmentedBlock& block : mappedBlocks) + { + if (block.block == newMapBlock) + { + notPresent = false; + block.data += mapCount; + break; + } + } + if (notPresent) + mappedBlocks.push_back({ mapCount, newMapBlock }); + } + + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } result = VK_INCOMPLETE; break; } case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: { - m_Stats.bytesMoved -= move.srcAllocation->GetSize(); - vector->Free(dst, false); + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + vector->Free(move.dstTmpAllocation); VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock(); bool notPresent = true; - for (const ImmovableBlock& block : immovableBlocks) + for (const FragmentedBlock& block : immovableBlocks) { if (block.block == newBlock) { @@ -13073,16 +13216,32 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo } case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: { - prevCount = vector->GetBlockCount(); - freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); - vector->Free(move.srcAllocation, false); - currentCount = vector->GetBlockCount(); + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + // Scope for locks, Free have it's own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.srcAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } freedBlockSize *= prevCount - currentCount; - VkDeviceSize dstBlockSize = dst->GetBlock()->m_pMetadata->GetSize(); - vector->Free(dst, false); - freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); - currentCount = vector->GetBlockCount(); + VkDeviceSize dstBlockSize; + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); + } result = VK_INCOMPLETE; break; @@ -13094,8 +13253,8 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo if (prevCount > currentCount) { size_t freedBlocks = prevCount - currentCount; - m_Stats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); - m_Stats.bytesFreed += freedBlockSize; + m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); + m_PassStats.bytesFreed += freedBlockSize; } switch (m_Algorithm) @@ -13108,9 +13267,15 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex]; if (state.firstFreeBlock != SIZE_MAX) { - state.firstFreeBlock -= prevCount - currentCount; - if (state.firstFreeBlock != 0) - state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); + const size_t diff = prevCount - currentCount; + if (state.firstFreeBlock >= diff) + { + state.firstFreeBlock -= diff; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); + } + else + state.firstFreeBlock = 0; } } } @@ -13120,6 +13285,13 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo moveInfo.pMoves = VMA_NULL; m_Moves.clear(); + // Update stats + m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; + m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; + m_GlobalStats.bytesMoved += m_PassStats.bytesMoved; + m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; + m_PassStats = { 0 }; + // Move blocks with immovable allocations according to algorithm if (immovableBlocks.size() > 0) { @@ -13131,12 +13303,14 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo { bool swapped = false; // Move to the start of free blocks range - for (const ImmovableBlock& block : immovableBlocks) + for (const FragmentedBlock& block : immovableBlocks) { - StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.vectorIndex]; + StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data]; if (state.operation != StateExtensive::Operation::Cleanup) { - VmaBlockVector* vector = m_pBlockVectors[block.vectorIndex]; + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) { if (vector->GetBlock(i) == block.block) @@ -13144,9 +13318,12 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); if (state.firstFreeBlock != SIZE_MAX) { - if (i < state.firstFreeBlock - 1) + if (i + 1 < state.firstFreeBlock) { - VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + if (state.firstFreeBlock > 1) + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + else + --state.firstFreeBlock; } } swapped = true; @@ -13163,10 +13340,12 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo default: { // Move to the begining - for (const ImmovableBlock& block : immovableBlocks) + for (const FragmentedBlock& block : immovableBlocks) { - VmaBlockVector* vector = m_pBlockVectors[block.vectorIndex]; - for (size_t i = m_ImmovableBlockCount; vector->GetBlockCount(); ++i) + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) { if (vector->GetBlock(i) == block.block) { @@ -13179,6 +13358,13 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo } } } + + // Bulk-map destination blocks + for (const FragmentedBlock& block : mappedBlocks) + { + VkResult res = block.block->Map(allocator, block.data, VMA_NULL); + VMA_ASSERT(res == VK_SUCCESS); + } return result; } @@ -13188,9 +13374,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, { case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: return ComputeDefragmentation_Fast(vector); - default: // Default algoritm + default: + VMA_ASSERT(0); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: - return ComputeDefragmentation_Balanced(vector); + return ComputeDefragmentation_Balanced(vector, index, true); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: return ComputeDefragmentation_Full(vector); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: @@ -13216,12 +13403,27 @@ VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::Get return moveData; } -bool VmaDefragmentationContext_T::IncrementCounters(uint32_t& allocations, VkDeviceSize bytes) +VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) { - if (++allocations >= m_MaxPassAllocations || bytes >= m_MaxPassBytes) + // Ignore allocation if will exceed max size for copy + if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) { - m_Stats.bytesMoved += bytes; - m_Stats.allocationsMoved += allocations; + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; + else + return CounterStatus::End; + } + return CounterStatus::Pass; +} + +bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) +{ + m_PassStats.bytesMoved += bytes; + // Early return when max found + if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) + { + VMA_ASSERT(m_PassStats.allocationsMoved == m_MaxPassAllocations || + m_PassStats.bytesMoved == m_MaxPassBytes && "Exceeded maximal pass threshold!"); return true; } return false; @@ -13229,8 +13431,6 @@ bool VmaDefragmentationContext_T::IncrementCounters(uint32_t& allocations, VkDev bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) { - VkDeviceSize currentBytesMoved = 0; - uint32_t currentAllocsMoved = 0; VmaBlockMetadata* metadata = block->m_pMetadata; for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); @@ -13241,8 +13441,18 @@ bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, Vma // Ignore newly created allocations by defragmentation algorithm if (moveData.move.srcAllocation->GetUserData() == this) continue; - VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); - + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) { @@ -13264,32 +13474,21 @@ bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, Vma moveData.flags, this, moveData.type, - &dst) == VK_SUCCESS) + &moveData.move.dstTmpAllocation) == VK_SUCCESS) { - moveData.move.dstMemory = dst->GetMemory(); - moveData.move.dstOffset = dst->GetOffset(); m_Moves.push_back(moveData.move); - currentBytesMoved += moveData.size; - - if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + if (IncrementCounters(moveData.size)) return true; } } } } } - - m_Stats.bytesMoved += currentBytesMoved; - m_Stats.allocationsMoved += currentAllocsMoved; return false; } bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) { - VkDeviceSize currentBytesMoved = 0; - uint32_t currentAllocsMoved = 0; - VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(data.move.internalData); - for (; start < end; ++start) { VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); @@ -13302,22 +13501,15 @@ bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, Mo this, data.type, 0, - &dst) == VK_SUCCESS) + &data.move.dstTmpAllocation) == VK_SUCCESS) { - data.move.dstMemory = dst->GetMemory(); - data.move.dstOffset = dst->GetOffset(); m_Moves.push_back(data.move); - currentBytesMoved += data.size; - - if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + if (IncrementCounters(data.size)) return true; break; } } } - - m_Stats.bytesMoved += currentBytesMoved; - m_Stats.allocationsMoved += currentAllocsMoved; return false; } @@ -13338,6 +13530,17 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& ve // Ignore newly created allocations by defragmentation algorithm if (moveData.move.srcAllocation->GetUserData() == this) continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } // Check all previous blocks for free space if (AllocInOtherBlock(0, i, moveData, vector)) @@ -13347,19 +13550,24 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& ve return false; } -bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) { // Go over every allocation and try to fit it in previous blocks at lowest offsets, // if not possible: realloc within single block to minimize offset (exclude offset == 0), // but only if there are noticable gaps between them (some heuristic, ex. average size of allocation in block) + VMA_ASSERT(m_AlgorithmState != VMA_NULL); - VkDeviceSize currentBytesMoved = 0; - uint32_t currentAllocsMoved = 0; + StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + const size_t startMoveCount = m_Moves.size(); + VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) { VmaDeviceMemoryBlock* block = vector.GetBlock(i); VmaBlockMetadata* metadata = block->m_pMetadata; + VkDeviceSize prevFreeRegionSize = 0; for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); handle != VK_NULL_HANDLE; @@ -13369,53 +13577,72 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector // Ignore newly created allocations by defragmentation algorithm if (moveData.move.srcAllocation->GetUserData() == this) continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } // Check all previous blocks for free space const size_t prevMoveCount = m_Moves.size(); if (AllocInOtherBlock(0, i, moveData, vector)) return true; + VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); // If no room found then realloc within block for lower offset VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) { - VmaAllocationRequest request = {}; - if (metadata->CreateAllocationRequest( - moveData.size, - moveData.alignment, - false, - moveData.type, - VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, - &request)) + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) { - if (metadata->GetAllocationOffset(request.allocHandle) < offset) + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); - if (vector.CommitAllocationRequest( - request, - block, - moveData.alignment, - moveData.flags, - this, - moveData.type, - &dst) == VK_SUCCESS) + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - moveData.move.dstMemory = dst->GetMemory(); - moveData.move.dstOffset = dst->GetOffset(); - m_Moves.push_back(moveData.move); - currentBytesMoved += moveData.size; - - if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) - return true; + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } } } } } + prevFreeRegionSize = nextFreeRegionSize; } } - - m_Stats.bytesMoved += currentBytesMoved; - m_Stats.allocationsMoved += currentAllocsMoved; + + // No moves perfomed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } return false; } @@ -13424,9 +13651,6 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve // Go over every allocation and try to fit it in previous blocks at lowest offsets, // if not possible: realloc within single block to minimize offset (exclude offset == 0) - VkDeviceSize currentBytesMoved = 0; - uint32_t currentAllocsMoved = 0; - for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) { VmaDeviceMemoryBlock* block = vector.GetBlock(i); @@ -13440,6 +13664,17 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve // Ignore newly created allocations by defragmentation algorithm if (moveData.move.srcAllocation->GetUserData() == this) continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } // Check all previous blocks for free space const size_t prevMoveCount = m_Moves.size(); @@ -13461,7 +13696,6 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve { if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); if (vector.CommitAllocationRequest( request, block, @@ -13469,14 +13703,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve moveData.flags, this, moveData.type, - &dst) == VK_SUCCESS) + &moveData.move.dstTmpAllocation) == VK_SUCCESS) { - moveData.move.dstMemory = dst->GetMemory(); - moveData.move.dstOffset = dst->GetOffset(); m_Moves.push_back(moveData.move); - currentBytesMoved += moveData.size; - - if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + if (IncrementCounters(moveData.size)) return true; } } @@ -13484,9 +13714,6 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve } } } - - m_Stats.bytesMoved += currentBytesMoved; - m_Stats.allocationsMoved += currentAllocsMoved; return false; } @@ -13511,6 +13738,13 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto case StateExtensive::Operation::FindFreeBlockTexture: case StateExtensive::Operation::FindFreeBlockAll: { + // No more blocks to free, just perform fast realloc and move to cleanup + if (vectorState.firstFreeBlock == 0) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + return ComputeDefragmentation_Fast(vector); + } + // No free blocks, have to clear last one size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; @@ -13521,6 +13755,17 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto handle = freeMetadata->GetNextAllocation(handle)) { MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } // Check all previous blocks for free space if (AllocInOtherBlock(0, last, moveData, vector)) @@ -13568,8 +13813,7 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto } vectorState.firstFreeBlock = last; // Nothing done, block found without reallocations, can perform another reallocs in same pass - if (prevMoveCount == m_Moves.size()) - return ComputeDefragmentation_Extensive(vector, index); + return ComputeDefragmentation_Extensive(vector, index); } break; } @@ -13637,6 +13881,9 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto } break; } + case StateExtensive::Operation::Cleanup: + // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). + break; } if (vectorState.operation == StateExtensive::Operation::Cleanup) @@ -13655,6 +13902,27 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto return false; } +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, VmaBlockVector& vector, size_t firstFreeBlock, bool& texturePresent, bool& bufferPresent, bool& otherPresent) @@ -13673,6 +13941,17 @@ bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType curr // Ignore newly created allocations by defragmentation algorithm if (moveData.move.srcAllocation->GetUserData() == this) continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + VMA_ASSERT(0); + case CounterStatus::Pass: + break; + } // Move only single type of resources at once if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) @@ -14404,6 +14683,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; if(m_UseExtMemoryPriority) { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); priorityInfo.priority = priority; VmaPnextChainPushFront(&allocInfo, &priorityInfo); } @@ -14515,9 +14795,12 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); - (*pAllocation)->SetUserData(this, pUserData); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { @@ -14629,8 +14912,8 @@ VkResult VmaAllocator_T::FindMemoryTypeIndex( if((requiredFlags & ~currFlags) == 0) { // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + - VmaCountBitsSet(currFlags & notPreferredFlags); + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); // Remember memory type with lowest cost. if(currCost < minCost) { @@ -14849,6 +15132,8 @@ void VmaAllocator_T::FreeMemory( FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); } + allocation->FreeName(this); + switch(allocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: @@ -14994,6 +15279,7 @@ void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationI pAllocationInfo->size = hAllocation->GetSize(); pAllocationInfo->pMappedData = hAllocation->GetMappedData(); pAllocationInfo->pUserData = hAllocation->GetUserData(); + pAllocationInfo->pName = hAllocation->GetName(); } VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) @@ -15664,89 +15950,90 @@ uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() #if VMA_STATS_STRING_ENABLED void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) { - bool dedicatedAllocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; - if(!dedicatedAllocList.IsEmpty()) - { - if(dedicatedAllocationsStarted == false) - { - dedicatedAllocationsStarted = true; - json.WriteString("DedicatedAllocations"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - dedicatedAllocList.BuildStatsString(json); - } - } - if(dedicatedAllocationsStarted) - { - json.EndObject(); - } - + json.WriteString("DefaultPools"); + json.BeginObject(); { - bool allocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; - if(pBlockVector != VMA_NULL) + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) { - if (pBlockVector->IsEmpty() == false) + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginObject(); { - if (allocationsStarted == false) - { - allocationsStarted = true; - json.WriteString("DefaultPools"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); - json.BeginObject(); + json.WriteString("Blocks"); pBlockVector->PrintDetailedMap(json); - json.EndObject(); + + json.WriteString("DedicatedAllocations"); + dedicatedAllocList.BuildStatsString(json); } + json.EndObject(); } } - if(allocationsStarted) - { - json.EndObject(); - } } + json.EndObject(); - // Custom pools + json.WriteString("CustomPools"); + json.BeginObject(); { VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); - if(!m_Pools.IsEmpty()) + if (!m_Pools.IsEmpty()) { - json.WriteString("Pools"); - json.BeginObject(); - for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - json.BeginString(); - json.ContinueString(pool->GetId()); - json.EndString(); + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } - json.BeginObject(); - pool->m_BlockVector.PrintDetailedMap(json); + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString_Size(index++); + if (pool->GetName()) + { + json.WriteString(" - "); + json.WriteString(pool->GetName()); + } + json.EndString(); - if (!pool->m_DedicatedAllocations.IsEmpty()) - { - json.WriteString("DedicatedAllocations"); - pool->m_DedicatedAllocations.BuildStatsString(json); + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } } - json.EndObject(); + + if (!displayType) + json.EndArray(); } - json.EndObject(); } } + json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_ALLOCATOR_T_FUNCTIONS @@ -15857,127 +16144,176 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( VmaStringBuilder sb(allocator->GetAllocationCallbacks()); { - VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); - json.BeginObject(); - VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); VmaTotalStatistics stats; allocator->CalculateStatistics(&stats); - json.WriteString("Total"); - VmaPrintDetailedStatistics(json, stats.total); - - for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) + VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); + json.BeginObject(); { - json.BeginString("Heap "); - json.ContinueString(heapIndex); - json.EndString(); + json.WriteString("General"); json.BeginObject(); + { + const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; + const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; - json.WriteString("Size"); - json.WriteNumber(allocator->m_MemProps.memoryHeaps[heapIndex].size); + json.WriteString("API"); + json.WriteString("Vulkan"); - json.WriteString("Flags"); - json.BeginArray(true); - if((allocator->m_MemProps.memoryHeaps[heapIndex].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - json.EndArray(); + json.WriteString("apiVersion"); + json.BeginString(); + json.ContinueString(VK_API_VERSION_MAJOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_API_VERSION_MINOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_API_VERSION_PATCH(deviceProperties.apiVersion)); + json.EndString(); - json.WriteString("Budget"); - json.BeginObject(); - { - json.WriteString("BlockBytes"); - json.WriteNumber(budgets[heapIndex].statistics.blockBytes); - json.WriteString("AllocationBytes"); - json.WriteNumber(budgets[heapIndex].statistics.allocationBytes); - json.WriteString("BlockCount"); - json.WriteNumber(budgets[heapIndex].statistics.blockCount); - json.WriteString("AllocationCount"); - json.WriteNumber(budgets[heapIndex].statistics.allocationCount); - json.WriteString("Usage"); - json.WriteNumber(budgets[heapIndex].usage); - json.WriteString("Budget"); - json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("GPU"); + json.WriteString(deviceProperties.deviceName); + json.WriteString("deviceType"); + json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType)); + + json.WriteString("maxMemoryAllocationCount"); + json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); + json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); } json.EndObject(); - - if(stats.memoryHeap[heapIndex].statistics.blockCount > 0) - { - json.WriteString("Stats"); - VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); - } - - for(uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); { - if(allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { - json.BeginString("Type "); - json.ContinueString(typeIndex); + json.BeginString("Heap "); + json.ContinueString(heapIndex); json.EndString(); - json.BeginObject(); - - json.WriteString("Flags"); - json.BeginArray(true); - VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; - if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - json.WriteString("HOST_VISIBLE"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0) - { - json.WriteString("HOST_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) - { - json.WriteString("HOST_CACHED"); - } - if((flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0) - { - json.WriteString("LAZILY_ALLOCATED"); - } -#if VMA_VULKAN_VERSION >= 1001000 - if((flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) - { - json.WriteString("PROTECTED"); - } -#endif // #if VMA_VULKAN_VERSION >= 1001000 -#if VK_AMD_device_coherent_memory - if((flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) - { - json.WriteString("DEVICE_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) != 0) { - json.WriteString("DEVICE_UNCACHED"); - } -#endif // #if VK_AMD_device_coherent_memory - json.EndArray(); + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif + + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); - if(stats.memoryType[typeIndex].statistics.blockCount > 0) - { json.WriteString("Stats"); - VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); - } + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + json.EndObject(); + } + } + + } + json.EndObject(); + } json.EndObject(); } } - json.EndObject(); } - if(detailedMap == VK_TRUE) - { + + if (detailedMap == VK_TRUE) allocator->PrintDetailedMap(json); - } json.EndObject(); } @@ -16428,6 +16764,14 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( allocation->SetUserData(allocator, pUserData); } +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) +{ + allocation->SetName(allocator, pName); +} + VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( VmaAllocator VMA_NOT_NULL allocator, VmaAllocation VMA_NOT_NULL allocation, @@ -16567,13 +16911,20 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VMA_DEBUG_LOG("vmaBeginDefragmentation"); + if (pInfo->pool != VMA_NULL) + { + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + VMA_DEBUG_GLOBAL_MUTEX_LOCK *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); return VK_SUCCESS; } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentation( +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( VmaAllocator allocator, VmaDefragmentationContext context, VmaDefragmentationStats* pStats) @@ -16587,7 +16938,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentation( if (pStats) context->GetStats(*pStats); vma_delete(allocator, context); - return VK_SUCCESS; } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( @@ -16864,6 +17214,50 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. + res = allocator->BindBufferMemory(allocation, 0, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( VmaAllocator allocator, VkBuffer buffer, @@ -16985,10 +17379,52 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( return res; } +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + + *pImage = VK_NULL_HANDLE; + + VMA_DEBUG_LOG("vmaCreateImage"); + + if (pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if (res >= 0) + { + // 2. Bind image with memory. + res = allocator->BindImageMemory(allocation, 0, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation) + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) { VMA_ASSERT(allocator); @@ -17176,9 +17612,10 @@ before including these headers (like `WIN32_LEAN_AND_MEAN` or `WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define them before every `#include` of this library. -\note This library is written in C++, but has C-compatible interface. +This library is written in C++, but has C-compatible interface. Thus you can include and use vk_mem_alloc.h in C or C++ code, but full implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C. +Some features of C++14 used. STL containers, RTTI, or C++ exceptions are not used. \section quick_start_initialization Initialization @@ -17775,14 +18212,28 @@ To use custom memory pools: Example: \code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. +sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, + &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + // Create a pool that can have at most 2 blocks, 128 MiB each. VmaPoolCreateInfo poolCreateInfo = {}; -poolCreateInfo.memoryTypeIndex = ... +poolCreateInfo.memoryTypeIndex = memTypeIndex; poolCreateInfo.blockSize = 128ull * 1024 * 1024; poolCreateInfo.maxBlockCount = 2; VmaPool pool; -vmaCreatePool(allocator, &poolCreateInfo, &pool); +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... // Allocate a buffer out of it. VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -17794,8 +18245,8 @@ allocCreateInfo.pool = pool; VkBuffer buf; VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... \endcode You have to free all allocations made from this pool before destroying it. @@ -17933,6 +18384,8 @@ you can achieve behavior of a ring buffer / queue. Ring buffer is available only in pools with one memory block - VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + \page defragmentation Defragmentation @@ -17965,41 +18418,105 @@ for(;;) res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); if(res == VK_SUCCESS) break; - else if(res == VK_INCOMPLETE) + else if(res != VK_INCOMPLETE) + // Handle error... + + for(uint32_t i = 0; i < pass.moveCount; ++i) { - for(uint32_t i = 0; i < pass.moveCount; ++i) - { - //- Inspect pass.pMoves[i].srcAllocation, identify what buffer or image it represents. - //- Recreate this buffer or image at pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. - //- Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. - } - //- Make sure the copy commands finished executing. - //- Update appropriate descriptors to point to the new places. - res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); - if(res == VK_SUCCESS) - break; - else if(res != VK_INCOMPLETE) - // Handle error... + // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents. + VmaAllocationInfo allocInfo; + vmaGetAllocationInfo(allocator, pMoves[i].srcAllocation, &allocInfo); + MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData; + + // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. + VkImageCreateInfo imgCreateInfo = ... + VkImage newImg; + res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg); + // Check res... + res = vmaBindImageMemory(allocator, pMoves[i].dstTmpAllocation, newImg); + // Check res... + + // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. + vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...); } - else + + // Make sure the copy commands finished executing. + vkWaitForFences(...); + + // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation. + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // ... + vkDestroyImage(device, resData->img, nullptr); + } + + // Update appropriate descriptors to point to the new places... + + res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) // Handle error... } vmaEndDefragmentation(allocator, defragCtx, nullptr); \endcode -You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool -(like in the example above) or all the default pools by setting this member to null. +Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage() +create/destroy an allocation and a buffer/image at once, these are just a shortcut for +creating the resource, allocating memory, and binding them together. +Defragmentation works on memory allocations only. You must handle the rest manually. +Defragmentation is an iterative process that should repreat "passes" as long as related functions +return `VK_INCOMPLETE` not `VK_SUCCESS`. +In each pass: + +1. vmaBeginDefragmentationPass() function call: + - Calculates and returns the list of allocations to be moved in this pass. + Note this can be a time-consuming process. + - Reserves destination memory for them by creating temporary destination allocations + that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo(). +2. Inside the pass, **you should**: + - Inspect the returned list of allocations to be moved. + - Create new buffers/images and bind them at the returned destination temporary allocations. + - Copy data from source to destination resources if necessary. + - Destroy the source buffers/images, but NOT their allocations. +3. vmaEndDefragmentationPass() function call: + - Frees the source memory reserved for the allocations that are moved. + - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory. + - Frees `VkDeviceMemory` blocks that became empty. Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter. Defragmentation algorithm tries to move all suitable allocations. You can, however, refuse to move some of them inside a defragmentation pass, by setting `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. -However, this is not recommended and may result in suboptimal packing of the allocations after defragmentation. +This is not recommended and may result in suboptimal packing of the allocations after defragmentation. If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. -You can also decide to destroy an allocation instead of moving it. -You should then set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. + - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, + filled, and used temporarily in each rendering frame, you can just recreate this image + without copying its data. +- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU + using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + This will cancel the move. + - vmaEndDefragmentationPass() will then free the destination memory + not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), + you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. + +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. @@ -18009,6 +18526,13 @@ It is also safe to perform the defragmentation asynchronously to render frames a usage, possibly from multiple threads, with the exception that allocations returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. +<b>Mapping</b> is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. + +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + \page statistics Statistics @@ -18081,6 +18605,8 @@ To do that, fill VmaAllocationCreateInfo::pUserData field when creating an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, some handle, index, key, ordinal number or any other value that would associate the allocation with your custom metadata. +It it useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. \code VkBufferCreateInfo bufCreateInfo = ... @@ -18111,44 +18637,21 @@ vmaBuildStatsString() in hexadecimal form. \section allocation_names Allocation names -There is alternative mode available where `pUserData` pointer is used to point to -a null-terminated string, giving a name to the allocation. To use this mode, -set #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag in VmaAllocationCreateInfo::flags. -Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or a pointer to a null-terminated string. +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). The library creates internal copy of the string, so the pointer you pass doesn't need to be valid for whole lifetime of the allocation. You can free it after the call. \code -VkImageCreateInfo imageInfo = ... - std::string imageName = "Texture: "; imageName += fileName; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; -allocCreateInfo.pUserData = imageName.c_str(); - -VkImage image; -VmaAllocation allocation; -vmaCreateImage(allocator, &imageInfo, &allocCreateInfo, &image, &allocation, nullptr); +vmaSetAllocationName(allocator, allocation, imageName.c_str()); \endcode -The value of `pUserData` pointer of the allocation will be different than the one -you passed when setting allocation's name - pointing to a buffer managed -internally that holds copy of the string. - -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -const char* imageName = (const char*)allocInfo.pUserData; -printf("Image name: %s\n", imageName); -\endcode +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). -That string is also printed in JSON report created by vmaBuildStatsString(). - -\note Passing string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. @@ -18491,6 +18994,7 @@ imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; VkImage img; VmaAllocation alloc; @@ -18502,7 +19006,8 @@ Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DED especially if they are large or if you plan to destroy and recreate them with different sizes e.g. when display resolution changes. Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. - +When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. \section usage_patterns_staging_copy_upload Staging copy for upload @@ -18653,6 +19158,7 @@ else // [Executed in runtime]: memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT VkBufferCopy bufCopy = { 0, // srcOffset 0, // dstOffset, @@ -18815,6 +19321,86 @@ To learn more about this extension, see: +\page vk_ext_memory_priority VK_EXT_memory_priority + +VK_EXT_memory_priority is a device extension that allows to pass additional "priority" +value to Vulkan memory allocations that the implementation may use prefer certain +buffers and images that are critical for performance to stay in device-local memory +in cases when the memory is over-subscribed, while some others may be moved to the system memory. + +VMA offers convenient usage of this extension. +If you enable it, you can pass "priority" parameter when creating allocations or custom pools +and the library automatically passes the value to Vulkan using this extension. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_ext_memory_priority_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to +`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_ext_memory_priority_usage Usage + +When using this extension, you should initialize following member: + +- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +- VmaPoolCreateInfo::priority when creating a custom pool. + +It should be a floating-point value between `0.0f` and `1.0f`, where recommended default is `0.5f`. +Memory allocated with higher value can be treated by the Vulkan implementation as higher priority +and so it can have lower chances of being pushed out to system memory, experiencing degraded performance. + +It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images +as dedicated and set high priority to them. For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters + from the parametrs passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters + VMA used when creating default pools, which means `priority == 0.5f`. + + \page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory VK_AMD_device_coherent_memory is a device extension that enables access to @@ -18939,6 +19525,28 @@ accompanying this library. functions. - #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, + introducing only some internal improvements, bug fixes, optimizations etc. + or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, + so existing code that uses the library should continue to work, while some new + symbols could have been added: new structures, functions, new values in existing + enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatiblity is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + \section general_considerations_validation_layer_warnings Validation layer warnings When using this library, you can meet following types of warnings issued by |