diff options
Diffstat (limited to 'thirdparty/vulkan/vk_mem_alloc.h')
-rw-r--r-- | thirdparty/vulkan/vk_mem_alloc.h | 6523 |
1 files changed, 2811 insertions, 3712 deletions
diff --git a/thirdparty/vulkan/vk_mem_alloc.h b/thirdparty/vulkan/vk_mem_alloc.h index 89e00e6326..6618f1d1f0 100644 --- a/thirdparty/vulkan/vk_mem_alloc.h +++ b/thirdparty/vulkan/vk_mem_alloc.h @@ -1,5 +1,5 @@ // -/// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -49,7 +49,6 @@ License: MIT - [Mapping functions](@ref memory_mapping_mapping_functions) - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) - [Cache flush and invalidate](@ref memory_mapping_cache_control) - - [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable) - \subpage staying_within_budget - [Querying for budget](@ref staying_within_budget_querying_for_budget) - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) @@ -61,12 +60,7 @@ License: MIT - [Stack](@ref linear_algorithm_stack) - [Double stack](@ref linear_algorithm_double_stack) - [Ring buffer](@ref linear_algorithm_ring_buffer) - - [Buddy allocation algorithm](@ref buddy_algorithm) - \subpage defragmentation - - [Defragmenting CPU memory](@ref defragmentation_cpu) - - [Defragmenting GPU memory](@ref defragmentation_gpu) - - [Additional notes](@ref defragmentation_additional_notes) - - [Writing custom allocation algorithm](@ref defragmentation_custom_algorithm) - \subpage statistics - [Numeric statistics](@ref statistics_numeric_statistics) - [JSON dump](@ref statistics_json_dump) @@ -80,17 +74,19 @@ License: MIT - [Corruption detection](@ref debugging_memory_usage_corruption_detection) - \subpage opengl_interop - \subpage usage_patterns - - [Common mistakes](@ref usage_patterns_common_mistakes) - - [Simple patterns](@ref usage_patterns_simple) - - [Advanced patterns](@ref usage_patterns_advanced) + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) - \subpage configuration - [Pointers to Vulkan functions](@ref config_Vulkan_functions) - [Custom host memory allocator](@ref custom_memory_allocator) - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) - - \subpage vk_khr_dedicated_allocation - - \subpage enabling_buffer_device_address - - \subpage vk_amd_device_coherent_memory +- \subpage vk_khr_dedicated_allocation +- \subpage enabling_buffer_device_address +- \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) @@ -99,8 +95,8 @@ License: MIT \section main_see_also See also -- [Product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) -- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) +- [**Product page on GPUOpen**](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) +- [**Source repository on GitHub**](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) \defgroup group_init Library initialization @@ -119,6 +115,7 @@ for user-defined purpose without allocating any real GPU memory. \defgroup group_stats Statistics \brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. */ @@ -178,13 +175,6 @@ extern "C" { #endif // #if VMA_VULKAN_VERSION >= 1001000 #endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES -#if !defined(VK_VERSION_1_2) - // This one is tricky. Vulkan specification defines this code as available since - // Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. - // See pull request #207. - #define VK_ERROR_UNKNOWN ((VkResult)-13) -#endif - #if !defined(VMA_DEDICATED_ALLOCATION) #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation #define VMA_DEDICATED_ALLOCATION 1 @@ -307,9 +297,9 @@ extern "C" { //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -// +// // INTERFACE -// +// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -453,56 +443,33 @@ typedef enum VmaMemoryUsage Use other members of VmaAllocationCreateInfo to specify your requirements. */ VMA_MEMORY_USAGE_UNKNOWN = 0, - /** Memory will be used on device only, so fast access from the device is preferred. - It usually means device-local GPU (video) memory. - No need to be mappable on host. - It is roughly equivalent of `D3D12_HEAP_TYPE_DEFAULT`. - - Usage: - - - Resources written and read by device, e.g. images used as attachments. - - Resources transferred from host once (immutable) or infrequently and read by - device multiple times, e.g. textures to be sampled, vertex buffers, uniform - (constant) buffers, and majority of other types of resources used on GPU. - - Allocation may still end up in `HOST_VISIBLE` memory on some implementations. - In such case, you are free to map it. - You can use #VMA_ALLOCATION_CREATE_MAPPED_BIT with this usage type. + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_GPU_ONLY = 1, - /** Memory will be mappable on host. - It usually means CPU (system) memory. - Guarantees to be `HOST_VISIBLE` and `HOST_COHERENT`. - CPU access is typically uncached. Writes may be write-combined. - Resources created in this pool may still be accessible to the device, but access to them can be slow. - It is roughly equivalent of `D3D12_HEAP_TYPE_UPLOAD`. - - Usage: Staging copy of resources used as transfer source. + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. */ VMA_MEMORY_USAGE_CPU_ONLY = 2, /** - Memory that is both mappable on host (guarantees to be `HOST_VISIBLE`) and preferably fast to access by GPU. - CPU access is typically uncached. Writes may be write-combined. - - Usage: Resources written frequently by host (dynamic), read by device. E.g. textures (with LINEAR layout), vertex buffers, uniform buffers updated every frame or every draw call. + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_CPU_TO_GPU = 3, - /** Memory mappable on host (guarantees to be `HOST_VISIBLE`) and cached. - It is roughly equivalent of `D3D12_HEAP_TYPE_READBACK`. - - Usage: - - - Resources written by device, read by host - results of some computations, e.g. screen capture, average scene luminance for HDR tone mapping. - - Any resources read or accessed randomly on host, e.g. CPU-side copy of vertex buffer used as source of transfer, but also used for collision detection. + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. */ VMA_MEMORY_USAGE_GPU_TO_CPU = 4, - /** CPU memory - memory that is preferably not `DEVICE_LOCAL`, but also not guaranteed to be `HOST_VISIBLE`. - - Usage: Staging copy of resources moved from GPU memory to CPU memory as part - of custom paging/residency mechanism, to be moved back to GPU memory when needed. + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_CPU_COPY = 5, - /** Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. @@ -510,6 +477,43 @@ typedef enum VmaMemoryUsage Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. */ VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF } VmaMemoryUsage; @@ -570,11 +574,51 @@ typedef enum VmaAllocationCreateFlagBits */ VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, /** \brief Set this flag if the allocated memory will have aliasing resources. - * + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. */ VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. + Watch out for implicit reads introduces by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, /** Allocation strategy that chooses smallest possible free range for the allocation to minimize memory usage and fragmentation, possibly at the expense of allocation time. */ @@ -584,6 +628,11 @@ typedef enum VmaAllocationCreateFlagBits to minimize allocation time, possibly at the expense of allocation quality. */ VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recomended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. */ VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, @@ -594,7 +643,8 @@ typedef enum VmaAllocationCreateFlagBits */ VMA_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | - VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocationCreateFlagBits; @@ -635,48 +685,60 @@ typedef enum VmaPoolCreateFlagBits */ VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, - /** \brief Enables alternative, buddy allocation algorithm in this pool. - - It operates on a tree of blocks, each having size that is a power of two and - a half of its parent's size. Comparing to default algorithm, this one provides - faster allocation and deallocation and decreased external fragmentation, - at the expense of more memory wasted (internal fragmentation). - For details, see documentation chapter \ref buddy_algorithm. - */ - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT = 0x00000008, - - /** \brief Enables alternative, Two-Level Segregated Fit (TLSF) allocation algorithm in this pool. - - This algorithm is based on 2-level lists dividing address space into smaller - chunks. The first level is aligned to power of two which serves as buckets for requested - memory to fall into, and the second level is lineary subdivided into lists of free memory. - This algorithm aims to achieve bounded response time even in the worst case scenario. - Allocation time can be sometimes slightly longer than compared to other algorithms - but in return the application can avoid stalls in case of fragmentation, giving - predictable results, suitable for real-time use cases. - */ - VMA_POOL_CREATE_TLSF_ALGORITHM_BIT = 0x00000010, - /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. */ VMA_POOL_CREATE_ALGORITHM_MASK = - VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT | - VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT | - VMA_POOL_CREATE_TLSF_ALGORITHM_BIT, + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaPoolCreateFlagBits; /// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. typedef VkFlags VmaPoolCreateFlags; -/// Flags to be used in vmaDefragmentationBegin(). None at the moment. Reserved for future use. +/// Flags to be passed as VmaDefragmentationInfo::flags. typedef enum VmaDefragmentationFlagBits { - VMA_DEFRAGMENTATION_FLAG_INCREMENTAL = 0x1, + /* \brief Use simple but fast algorithm for defragmentation. + May not achieve best results but will require least time to compute and least allocations to copy. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1, + /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified. + Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2, + /* \brief Perform full defragmentation of memory. + Can result in notably more time to compute and allocations to copy, but will achieve best memory packing. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4, + /** \brief Use the most roboust algorithm at the cost of time to compute and number of copies to make. + Only available when bufferImageGranularity is greater than 1, since it aims to reduce + alignment issues between different types of resources. + Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT, + VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaDefragmentationFlagBits; typedef VkFlags VmaDefragmentationFlags; +/// Operation performed on single defragmentation move. +typedef enum VmaDefragmentationMoveOperation +{ + /// Buffer/image has been recreated at `dstMemory` + `dstOffset`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass(). + VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved `dstMemory` + `dstOffset` will be freed. `srcAllocation` will remain unchanged. + VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved `dstMemory` + `dstOffset` will be freed, along with `srcAllocation`. + VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +} VmaDefragmentationMoveOperation; + /** @} */ /** @@ -700,34 +762,10 @@ typedef enum VmaVirtualBlockCreateFlagBits */ VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001, - /** \brief Enables alternative, buddy allocation algorithm in this virtual block. - - It operates on a tree of blocks, each having size that is a power of two and - a half of its parent's size. Comparing to default algorithm, this one provides - faster allocation and deallocation and decreased external fragmentation, - at the expense of more memory wasted (internal fragmentation). - For details, see documentation chapter \ref buddy_algorithm. - */ - VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT = 0x00000002, - - /** \brief Enables alternative, TLSF allocation algorithm in virtual block. - - This algorithm is based on 2-level lists dividing address space into smaller - chunks. The first level is aligned to power of two which serves as buckets for requested - memory to fall into, and the second level is lineary subdivided into lists of free memory. - This algorithm aims to achieve bounded response time even in the worst case scenario. - Allocation time can be sometimes slightly longer than compared to other algorithms - but in return the application can avoid stalls in case of fragmentation, giving - predictable results, suitable for real-time use cases. - */ - VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT = 0x00000004, - /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags. */ VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = - VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT | - VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT | - VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT, + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaVirtualBlockCreateFlagBits; @@ -748,6 +786,10 @@ typedef enum VmaVirtualAllocationCreateFlagBits /** \brief Allocation strategy that tries to minimize allocation time. */ VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_PACKED_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT , /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. @@ -821,10 +863,10 @@ returned structure VmaAllocationInfo. VK_DEFINE_HANDLE(VmaAllocation) /** \struct VmaDefragmentationContext -\brief Represents Opaque object that represents started defragmentation process. +\brief An opaque object that represents started defragmentation process. -Fill structure #VmaDefragmentationInfo2 and call function vmaDefragmentationBegin() to create it. -Call function vmaDefragmentationEnd() to destroy it. +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. */ VK_DEFINE_HANDLE(VmaDefragmentationContext) @@ -943,6 +985,12 @@ typedef struct VmaVulkanFunctions #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; #endif +#if VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceBufferMemoryRequirements VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceImageMemoryRequirements VMA_NULLABLE vkGetDeviceImageMemoryRequirements; +#endif } VmaVulkanFunctions; /// Description of a Allocator to be created. @@ -1051,59 +1099,102 @@ typedef struct VmaAllocatorInfo @{ */ -/// Calculated statistics of memory usage in entire allocator. -typedef struct VmaStatInfo +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics { - /// Number of `VkDeviceMemory` Vulkan memory blocks allocated. + /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. + */ uint32_t blockCount; - /// Number of #VmaAllocation allocation objects allocated. + /** \brief Number of #VmaAllocation objects allocated. + + Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. + */ uint32_t allocationCount; + /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + + \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object + (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls + "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. + */ + VkDeviceSize blockBytes; + /** \brief Total number of bytes occupied by all #VmaAllocation objects. + + Always less or equal than `blockBytes`. + Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan + but unused by any #VmaAllocation. + */ + VkDeviceSize allocationBytes; +} VmaStatistics; + +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ + /// Basic statistics. + VmaStatistics statistics; /// Number of free ranges of memory between allocations. uint32_t unusedRangeCount; - /// Total number of bytes occupied by all allocations. - VkDeviceSize usedBytes; - /// Total number of bytes occupied by unused ranges. - VkDeviceSize unusedBytes; - VkDeviceSize allocationSizeMin, allocationSizeAvg, allocationSizeMax; - VkDeviceSize unusedRangeSizeMin, unusedRangeSizeAvg, unusedRangeSizeMax; -} VmaStatInfo; - -/// General statistics from current state of Allocator. -typedef struct VmaStats -{ - VmaStatInfo memoryType[VK_MAX_MEMORY_TYPES]; - VmaStatInfo memoryHeap[VK_MAX_MEMORY_HEAPS]; - VmaStatInfo total; -} VmaStats; - -/// Statistics of current memory usage and available budget, in bytes, for specific memory heap. -typedef struct VmaBudget + /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. + VkDeviceSize allocationSizeMin; + /// Largest allocation size. 0 if there are 0 allocations. + VkDeviceSize allocationSizeMax; + /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMin; + /// Largest empty range size. 0 if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; + +/** \brief General statistics from current state of the Allocator - +total memory usage across all memory heaps and types. + +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). +*/ +typedef struct VmaTotalStatistics { - /** \brief Sum size of all `VkDeviceMemory` blocks allocated from particular heap, in bytes. - */ - VkDeviceSize blockBytes; + VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; + VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; + VmaDetailedStatistics total; +} VmaTotalStatistics; - /** \brief Sum size of all allocations created in particular heap, in bytes. +/** \brief Statistics of current memory usage and available budget for a specific memory heap. - Usually less or equal than `blockBytes`. - Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused - - available for new allocations or wasted due to fragmentation. +These are fast to calculate. +See function vmaGetHeapBudgets(). +*/ +typedef struct VmaBudget +{ + /** \brief Statistics fetched from the library. */ - VkDeviceSize allocationBytes; - + VmaStatistics statistics; /** \brief Estimated current memory usage of the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. - It might be different than `blockBytes` (usually higher) due to additional implicit objects + It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or `VkDeviceMemory` blocks allocated outside of this library, if any. */ VkDeviceSize usage; - /** \brief Estimated amount of memory available to the program, in bytes. - Fetched from system using `VK_EXT_memory_budget` extension if enabled. + Fetched from system using VK_EXT_memory_budget extension if enabled. It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors external to the program, like other programs also consuming system resources. @@ -1127,26 +1218,31 @@ typedef struct VmaAllocationCreateInfo /** \brief Intended usage of memory. You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n + If `pool` is not null, this member is ignored. */ VmaMemoryUsage usage; /** \brief Flags that must be set in a Memory Type chosen for an allocation. - Leave 0 if you specify memory requirements in other way.*/ + Leave 0 if you specify memory requirements in other way. \n + If `pool` is not null, this member is ignored.*/ VkMemoryPropertyFlags requiredFlags; /** \brief Flags that preferably should be set in a memory type chosen for an allocation. - Set to 0 if no additional flags are preferred.*/ + Set to 0 if no additional flags are preferred. \n + If `pool` is not null, this member is ignored. */ VkMemoryPropertyFlags preferredFlags; /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if it meets other requirements specified by this structure, with no further restrictions on memory type index. \n + If `pool` is not null, this member is ignored. */ uint32_t memoryTypeBits; /** \brief Pool that this allocation should be created in. - Leave `VK_NULL_HANDLE` to allocate from default pool. + Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: + `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. */ VmaPool VMA_NULLABLE pool; /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). @@ -1168,6 +1264,9 @@ typedef struct VmaAllocationCreateInfo /// Describes parameter of created #VmaPool. typedef struct VmaPoolCreateInfo { + /** \brief Vulkan memory type index to allocate this pool from. + */ + uint32_t memoryTypeIndex; /** \brief Use combination of #VmaPoolCreateFlagBits. */ VmaPoolCreateFlags flags; @@ -1222,33 +1321,6 @@ typedef struct VmaPoolCreateInfo /** @} */ /** -\addtogroup group_stats -@{ -*/ - -/// Describes parameter of existing #VmaPool. -typedef struct VmaPoolStats -{ - /** \brief Total amount of `VkDeviceMemory` allocated from Vulkan for this pool, in bytes. - */ - VkDeviceSize size; - /** \brief Total number of bytes in the pool not used by any #VmaAllocation. - */ - VkDeviceSize unusedSize; - /** \brief Number of #VmaAllocation objects created from this pool that were not destroyed. - */ - size_t allocationCount; - /** \brief Number of continuous memory ranges in the pool not used by any #VmaAllocation. - */ - size_t unusedRangeCount; - /** \brief Number of `VkDeviceMemory` blocks allocated for this pool. - */ - size_t blockCount; -} VmaPoolStats; - -/** @} */ - -/** \addtogroup group_alloc @{ */ @@ -1307,116 +1379,79 @@ typedef struct VmaAllocationInfo /** \brief Parameters for defragmentation. -To be used with function vmaDefragmentationBegin(). +To be used with function vmaBeginDefragmentation(). */ -typedef struct VmaDefragmentationInfo2 +typedef struct VmaDefragmentationInfo { - /** \brief Reserved for future use. Should be 0. - */ + /// \brief Use combination of #VmaDefragmentationFlagBits. VmaDefragmentationFlags flags; - /** \brief Number of allocations in `pAllocations` array. - */ - uint32_t allocationCount; - /** \brief Pointer to array of allocations that can be defragmented. - - The array should have `allocationCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same allocation cannot occur twice. - All allocations not present in this array are considered non-moveable during this defragmentation. - */ - const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations; - /** \brief Optional, output. Pointer to array that will be filled with information whether the allocation at certain index has been changed during defragmentation. - - The array should have `allocationCount` elements. - You can pass null if you are not interested in this information. - */ - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged; - /** \brief Numer of pools in `pPools` array. - */ - uint32_t poolCount; - /** \brief Either null or pointer to array of pools to be defragmented. - - All the allocations in the specified pools can be moved during defragmentation - and there is no way to check if they were really moved as in `pAllocationsChanged`, - so you must query all the allocations in all these pools for new `VkDeviceMemory` - and offset using vmaGetAllocationInfo() if you might need to recreate buffers - and images bound to them. + /** \brief Custom pool to be defragmented. - The array should have `poolCount` elements. - The array should not contain nulls. - Elements in the array should be unique - same pool cannot occur twice. - - Using this array is equivalent to specifying all allocations from the pools in `pAllocations`. - It might be more efficient. - */ - const VmaPool VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(poolCount) pPools; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on CPU side, like `memcpy()`, `memmove()`. - - `VK_WHOLE_SIZE` means no limit. - */ - VkDeviceSize maxCpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on CPU side, like `memcpy()`, `memmove()`. - - `UINT32_MAX` means no limit. + If null then default pools will undergo defragmentation process. */ - uint32_t maxCpuAllocationsToMove; - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places using transfers on GPU side, posted to `commandBuffer`. + VmaPool VMA_NULLABLE pool; + /** \brief Maximum numbers of bytes that can be copied during single pass, while moving allocations to different places. - `VK_WHOLE_SIZE` means no limit. + `0` means no limit. */ - VkDeviceSize maxGpuBytesToMove; - /** \brief Maximum number of allocations that can be moved to a different place using transfers on GPU side, posted to `commandBuffer`. + VkDeviceSize maxBytesPerPass; + /** \brief Maximum number of allocations that can be moved during single pass to a different place. - `UINT32_MAX` means no limit. + `0` means no limit. */ - uint32_t maxGpuAllocationsToMove; - /** \brief Optional. Command buffer where GPU copy commands will be posted. - - If not null, it must be a valid command buffer handle that supports Transfer queue type. - It must be in the recording state and outside of a render pass instance. - You need to submit it and make sure it finished execution before calling vmaDefragmentationEnd(). - - Passing null means that only CPU defragmentation will be performed. - */ - VkCommandBuffer VMA_NULLABLE commandBuffer; -} VmaDefragmentationInfo2; + uint32_t maxAllocationsPerPass; +} VmaDefragmentationInfo; -typedef struct VmaDefragmentationPassMoveInfo +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove { - VmaAllocation VMA_NOT_NULL allocation; - VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory; - VkDeviceSize offset; -} VmaDefragmentationPassMoveInfo; + /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. + VmaDefragmentationMoveOperation operation; + /// Allocation that should be moved. + VmaAllocation VMA_NOT_NULL srcAllocation; + /// Destination memory block where the allocation should be moved. + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE dstMemory; + /// Destination offset where the allocation should be moved. + VkDeviceSize dstOffset; + /// Internal data used by VMA. Do not use or modify! + void* VMA_NOT_NULL internalData; +} VmaDefragmentationMove; /** \brief Parameters for incremental defragmentation steps. To be used with function vmaBeginDefragmentationPass(). */ -typedef struct VmaDefragmentationPassInfo +typedef struct VmaDefragmentationPassMoveInfo { + /// Number of elements in the `pMoves` array. uint32_t moveCount; - VmaDefragmentationPassMoveInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(moveCount) pMoves; -} VmaDefragmentationPassInfo; + /** \brief Array of moves to be performed by the user in the current defragmentation pass. + + Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). -/** \brief Deprecated. Optional configuration parameters to be passed to function vmaDefragment(). + For each element, you should: + + 1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstMemory + VmaDefragmentationMove::dstOffset. + 2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. + 3. Make sure these commands finished executing on the GPU. + 4. Destroy the old buffer/image. + + Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). + After this call, the allocation will point to the new place in memory. -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. -*/ -typedef struct VmaDefragmentationInfo -{ - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places. + Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. - Default is `VK_WHOLE_SIZE`, which means no limit. - */ - VkDeviceSize maxBytesToMove; - /** \brief Maximum number of allocations that can be moved to different place. + Alternatively, if you decide you want to completely remove the allocation: - Default is `UINT32_MAX`, which means no limit. + 1. Destroy its buffer/image. + 2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + + Then, after vmaEndDefragmentationPass() the allocation will be freed. */ - uint32_t maxAllocationsToMove; -} VmaDefragmentationInfo; + VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; -/// Statistics returned by function vmaDefragment(). +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). typedef struct VmaDefragmentationStats { /// Total number of bytes that have been copied while moving allocations to different places. @@ -1484,7 +1519,7 @@ typedef struct VmaVirtualAllocationCreateInfo typedef struct VmaVirtualAllocationInfo { /** \brief Offset of the allocation. - + Offset at which the allocation was made. */ VkDeviceSize offset; @@ -1572,23 +1607,24 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( /** \brief Retrieves statistics from current state of the Allocator. This function is called "calculate" not "get" because it has to traverse all -internal data structures, so it may be quite slow. For faster but more brief statistics -suitable to be called every frame or every allocation, use vmaGetHeapBudgets(). +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). Note that when using allocator from multiple threads, returned information may immediately become outdated. */ -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( VmaAllocator VMA_NOT_NULL allocator, - VmaStats* VMA_NOT_NULL pStats); + VmaTotalStatistics* VMA_NOT_NULL pStats); -/** \brief Retrieves information about current memory budget for all memory heaps. +/** \brief Retrieves information about current memory usage and budget for all memory heaps. \param allocator \param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. This function is called "get" not "calculate" because it is very fast, suitable to be called -every frame or every allocation. For more detailed statistics use vmaCalculateStats(). +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). Note that when using allocator from multiple threads, returned information may immediately become outdated. @@ -1631,12 +1667,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. It internally creates a temporary, dummy buffer that never has memory bound. -It is just a convenience function, equivalent to calling: - -- `vkCreateBuffer` -- `vkGetBufferMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyBuffer` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -1649,12 +1679,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. It internally creates a temporary, dummy image that never has memory bound. -It is just a convenience function, equivalent to calling: - -- `vkCreateImage` -- `vkGetImageMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyImage` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -1692,10 +1716,21 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( \param pool Pool object. \param[out] pPoolStats Statistics of specified pool. */ -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator VMA_NOT_NULL allocator, VmaPool VMA_NOT_NULL pool, - VmaPoolStats* VMA_NOT_NULL pPoolStats); + VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); /** @} */ @@ -2056,103 +2091,66 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( \param allocator Allocator object. \param pInfo Structure filled with parameters of defragmentation. -\param[out] pStats Optional. Statistics of defragmentation. You can pass null if you are not interested in this information. -\param[out] pContext Context object that must be passed to vmaDefragmentationEnd() to finish defragmentation. -\return `VK_SUCCESS` and `*pContext == null` if defragmentation finished within this function call. `VK_NOT_READY` and `*pContext != null` if defragmentation has been started and you need to call vmaDefragmentationEnd() to finish it. Negative value in case of error. - -Use this function instead of old, deprecated vmaDefragment(). - -Warning! Between the call to vmaDefragmentationBegin() and vmaDefragmentationEnd(): - -- You should not use any of allocations passed as `pInfo->pAllocations` or - any allocations that belong to pools passed as `pInfo->pPools`, - including calling vmaGetAllocationInfo(), or access - their data. -- Some mutexes protecting internal data structures may be locked, so trying to - make or free any allocations, bind buffers or images, map memory, or launch - another simultaneous defragmentation in between may cause stall (when done on - another thread) or deadlock (when done on the same thread), unless you are - 100% sure that defragmented allocations are in different pools. -- Information returned via `pStats` and `pInfo->pAllocationsChanged` are undefined. - They become valid after call to vmaDefragmentationEnd(). -- If `pInfo->commandBuffer` is not null, you must submit that command buffer - and make sure it finished execution before calling vmaDefragmentationEnd(). - -For more information and important limitations regarding defragmentation, see documentation chapter: +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. + +For more information about defragmentation, see documentation chapter: [Defragmentation](@ref defragmentation). */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator VMA_NOT_NULL allocator, - const VmaDefragmentationInfo2* VMA_NOT_NULL pInfo, - VmaDefragmentationStats* VMA_NULLABLE pStats, + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); /** \brief Ends defragmentation process. -Use this function to finish defragmentation started by vmaDefragmentationBegin(). -It is safe to pass `context == null`. The function then does nothing. +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. + +Use this function to finish defragmentation started by vmaBeginDefragmentation(). */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentation( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed informations for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context, - VmaDefragmentationPassInfo* VMA_NOT_NULL pInfo); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); -VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( - VmaAllocator VMA_NOT_NULL allocator, - VmaDefragmentationContext VMA_NULLABLE context); +/** \brief Ends single defragmentation pass. -/** \brief Deprecated. Compacts memory by moving allocations. +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed informations for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. -\param allocator -\param pAllocations Array of allocations that can be moved during this compation. -\param allocationCount Number of elements in pAllocations and pAllocationsChanged arrays. -\param[out] pAllocationsChanged Array of boolean values that will indicate whether matching allocation in pAllocations array has been moved. This parameter is optional. Pass null if you don't need this information. -\param pDefragmentationInfo Configuration parameters. Optional - pass null to use default values. -\param[out] pDefragmentationStats Statistics returned by the function. Optional - pass null if you don't need this information. -\return `VK_SUCCESS` if completed, negative error code in case of error. - -\deprecated This is a part of the old interface. It is recommended to use structure #VmaDefragmentationInfo2 and function vmaDefragmentationBegin() instead. - -This function works by moving allocations to different places (different -`VkDeviceMemory` objects and/or different offsets) in order to optimize memory -usage. Only allocations that are in `pAllocations` array can be moved. All other -allocations are considered nonmovable in this call. Basic rules: - -- Only allocations made in memory types that have - `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` - flags can be compacted. You may pass other allocations but it makes no sense - - these will never be moved. -- Custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT or - #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag are not defragmented. Allocations - passed to this function that come from such pools are ignored. -- Allocations created with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT or - created as dedicated allocations for any other reason are also ignored. -- Both allocations made with or without #VMA_ALLOCATION_CREATE_MAPPED_BIT - flag can be compacted. If not persistently mapped, memory will be mapped - temporarily inside this function if needed. -- You must not pass same #VmaAllocation object multiple times in `pAllocations` array. - -The function also frees empty `VkDeviceMemory` blocks. - -Warning: This function may be time-consuming, so you shouldn't call it too often -(like after every resource creation/destruction). -You can call it on special occasions (like when reloading a game level or -when you just destroyed a lot of objects). Calling it every frame may be OK, but -you should measure that on your platform. - -For more information, see [Defragmentation](@ref defragmentation) chapter. +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. */ -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( VmaAllocator VMA_NOT_NULL allocator, - const VmaAllocation VMA_NOT_NULL* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, - size_t allocationCount, - VkBool32* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationsChanged, - const VmaDefragmentationInfo* VMA_NULLABLE pDefragmentationInfo, - VmaDefragmentationStats* VMA_NULLABLE pDefragmentationStats); + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); /** \brief Binds buffer to allocation. @@ -2408,10 +2406,21 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData( void* VMA_NULLABLE pUserData); /** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). */ -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( VmaVirtualBlock VMA_NOT_NULL virtualBlock, - VmaStatInfo* VMA_NOT_NULL pStatInfo); + VmaDetailedStatistics* VMA_NOT_NULL pStats); /** @} */ @@ -2424,7 +2433,7 @@ VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats( /** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. \param virtualBlock Virtual block. \param[out] ppStatsString Returned string. -\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStats(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. Returned string must be freed using vmaFreeVirtualBlockStatsString(). */ @@ -2466,9 +2475,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -// +// // IMPLEMENTATION -// +// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -2485,6 +2494,10 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( #include <cstring> #include <utility> +#ifdef _MSC_VER + #include <intrin.h> // For functions like __popcnt, _BitScanForward etc. +#endif + /******************************************************************************* CONFIGURATION SECTION @@ -2886,6 +2899,17 @@ If providing your own implementation, you need to implement a subset of std::ato #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) #endif +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDOc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + #ifndef VMA_CLASS_NO_COPY #define VMA_CLASS_NO_COPY(className) \ private: \ @@ -2913,10 +2937,17 @@ static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; +static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200; +static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000; static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; static const uint32_t VMA_VENDOR_ID_AMD = 4098; +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) + #if VMA_STATS_STRING_ENABLED // Correspond to values of enum VmaSuballocationType. @@ -3032,23 +3063,13 @@ typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballoc struct VmaAllocationRequest; class VmaBlockMetadata; -class VmaBlockMetadata_Generic; class VmaBlockMetadata_Linear; -class VmaBlockMetadata_Buddy; class VmaBlockMetadata_TLSF; class VmaBlockVector; -struct VmaDefragmentationMove; -class VmaDefragmentationAlgorithm; -class VmaDefragmentationAlgorithm_Generic; -class VmaDefragmentationAlgorithm_Fast; - struct VmaPoolListItemTraits; -struct VmaBlockDefragmentationContext; -class VmaBlockVectorDefragmentationContext; - struct VmaCurrentBudgetData; class VmaAllocationObjectAllocator; @@ -3056,7 +3077,7 @@ class VmaAllocationObjectAllocator; #endif // _VMA_FORWARD_DECLARATIONS -#ifndef _VMA_FUNCTIONS +#ifndef _VMA_FUNCTIONS // Returns number of bits set to 1 in (v). static inline uint32_t VmaCountBitsSet(uint32_t v) { @@ -3267,12 +3288,8 @@ static const char* VmaAlgorithmToStr(uint32_t algorithm) { case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: return "Linear"; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - return "Buddy"; - case VMA_POOL_CREATE_TLSF_ALGORITHM_BIT: - return "TLSF"; case 0: - return "Default"; + return "TLSF"; default: VMA_ASSERT(0); return ""; @@ -3497,6 +3514,150 @@ static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) mainStruct->pNext = newStruct; } +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. +static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown. + VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == UINT32_MAX) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known."); + return false; + } + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same VK_BUFFER_IMAGE_TRANSFER*. + const bool deviceAccess = (bufImgUsage & ~(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. + // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + else + { + // Always CPU memory, cached. + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + } + // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. + else if(hostAccessSequentialWrite) + { + // Want uncached and write-combined. + outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + else + { + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) + if(deviceAccess) + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) + else + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. + if(preferDevice) + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + } + } + // No CPU access + else + { + // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory + if(deviceAccess) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // No direct GPU access, no CPU access, just transfers. + // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or + // a "swap file" copy to free some GPU memory (then better CPU memory). + // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + } + default: + VMA_ASSERT(0); + } + + // Avoid DEVICE_COHERENT unless explicitly requested. + if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; + } + + return true; +} + //////////////////////////////////////////////////////////////////////////////// // Memory allocation @@ -3635,65 +3796,60 @@ bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& } #endif // _VMA_FUNCTIONS -#ifndef _VMA_STAT_INFO_FUNCTIONS -static void VmaInitStatInfo(VmaStatInfo& outInfo) +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) { - memset(&outInfo, 0, sizeof(outInfo)); - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMin = UINT64_MAX; + outStats.blockCount = 0; + outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; } -// Adds statistics srcInfo into inoutInfo, like: inoutInfo += srcInfo. -static void VmaAddStatInfo(VmaStatInfo& inoutInfo, const VmaStatInfo& srcInfo) +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) { - inoutInfo.blockCount += srcInfo.blockCount; - inoutInfo.allocationCount += srcInfo.allocationCount; - inoutInfo.unusedRangeCount += srcInfo.unusedRangeCount; - inoutInfo.usedBytes += srcInfo.usedBytes; - inoutInfo.unusedBytes += srcInfo.unusedBytes; - inoutInfo.allocationSizeMin = VMA_MIN(inoutInfo.allocationSizeMin, srcInfo.allocationSizeMin); - inoutInfo.allocationSizeMax = VMA_MAX(inoutInfo.allocationSizeMax, srcInfo.allocationSizeMax); - inoutInfo.unusedRangeSizeMin = VMA_MIN(inoutInfo.unusedRangeSizeMin, srcInfo.unusedRangeSizeMin); - inoutInfo.unusedRangeSizeMax = VMA_MAX(inoutInfo.unusedRangeSizeMax, srcInfo.unusedRangeSizeMax); + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; } -static void VmaAddStatInfoAllocation(VmaStatInfo& inoutInfo, VkDeviceSize size) +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) { - ++inoutInfo.allocationCount; - inoutInfo.usedBytes += size; - if (size < inoutInfo.allocationSizeMin) - { - inoutInfo.allocationSizeMin = size; - } - if (size > inoutInfo.allocationSizeMax) - { - inoutInfo.allocationSizeMax = size; - } + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; } -static void VmaAddStatInfoUnusedRange(VmaStatInfo& inoutInfo, VkDeviceSize size) +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) { - ++inoutInfo.unusedRangeCount; - inoutInfo.unusedBytes += size; - if (size < inoutInfo.unusedRangeSizeMin) - { - inoutInfo.unusedRangeSizeMin = size; - } - if (size > inoutInfo.unusedRangeSizeMax) - { - inoutInfo.unusedRangeSizeMax = size; - } + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); } -static void VmaPostprocessCalcStatInfo(VmaStatInfo& inoutInfo) +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) { - inoutInfo.allocationSizeAvg = (inoutInfo.allocationCount > 0) ? - VmaRoundDiv<VkDeviceSize>(inoutInfo.usedBytes, inoutInfo.allocationCount) : 0; - inoutInfo.unusedRangeSizeAvg = (inoutInfo.unusedRangeCount > 0) ? - VmaRoundDiv<VkDeviceSize>(inoutInfo.unusedBytes, inoutInfo.unusedRangeCount) : 0; + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); } -#endif // _VMA_STAT_INFO_FUNCTIONS +#endif // _VMA_STATISTICS_FUNCTIONS #ifndef _VMA_MUTEX_LOCK // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). @@ -4089,8 +4245,9 @@ VmaSmallVector<T, AllocatorT, N>::VmaSmallVector(size_t count, const AllocatorT& template<typename T, typename AllocatorT, size_t N> void VmaSmallVector<T, AllocatorT, N>::push_back(const T& src) { - resize(m_Count + 1); - data()[m_Count] = src; + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; } template<typename T, typename AllocatorT, size_t N> @@ -4589,6 +4746,7 @@ public: class iterator { + friend class const_iterator; friend class VmaList<T, AllocatorT>; public: iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} @@ -4614,6 +4772,7 @@ public: }; class reverse_iterator { + friend class const_reverse_iterator; friend class VmaList<T, AllocatorT>; public: reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} @@ -4645,6 +4804,8 @@ public: const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } @@ -4671,6 +4832,8 @@ public: const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + reverse_iterator drop_const() { return { const_cast<VmaRawList<T>*>(m_pList), const_cast<VmaListItem<T>*>(m_pItem) }; } + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } @@ -4707,8 +4870,8 @@ public: reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } - const_reverse_iterator crbegin() { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } - const_reverse_iterator crend() { return const_reverse_iterator(&m_RawList, VMA_NULL); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } const_reverse_iterator rbegin() const { return crbegin(); } const_reverse_iterator rend() const { return crend(); } @@ -4813,7 +4976,7 @@ public: VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } - + size_t GetCount() const { return m_Count; } bool IsEmpty() const { return m_Count == 0; } ItemType* Front() { return m_Front; } @@ -5077,13 +5240,14 @@ public: iterator begin() { return m_Vector.begin(); } iterator end() { return m_Vector.end(); } + size_t size() { return m_Vector.size(); } void insert(const PairType& pair); iterator find(const KeyT& key); void erase(iterator it); private: - VmaVector< PairType, VmaStlAllocator<PairType> > m_Vector; + VmaVector< PairType, VmaStlAllocator<PairType>> m_Vector; }; #ifndef _VMA_MAP_FUNCTIONS @@ -5224,7 +5388,7 @@ public: // Writes a string value inside "". // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped. void WriteString(const char* pStr); - + // Begins writing a string value. // Call BeginString, ContinueString, ContinueString, ..., EndString instead of // WriteString to conveniently build the string content incrementally, made of @@ -5503,33 +5667,31 @@ void VmaJsonWriter::WriteIndent(bool oneLess) } #endif // _VMA_JSON_WRITER_FUNCTIONS -static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) { json.BeginObject(); - json.WriteString("Blocks"); - json.WriteNumber(stat.blockCount); + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); - json.WriteString("Allocations"); - json.WriteNumber(stat.allocationCount); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); - json.WriteString("UnusedRanges"); + json.WriteString("UnusedRangeCount"); json.WriteNumber(stat.unusedRangeCount); - json.WriteString("UsedBytes"); - json.WriteNumber(stat.usedBytes); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); - json.WriteString("UnusedBytes"); - json.WriteNumber(stat.unusedBytes); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); - if (stat.allocationCount > 1) + if (stat.statistics.allocationCount > 1) { json.WriteString("AllocationSize"); json.BeginObject(true); json.WriteString("Min"); json.WriteNumber(stat.allocationSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.allocationSizeAvg); json.WriteString("Max"); json.WriteNumber(stat.allocationSizeMax); json.EndObject(); @@ -5541,8 +5703,6 @@ static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) json.BeginObject(true); json.WriteString("Min"); json.WriteNumber(stat.unusedRangeSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.unusedRangeSizeAvg); json.WriteString("Max"); json.WriteNumber(stat.unusedRangeSizeMax); json.EndObject(); @@ -5552,12 +5712,109 @@ static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) } #endif // _VMA_JSON_WRITER +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block. + void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. + bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. + + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + ++m_MinorCounter; + else if(m_MajorCounter > 0) + --m_MajorCounter, --m_MinorCounter; + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + #ifndef _VMA_DEVICE_MEMORY_BLOCK /* Represents a single block of device memory (`VkDeviceMemory`) with all the data about its regions (aka suballocations, #VmaAllocation), assigned and free. -Thread-safety: This class must be externally synchronized. +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. */ class VmaDeviceMemoryBlock { @@ -5586,6 +5843,12 @@ public: uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } uint32_t GetId() const { return m_Id; } void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. + void PostAlloc() { m_MappingHysteresis.PostAlloc(); } + void PostFree(VmaAllocator hAllocator); // Validates all data structures inside this object. If not valid, returns false. bool Validate() const; @@ -5622,7 +5885,8 @@ private: Also protects m_MapCount, m_pMappedData. Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. */ - VMA_MUTEX m_Mutex; + VMA_MUTEX m_MapAndBindMutex; + VmaMappingHysteresis m_MappingHysteresis; uint32_t m_MapCount; void* m_pMappedData; }; @@ -5633,9 +5897,12 @@ struct VmaAllocation_T { friend struct VmaDedicatedAllocationListItemTraits; - static const uint8_t MAP_COUNT_FLAG_PERSISTENT_MAP = 0x80; - - enum FLAGS { FLAG_USER_DATA_STRING = 0x01 }; + enum FLAGS + { + FLAG_USER_DATA_STRING = 0x01, + FLAG_PERSISTENT_MAP = 0x02, + FLAG_MAPPING_ALLOWED = 0x04, + }; public: enum ALLOCATION_TYPE @@ -5646,7 +5913,7 @@ public: }; // This struct is allocated using VmaPoolAllocator. - VmaAllocation_T(bool userDataString); + VmaAllocation_T(bool userDataString, bool mappingAllowed); ~VmaAllocation_T(); void InitBlockAllocation( @@ -5675,19 +5942,17 @@ public: VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } - bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } + bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } + bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } void SetUserData(VmaAllocator hAllocator, void* pUserData); - void ChangeBlockAllocation(VmaAllocator hAllocator, VmaDeviceMemoryBlock* block, VmaAllocHandle allocHandle); - void ChangeAllocHandle(VmaAllocHandle newAllocHandle); + void SwapBlockAllocation(VmaAllocation allocation); VmaAllocHandle GetAllocHandle() const; VkDeviceSize GetOffset() const; VmaPool GetParentPool() const; VkDeviceMemory GetMemory() const; void* GetMappedData() const; - void DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo); - void BlockAllocMap(); void BlockAllocUnmap(); VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); @@ -5730,8 +5995,7 @@ private: uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType - // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. - // Bits with mask 0x7F are reference counter for vmaMapMemory()/vmaUnmapMemory(). + // Reference counter for vmaMapMemory()/vmaUnmapMemory(). uint8_t m_MapCount; uint8_t m_Flags; // enum FLAGS #if VMA_STATS_STRING_ENABLED @@ -5784,8 +6048,8 @@ public: void Init(bool useMutex) { m_UseMutex = useMutex; } bool Validate(); - void AddStats(VmaStats* stats, uint32_t memTypeIndex, uint32_t memHeapIndex); - void AddPoolStats(VmaPoolStats* stats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + void AddStatistics(VmaStatistics& inoutStats); #if VMA_STATS_STRING_ENABLED // Writes JSON array with the list of allocations. void BuildStatsString(VmaJsonWriter& json); @@ -5830,31 +6094,30 @@ bool VmaDedicatedAllocationList::Validate() return true; } -void VmaDedicatedAllocationList::AddStats(VmaStats* stats, uint32_t memTypeIndex, uint32_t memHeapIndex) +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) { - VmaMutexLockRead lock(m_Mutex, m_UseMutex); - for (VmaAllocation alloc = m_AllocationList.Front(); - alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) { - VmaStatInfo allocationStatInfo; - alloc->DedicatedAllocCalcStatsInfo(allocationStatInfo); - VmaAddStatInfo(stats->total, allocationStatInfo); - VmaAddStatInfo(stats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(stats->memoryHeap[memHeapIndex], allocationStatInfo); + const VkDeviceSize size = item->GetSize(); + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); } } -void VmaDedicatedAllocationList::AddPoolStats(VmaPoolStats* stats) +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) { VmaMutexLockRead lock(m_Mutex, m_UseMutex); - const size_t allocCount = m_AllocationList.GetCount(); - stats->allocationCount += allocCount; - stats->blockCount += allocCount; + const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); + inoutStats.blockCount += allocCount; + inoutStats.allocationCount += allocCount; for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) { - stats->size += item->GetSize(); + const VkDeviceSize size = item->GetSize(); + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size; } } @@ -5981,14 +6244,18 @@ public: virtual bool IsEmpty() const = 0; virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; + virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + + virtual VmaAllocHandle GetAllocationListBegin() const = 0; + virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; - // Must set blockCount to 1. - virtual void CalcAllocationStatInfo(VmaStatInfo& outInfo) const = 0; // Shouldn't modify blockCount. - virtual void AddPoolStats(VmaPoolStats& inoutStats) const = 0; + virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; + virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; #if VMA_STATS_STRING_ENABLED - virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; + // mapRefCount == UINT32_MAX means unspecified. + virtual void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const = 0; #endif // Tries to find a place for suballocation with given parameters inside this block. @@ -6028,10 +6295,12 @@ protected: void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; #if VMA_STATS_STRING_ENABLED + // mapRefCount == UINT32_MAX means unspecified. void PrintDetailedMap_Begin(class VmaJsonWriter& json, VkDeviceSize unusedBytes, size_t allocationCount, - size_t unusedRangeCount) const; + size_t unusedRangeCount, + uint32_t mapRefCount) const; void PrintDetailedMap_Allocation(class VmaJsonWriter& json, VkDeviceSize offset, VkDeviceSize size, void* userData) const; void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, @@ -6098,12 +6367,12 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size } #endif // VMA_STATS_STRING_ENABLED } - + } #if VMA_STATS_STRING_ENABLED void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, - VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount, uint32_t mapRefCount) const { json.BeginObject(); @@ -6119,6 +6388,12 @@ void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, json.WriteString("UnusedRanges"); json.WriteNumber((uint64_t)unusedRangeCount); + if(mapRefCount != UINT32_MAX) + { + json.WriteString("MapRefCount"); + json.WriteNumber(mapRefCount); + } + json.WriteString("Suballocations"); json.BeginArray(); } @@ -6408,7 +6683,7 @@ uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType) { // When current alloc type is free then it can be overriden by new type - if (page.allocCount == 0 || page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE) + if (page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE)) page.allocType = allocType; ++page.allocCount; @@ -6416,6 +6691,7 @@ void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocTy #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +#if 0 #ifndef _VMA_BLOCK_METADATA_GENERIC class VmaBlockMetadata_Generic : public VmaBlockMetadata { @@ -6436,11 +6712,11 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -6459,15 +6735,13 @@ public: void* userData) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; - // For defragmentation - bool IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const; - private: uint32_t m_FreeCount; VkDeviceSize m_SumFreeSize; @@ -6477,7 +6751,7 @@ private: VkDeviceSize AlignAllocationSize(VkDeviceSize size) const { return IsVirtual() ? size : VmaAlignUp(size, (VkDeviceSize)16); } - VmaSuballocationList::iterator FindAtOffset(VkDeviceSize offset); + VmaSuballocationList::iterator FindAtOffset(VkDeviceSize offset) const; bool ValidateFreeSuballocationList() const; // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. @@ -6612,42 +6886,37 @@ bool VmaBlockMetadata_Generic::Validate() const return true; } -void VmaBlockMetadata_Generic::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_Generic::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); for (const auto& suballoc : m_Suballocations) { if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - VmaAddStatInfoAllocation(outInfo, suballoc.size); - } + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); else - { - VmaAddStatInfoUnusedRange(outInfo, suballoc.size); - } + VmaAddDetailedStatisticsUnusedRange(inoutStats, suballoc.size); } } -void VmaBlockMetadata_Generic::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Generic::AddStatistics(VmaStatistics& inoutStats) const { - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - - inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize; - inoutStats.allocationCount += rangeCount - m_FreeCount; - inoutStats.unusedRangeCount += m_FreeCount; + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_Suballocations.size() - m_FreeCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - m_SumFreeSize; } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { PrintDetailedMap_Begin(json, m_SumFreeSize, // unusedBytes m_Suballocations.size() - (size_t)m_FreeCount, // allocationCount - m_FreeCount); // unusedRangeCount + m_FreeCount, // unusedRangeCount + mapRefCount); for (const auto& suballoc : m_Suballocations) { @@ -6740,7 +7009,7 @@ bool VmaBlockMetadata_Generic::CreateAllocationRequest( } else { - VMA_ASSERT(strategy == VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT); + VMA_ASSERT(strategy & (VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT )); // Search staring from biggest suballocations. for (size_t index = freeSuballocCount; index--; ) { @@ -6770,7 +7039,7 @@ VkResult VmaBlockMetadata_Generic::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -6851,6 +7120,37 @@ void VmaBlockMetadata_Generic::GetAllocationInfo(VmaAllocHandle allocHandle, Vma outInfo.pUserData = suballoc.userData; } +void* VmaBlockMetadata_Generic::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindAtOffset((VkDeviceSize)allocHandle - 1)->userData; +} + +VmaAllocHandle VmaBlockMetadata_Generic::GetAllocationListBegin() const +{ + if (IsEmpty()) + return VK_NULL_HANDLE; + + for (const auto& suballoc : m_Suballocations) + { + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + return (VmaAllocHandle)(suballoc.offset + 1); + } + VMA_ASSERT(false && "Should contain at least 1 allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Generic::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + VmaSuballocationList::const_iterator prev = FindAtOffset((VkDeviceSize)prevAlloc - 1); + + for (VmaSuballocationList::const_iterator it = ++prev; it != m_Suballocations.end(); ++it) + { + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + return (VmaAllocHandle)(it->offset + 1); + } + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_Generic::Clear() { const VkDeviceSize size = GetSize(); @@ -6885,15 +7185,15 @@ void VmaBlockMetadata_Generic::DebugLogAllAllocations() const } } -VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSize offset) +VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSize offset) const { VMA_HEAVY_ASSERT(!m_Suballocations.empty()); const VkDeviceSize last = m_Suballocations.rbegin()->offset; if (last == offset) - return m_Suballocations.rbegin(); + return m_Suballocations.rbegin().drop_const(); const VkDeviceSize first = m_Suballocations.begin()->offset; if (first == offset) - return m_Suballocations.begin(); + return m_Suballocations.begin().drop_const(); const size_t suballocCount = m_Suballocations.size(); const VkDeviceSize step = (last - first + m_Suballocations.begin()->size) / suballocCount; @@ -6903,12 +7203,11 @@ VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSi suballocItem != end; ++suballocItem) { - VmaSuballocation& suballoc = *suballocItem; - if (suballoc.offset == offset) - return suballocItem; + if (suballocItem->offset == offset) + return suballocItem.drop_const(); } VMA_ASSERT(false && "Not found!"); - return m_Suballocations.end(); + return m_Suballocations.end().drop_const(); }; // If requested offset is closer to the end of range, search from the end if (offset - first > suballocCount * step / 2) @@ -7152,37 +7451,9 @@ void VmaBlockMetadata_Generic::UnregisterFreeSuballocation(VmaSuballocationList: //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); } - -bool VmaBlockMetadata_Generic::IsBufferImageGranularityConflictPossible( - VkDeviceSize bufferImageGranularity, - VmaSuballocationType& inOutPrevSuballocType) const -{ - if (bufferImageGranularity == 1 || IsEmpty() || IsVirtual()) - { - return false; - } - - VkDeviceSize minAlignment = VK_WHOLE_SIZE; - bool typeConflictFound = false; - for (const auto& suballoc : m_Suballocations) - { - const VmaSuballocationType suballocType = suballoc.type; - if (suballocType != VMA_SUBALLOCATION_TYPE_FREE) - { - VmaAllocation const alloc = (VmaAllocation)suballoc.userData; - minAlignment = VMA_MIN(minAlignment, alloc->GetAlignment()); - if (VmaIsBufferImageGranularityConflict(inOutPrevSuballocType, suballocType)) - { - typeConflictFound = true; - } - inOutPrevSuballocType = suballocType; - } - } - - return typeConflictFound || minAlignment >= bufferImageGranularity; -} #endif // _VMA_BLOCK_METADATA_GENERIC_FUNCTIONS #endif // _VMA_BLOCK_METADATA_GENERIC +#endif // #if 0 #ifndef _VMA_BLOCK_METADATA_LINEAR /* @@ -7279,11 +7550,11 @@ public: bool Validate() const override; size_t GetAllocationCount() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -7303,6 +7574,9 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -7349,7 +7623,7 @@ private: const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } - VmaSuballocation& FindSuballocation(VkDeviceSize offset); + VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; bool ShouldCompact1st() const; void CleanupAfterFree(); @@ -7541,7 +7815,7 @@ size_t VmaBlockMetadata_Linear::GetAllocationCount() const AccessSuballocations2nd().size() - m_2ndNullItemsCount; } -void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -7549,8 +7823,8 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const const size_t suballoc1stCount = suballocations1st.size(); const size_t suballoc2ndCount = suballocations2nd.size(); - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; VkDeviceSize lastOffset = 0; @@ -7577,12 +7851,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -7595,7 +7869,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < freeSpace2ndTo1stEnd) { const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -7626,12 +7900,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -7644,7 +7918,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < freeSpace1stTo2ndEnd) { const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -7674,12 +7948,12 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // 2. Process this allocation. // There is allocation with suballoc.offset, suballoc.size. - VmaAddStatInfoAllocation(outInfo, suballoc.size); + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); // 3. Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; @@ -7692,7 +7966,7 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const if (lastOffset < size) { const VkDeviceSize unusedRangeSize = size - lastOffset; - VmaAddStatInfoUnusedRange(outInfo, unusedRangeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); } // End of loop. @@ -7700,11 +7974,9 @@ void VmaBlockMetadata_Linear::CalcAllocationStatInfo(VmaStatInfo& outInfo) const } } } - - outInfo.unusedBytes = size - outInfo.usedBytes; } -void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const { const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); @@ -7712,7 +7984,9 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const const size_t suballoc1stCount = suballocations1st.size(); const size_t suballoc2ndCount = suballocations2nd.size(); - inoutStats.size += size; + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; VkDeviceSize lastOffset = 0; @@ -7739,8 +8013,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -7758,8 +8030,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to freeSpace2ndTo1stEnd. const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -7790,8 +8060,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -7809,8 +8077,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to freeSpace1stTo2ndEnd. const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -7840,8 +8106,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to suballoc.offset. const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // 2. Process this allocation. @@ -7859,8 +8123,6 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const { // There is free space from lastOffset to size. const VkDeviceSize unusedRangeSize = size - lastOffset; - inoutStats.unusedSize += unusedRangeSize; - ++inoutStats.unusedRangeCount; } // End of loop. @@ -7871,7 +8133,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -8033,7 +8295,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const } const VkDeviceSize unusedBytes = size - usedBytes; - PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount, mapRefCount); // SECOND PASS lastOffset = 0; @@ -8219,7 +8481,7 @@ VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -8233,7 +8495,7 @@ VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -8405,6 +8667,25 @@ void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaV outInfo.pUserData = suballoc.userData; } +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_Linear::Clear() { m_SumFreeSize = GetSize(); @@ -8436,10 +8717,10 @@ void VmaBlockMetadata_Linear::DebugLogAllAllocations() const DebugLogAllocation(it->offset, it->size, it->userData); } -VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const { - SuballocationVectorType& suballocations1st = AccessSuballocations1st(); - SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); VmaSuballocation refSuballoc; refSuballoc.offset = offset; @@ -8447,31 +8728,31 @@ VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset // Item from the 1st vector. { - const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( suballocations1st.begin() + m_1stNullItemsBeginCount, suballocations1st.end(), refSuballoc, VmaSuballocationOffsetLess()); if (it != suballocations1st.end()) { - return *it; + return const_cast<VmaSuballocation&>(*it); } } if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) { // Rest of members stays uninitialized intentionally for better performance. - const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); if (it != suballocations2nd.end()) { - return *it; + return const_cast<VmaSuballocation&>(*it); } } VMA_ASSERT(0 && "Allocation not found in linear allocator!"); - return suballocations1st.back(); // Should never occur. + return const_cast<VmaSuballocation&>(suballocations1st.back()); // Should never occur. } bool VmaBlockMetadata_Linear::ShouldCompact1st() const @@ -8882,6 +9163,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( #endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS #endif // _VMA_BLOCK_METADATA_LINEAR +#if 0 #ifndef _VMA_BLOCK_METADATA_BUDDY /* - GetSize() is the original size of allocated memory block. @@ -8912,11 +9194,11 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -8934,6 +9216,9 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; @@ -9007,11 +9292,11 @@ private: } return VmaNextPow2(size); } - Node* FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel); + Node* FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const; void DeleteNodeChildren(Node* node); bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; - void CalcAllocationStatInfoNode(VmaStatInfo& inoutInfo, const Node* node, VkDeviceSize levelNodeSize) const; + void AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const; // Adds node to the front of FreeList at given level. // node->type must be FREE. // node->free.prev, next can be undefined. @@ -9115,46 +9400,39 @@ bool VmaBlockMetadata_Buddy::Validate() const return true; } -void VmaBlockMetadata_Buddy::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_Buddy::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); - CalcAllocationStatInfoNode(outInfo, m_Root, LevelToNodeSize(0)); + AddNodeToDetailedStatistics(inoutStats, m_Root, LevelToNodeSize(0)); const VkDeviceSize unusableSize = GetUnusableSize(); if (unusableSize > 0) - { - VmaAddStatInfoUnusedRange(outInfo, unusableSize); - } + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusableSize); } -void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_Buddy::AddStatistics(VmaStatistics& inoutStats) const { - const VkDeviceSize unusableSize = GetUnusableSize(); - - inoutStats.size += GetSize(); - inoutStats.unusedSize += m_SumFreeSize + unusableSize; - inoutStats.allocationCount += m_AllocationCount; - inoutStats.unusedRangeCount += m_FreeCount; - - if (unusableSize > 0) - { - ++inoutStats.unusedRangeCount; - } + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocationCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - m_SumFreeSize; } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { - VmaStatInfo stat; - CalcAllocationStatInfo(stat); + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); PrintDetailedMap_Begin( json, - stat.unusedBytes, - stat.allocationCount, - stat.unusedRangeCount); + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount, + mapRefCount); PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); @@ -9302,6 +9580,25 @@ void VmaBlockMetadata_Buddy::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVi outInfo.pUserData = node->allocation.userData; } +void* VmaBlockMetadata_Buddy::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + uint32_t level = 0; + const Node* const node = FindAllocationNode((VkDeviceSize)allocHandle - 1, level); + return node->allocation.userData; +} + +VmaAllocHandle VmaBlockMetadata_Buddy::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Buddy::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_Buddy::DeleteNodeChildren(Node* node) { if (node->type == Node::TYPE_SPLIT) @@ -9330,7 +9627,7 @@ void VmaBlockMetadata_Buddy::SetAllocationUserData(VmaAllocHandle allocHandle, v node->allocation.userData = userData; } -VmaBlockMetadata_Buddy::Node* VmaBlockMetadata_Buddy::FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) +VmaBlockMetadata_Buddy::Node* VmaBlockMetadata_Buddy::FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const { Node* node = m_Root; VkDeviceSize nodeOffset = 0; @@ -9446,23 +9743,23 @@ void VmaBlockMetadata_Buddy::Free(VmaAllocHandle allocHandle) AddToFreeListFront(level, node); } -void VmaBlockMetadata_Buddy::CalcAllocationStatInfoNode(VmaStatInfo& inoutInfo, const Node* node, VkDeviceSize levelNodeSize) const +void VmaBlockMetadata_Buddy::AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const { switch (node->type) { case Node::TYPE_FREE: - VmaAddStatInfoUnusedRange(inoutInfo, levelNodeSize); + VmaAddDetailedStatisticsUnusedRange(inoutStats, levelNodeSize); break; case Node::TYPE_ALLOCATION: - VmaAddStatInfoAllocation(inoutInfo, levelNodeSize); + VmaAddDetailedStatisticsAllocation(inoutStats, levelNodeSize); break; case Node::TYPE_SPLIT: { const VkDeviceSize childrenNodeSize = levelNodeSize / 2; const Node* const leftChild = node->split.leftChild; - CalcAllocationStatInfoNode(inoutInfo, leftChild, childrenNodeSize); + AddNodeToDetailedStatistics(inoutStats, leftChild, childrenNodeSize); const Node* const rightChild = leftChild->buddy; - CalcAllocationStatInfoNode(inoutInfo, rightChild, childrenNodeSize); + AddNodeToDetailedStatistics(inoutStats, rightChild, childrenNodeSize); } break; default: @@ -9527,6 +9824,8 @@ void VmaBlockMetadata_Buddy::DebugLogAllAllocationNode(Node* node, uint32_t leve { switch (node->type) { + case Node::TYPE_FREE: + break; case Node::TYPE_ALLOCATION: DebugLogAllocation(node->offset, LevelToNodeSize(level), node->allocation.userData); break; @@ -9569,6 +9868,7 @@ void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, con #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_BLOCK_METADATA_BUDDY_FUNCTIONS #endif // _VMA_BLOCK_METADATA_BUDDY +#endif // #if 0 #ifndef _VMA_BLOCK_METADATA_TLSF // To not search current larger region if first allocation won't succeed and skip to smaller range @@ -9591,11 +9891,11 @@ public: void Init(VkDeviceSize size) override; bool Validate() const override; - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const override; - void AddPoolStats(VmaPoolStats& inoutStats) const override; + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -9614,6 +9914,9 @@ public: void Free(VmaAllocHandle allocHandle) override; void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; void Clear() override; void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; void DebugLogAllAllocations() const override; @@ -9839,34 +10142,32 @@ bool VmaBlockMetadata_TLSF::Validate() const return true; } -void VmaBlockMetadata_TLSF::CalcAllocationStatInfo(VmaStatInfo& outInfo) const +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const { - VmaInitStatInfo(outInfo); - outInfo.blockCount = 1; + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); if (m_NullBlock->size > 0) - VmaAddStatInfoUnusedRange(outInfo, m_NullBlock->size); + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) { if (block->IsFree()) - VmaAddStatInfoUnusedRange(outInfo, block->size); + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); else - VmaAddStatInfoAllocation(outInfo, block->size); + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); } } -void VmaBlockMetadata_TLSF::AddPoolStats(VmaPoolStats& inoutStats) const +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const { - inoutStats.size += GetSize(); - inoutStats.unusedSize += GetSumFreeSize(); - inoutStats.allocationCount += m_AllocCount; - inoutStats.unusedRangeCount += m_BlocksFreeCount; - if(m_NullBlock->size > 0) - ++inoutStats.unusedRangeCount; + inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { size_t blockCount = m_AllocCount + m_BlocksFreeCount; VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); @@ -9879,13 +10180,16 @@ void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const } VMA_ASSERT(i == 0); - VmaStatInfo stat; - CalcAllocationStatInfo(stat); + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); - PrintDetailedMap_Begin(json, - stat.unusedBytes, - stat.allocationCount, - stat.unusedRangeCount); + PrintDetailedMap_Begin( + json, + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount, + mapRefCount); for (; i < blockCount; ++i) { @@ -9996,6 +10300,33 @@ bool VmaBlockMetadata_TLSF::CreateAllocationRequest( nextListBlock = nextListBlock->NextFree(); } } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) + { + // Perform search from the start + VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); + VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator); + + size_t i = m_BlocksFreeCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree() && block->size >= allocSize) + blockList[--i] = block; + } + + for (; i < m_BlocksFreeCount; ++i) + { + Block& block = *blockList[i]; + if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Whole range searched, no more memory + return false; + } else { // Check larger bucket @@ -10046,7 +10377,7 @@ VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -10228,6 +10559,40 @@ void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVir outInfo.pUserData = block->UserData(); } +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return VK_NULL_HANDLE; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + return VK_NULL_HANDLE; +} + void VmaBlockMetadata_TLSF::Clear() { m_AllocCount = 0; @@ -10440,7 +10805,7 @@ Synchronized internally with a mutex. */ class VmaBlockVector { - friend class VmaDefragmentationAlgorithm_Generic; + friend struct VmaDefragmentationContext_T; VMA_CLASS_NO_COPY(VmaBlockVector) public: VmaBlockVector( @@ -10468,9 +10833,14 @@ public: bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; } float GetPriority() const { return m_Priority; } void* const GetAllocationNextPtr() const { return m_pMemoryAllocateNext; } + // To be used only while the m_Mutex is locked. Used during defragmentation. + size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. + VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } VkResult CreateMinBlocks(); - void AddPoolStats(VmaPoolStats* pStats); + void AddStatistics(VmaStatistics& inoutStats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); bool IsEmpty(); bool IsCorruptionDetectionEnabled() const; @@ -10482,9 +10852,7 @@ public: size_t allocationCount, VmaAllocation* pAllocations); - void Free(const VmaAllocation hAllocation); - // Adds statistics of this BlockVector to pStats. - void AddStats(VmaStats* pStats); + void Free(const VmaAllocation hAllocation, bool incrementalSort = true); #if VMA_STATS_STRING_ENABLED void PrintDetailedMap(class VmaJsonWriter& json); @@ -10492,34 +10860,6 @@ public: VkResult CheckCorruption(); - // Saves results in pCtx->res. - void Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer); - void DefragmentationEnd( - class VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats); - - uint32_t ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves); - - void CommitDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats); - - //////////////////////////////////////////////////////////////////////////////// - // To be used only while the m_Mutex is locked. Used during defragmentation. - - size_t GetBlockCount() const { return m_Blocks.size(); } - VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } - size_t CalcAllocationCount() const; - bool IsBufferImageGranularityConflictPossible() const; - private: const VmaAllocator m_hAllocator; const VmaPool m_hParentPool; @@ -10535,9 +10875,6 @@ private: void* const m_pMemoryAllocateNext; VMA_RW_MUTEX m_Mutex; - /* There can be at most one allocation that is completely empty (except when minBlockCount > 0) - - a hysteresis to avoid pessimistic case of alternating creation and destruction of a VkDeviceMemory. */ - bool m_HasEmptyBlock; // Incrementally sorted by sumFreeSize, ascending. VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks; uint32_t m_NextBlockId; @@ -10548,6 +10885,7 @@ private: // Performs single step in sorting m_Blocks. They may not be fully sorted // after this call. void IncrementallySortBlocks(); + void SortByFreeSize(); VkResult AllocatePage( VkDeviceSize size, @@ -10566,327 +10904,90 @@ private: uint32_t strategy, VmaAllocation* pAllocation); - VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); - // Saves result to pCtx->res. - void ApplyDefragmentationMovesCpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves); - // Saves result to pCtx->res. - void ApplyDefragmentationMovesGpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkCommandBuffer commandBuffer); + VkResult CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation); - /* - Used during defragmentation. pDefragmentationStats is optional. It is in/out - - updated with new data. - */ - void FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats); - void UpdateHasEmptyBlock(); + VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); + bool HasEmptyBlock(); }; #endif // _VMA_BLOCK_VECTOR -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM -struct VmaDefragmentationMove -{ - size_t srcBlockIndex; - size_t dstBlockIndex; - VkDeviceSize srcOffset; - VkDeviceSize dstOffset; - VmaAllocHandle dstHandle; - VkDeviceSize size; - VmaAllocation hAllocation; - VmaDeviceMemoryBlock* pSrcBlock; - VmaDeviceMemoryBlock* pDstBlock; -}; - -/* -Performs defragmentation: - -- Updates `pBlockVector->m_pMetadata`. -- Updates allocations by calling ChangeBlockAllocation() or ChangeOffset(). -- Does not move actual data, only returns requested moves as `moves`. -*/ -class VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm) -public: - VmaDefragmentationAlgorithm( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector) - : m_hAllocator(hAllocator), - m_pBlockVector(pBlockVector) {} - virtual ~VmaDefragmentationAlgorithm() = default; - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) = 0; - virtual void AddAll() = 0; - - virtual VkResult Defragment( - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) = 0; - - virtual VkDeviceSize GetBytesMoved() const = 0; - virtual uint32_t GetAllocationsMoved() const = 0; - -protected: - struct AllocationInfo - { - VmaAllocation m_hAllocation; - VkBool32* m_pChanged; - - AllocationInfo() : m_hAllocation(VK_NULL_HANDLE), m_pChanged(VMA_NULL) {} - AllocationInfo(VmaAllocation hAlloc, VkBool32* pChanged) : m_hAllocation(hAlloc), m_pChanged(pChanged) {} - }; - - VmaAllocator const m_hAllocator; - VmaBlockVector* const m_pBlockVector; -}; - -#endif // _VMA_DEFRAGMENTATION_ALGORITHM - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC -class VmaDefragmentationAlgorithm_Generic : public VmaDefragmentationAlgorithm -{ - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Generic) -public: - VmaDefragmentationAlgorithm_Generic( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Generic(); - - virtual void AddAll() { m_AllAllocations = true; } - virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - virtual VkResult Defragment( - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); - -private: - struct AllocationInfoSizeGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const; - }; - struct AllocationInfoOffsetGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const; - }; - struct BlockInfo - { - size_t m_OriginalBlockIndex; - VmaDeviceMemoryBlock* m_pBlock; - bool m_HasNonMovableAllocations; - VmaVector<AllocationInfo, VmaStlAllocator<AllocationInfo>> m_Allocations; - - BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks); - - void CalcHasNonMovableAllocations(); - void SortAllocationsBySizeDescending(); - void SortAllocationsByOffsetDescending(); - }; - struct BlockPointerLess - { - bool operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const; - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const; - }; - // 1. Blocks with some non-movable allocations go first. - // 2. Blocks with smaller sumFreeSize go first. - struct BlockInfoCompareMoveDestination - { - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const; - }; - typedef VmaVector<BlockInfo*, VmaStlAllocator<BlockInfo*>> BlockInfoVector; - - BlockInfoVector m_Blocks; - uint32_t m_AllocationCount; - bool m_AllAllocations; - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - static bool MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset); - - size_t CalcBlocksWithNonMovableCount() const; - VkResult DefragmentRound( - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations); -}; -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_FAST -class VmaDefragmentationAlgorithm_Fast : public VmaDefragmentationAlgorithm +#ifndef _VMA_DEFRAGMENTATION_CONTEXT +struct VmaDefragmentationContext_T { - VMA_CLASS_NO_COPY(VmaDefragmentationAlgorithm_Fast) + VMA_CLASS_NO_COPY(VmaDefragmentationContext_T) public: - VmaDefragmentationAlgorithm_Fast( + VmaDefragmentationContext_T( VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported); - virtual ~VmaDefragmentationAlgorithm_Fast() = default; + const VmaDefragmentationInfo& info); + ~VmaDefragmentationContext_T(); - virtual void AddAll() { m_AllAllocations = true; } - virtual VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - virtual uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - virtual void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) { ++m_AllocationCount; } + void GetStats(VmaDefragmentationStats& outStats) { outStats = m_Stats; } - virtual VkResult Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags); + VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); + VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); private: - struct BlockInfo + struct ImmovableBlock { - size_t origBlockIndex; + uint32_t vectorIndex; + VmaDeviceMemoryBlock* block; }; - class FreeSpaceDatabase + struct StateExtensive { - public: - FreeSpaceDatabase(); - - void Register(size_t blockInfoIndex, VkDeviceSize offset, VkDeviceSize size); - bool Fetch(VkDeviceSize alignment, VkDeviceSize size, - size_t& outBlockInfoIndex, VkDeviceSize& outDstOffset); - - private: - static const size_t MAX_COUNT = 4; - - struct FreeSpace + enum class Operation : uint8_t { - size_t blockInfoIndex; // SIZE_MAX means this structure is invalid. - VkDeviceSize offset; - VkDeviceSize size; - } m_FreeSpaces[MAX_COUNT]; - }; - - const bool m_OverlappingMoveSupported; - - uint32_t m_AllocationCount; - bool m_AllAllocations; - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - VmaVector<BlockInfo, VmaStlAllocator<BlockInfo>> m_BlockInfos; - - void PreprocessMetadata(); - void PostprocessMetadata(); - void InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc); -}; -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_FAST + FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, + MoveBuffers, MoveTextures, MoveAll, + Cleanup, Done + }; -#ifndef _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT -struct VmaBlockDefragmentationContext -{ - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, + Operation operation = Operation::FindFreeBlockTexture; + size_t firstFreeBlock = SIZE_MAX; }; - uint32_t flags; - VkBuffer hBuffer; -}; - -class VmaBlockVectorDefragmentationContext -{ - VMA_CLASS_NO_COPY(VmaBlockVectorDefragmentationContext) -public: - VkResult res; - bool mutexLocked; - VmaVector<VmaBlockDefragmentationContext, VmaStlAllocator<VmaBlockDefragmentationContext>> blockContexts; - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> defragmentationMoves; - uint32_t defragmentationMovesProcessed; - uint32_t defragmentationMovesCommitted; - bool hasDefragmentationPlan; - - VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, // Optional. - VmaBlockVector* pBlockVector); - ~VmaBlockVectorDefragmentationContext(); - - VmaPool GetCustomPool() const { return m_hCustomPool; } - VmaBlockVector* GetBlockVector() const { return m_pBlockVector; } - VmaDefragmentationAlgorithm* GetAlgorithm() const { return m_pAlgorithm; } - void AddAll() { m_AllAllocations = true; } - - void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - void Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags); - -private: - struct AllocInfo + struct MoveAllocationData { - VmaAllocation hAlloc; - VkBool32* pChanged; + VkDeviceSize size; + VkDeviceSize alignment; + VmaSuballocationType type; + VmaAllocationCreateFlags flags; + VmaDefragmentationMove move = {}; }; - const VmaAllocator m_hAllocator; - // Null if not from custom pool. - const VmaPool m_hCustomPool; - // Redundant, for convenience not to fetch from m_hCustomPool->m_BlockVector or m_hAllocator->m_pBlockVectors. - VmaBlockVector* const m_pBlockVector; - // Owner of this object. - VmaDefragmentationAlgorithm* m_pAlgorithm; - // Used between constructor and Begin. - VmaVector<AllocInfo, VmaStlAllocator<AllocInfo>> m_Allocations; - bool m_AllAllocations; -}; -#endif // _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT - -#ifndef _VMA_DEFRAGMENTATION_CONTEXT -struct VmaDefragmentationContext_T -{ -private: - VMA_CLASS_NO_COPY(VmaDefragmentationContext_T) -public: - VmaDefragmentationContext_T( - VmaAllocator hAllocator, - uint32_t flags, - VmaDefragmentationStats* pStats); - ~VmaDefragmentationContext_T(); - - void AddPools(uint32_t poolCount, const VmaPool* pPools); - void AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged); - - /* - Returns: - - `VK_SUCCESS` if succeeded and object can be destroyed immediately. - - `VK_NOT_READY` if succeeded but the object must remain alive until vmaDefragmentationEnd(). - - Negative value if error occurred and object can be destroyed immediately. - */ - VkResult Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags); - - VkResult DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo); - VkResult DefragmentPassEnd(); - -private: - const VmaAllocator m_hAllocator; - const uint32_t m_Flags; - VmaDefragmentationStats* const m_pStats; - - VkDeviceSize m_MaxCpuBytesToMove; - uint32_t m_MaxCpuAllocationsToMove; - VkDeviceSize m_MaxGpuBytesToMove; - uint32_t m_MaxGpuAllocationsToMove; - - // Owner of these objects. - VmaBlockVectorDefragmentationContext* m_DefaultPoolContexts[VK_MAX_MEMORY_TYPES]; - // Owner of these objects. - VmaVector<VmaBlockVectorDefragmentationContext*, VmaStlAllocator<VmaBlockVectorDefragmentationContext*>> m_CustomPoolContexts; + const VkDeviceSize m_MaxPassBytes; + const uint32_t m_MaxPassAllocations; + + VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; + VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + + uint32_t m_Algorithm; + uint32_t m_BlockVectorCount; + VmaBlockVector* m_PoolBlockVector; + VmaBlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + VmaDefragmentationStats m_Stats = { 0 }; + void* m_AlgorithmState = VMA_NULL; + + static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata); + bool IncrementCounters(uint32_t& allocations, VkDeviceSize bytes); + bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + + bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(VmaBlockVector& vector); + bool ComputeDefragmentation_Balanced(VmaBlockVector& vector); + bool ComputeDefragmentation_Full(VmaBlockVector& vector); + bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); }; #endif // _VMA_DEFRAGMENTATION_CONTEXT @@ -10896,12 +10997,13 @@ struct VmaPool_T friend struct VmaPoolListItemTraits; VMA_CLASS_NO_COPY(VmaPool_T) public: - VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; - VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES]; + VmaBlockVector m_BlockVector; + VmaDedicatedAllocationList m_DedicatedAllocations; VmaPool_T( VmaAllocator hAllocator, - const VmaPoolCreateInfo& createInfo); + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize); ~VmaPool_T(); uint32_t GetId() const { return m_Id; } @@ -10915,7 +11017,6 @@ public: #endif private: - const VmaAllocator m_hAllocator; uint32_t m_Id; char* m_Name; VmaPool_T* m_PrevPool = VMA_NULL; @@ -10936,6 +11037,8 @@ struct VmaPoolListItemTraits #ifndef _VMA_CURRENT_BUDGET_DATA struct VmaCurrentBudgetData { + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; @@ -10958,6 +11061,8 @@ VmaCurrentBudgetData::VmaCurrentBudgetData() { for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; m_BlockBytes[heapIndex] = 0; m_AllocationBytes[heapIndex] = 0; #if VMA_MEMORY_BUDGET @@ -10975,6 +11080,7 @@ VmaCurrentBudgetData::VmaCurrentBudgetData() void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) { m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; #if VMA_MEMORY_BUDGET ++m_OperationsSinceBudgetFetch; #endif @@ -10984,6 +11090,8 @@ void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize all { VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; #if VMA_MEMORY_BUDGET ++m_OperationsSinceBudgetFetch; #endif @@ -11045,7 +11153,8 @@ public: void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, VkDeviceSize* outOffset); - void CalculateStats(VmaStatInfo& outStatInfo) const; + void GetStatistics(VmaStatistics& outStats) const; + void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; #if VMA_STATS_STRING_ENABLED void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; #endif @@ -11062,20 +11171,14 @@ VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; switch (algorithm) { + default: + VMA_ASSERT(0); case 0: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Generic)(VK_NULL_HANDLE, 1, true); - break; - case VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Buddy)(VK_NULL_HANDLE, 1, true); + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); break; case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); break; - case VMA_VIRTUAL_BLOCK_CREATE_TLSF_ALGORITHM_BIT: - m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); - break; - default: - VMA_ASSERT(0); } m_Metadata->Init(createInfo.size); @@ -11129,10 +11232,16 @@ VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& creat return VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void VmaVirtualBlock_T::CalculateStats(VmaStatInfo& outStatInfo) const +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const { - m_Metadata->CalcAllocationStatInfo(outStatInfo); - VmaPostprocessCalcStatInfo(outStatInfo); + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); } #if VMA_STATS_STRING_ENABLED @@ -11141,16 +11250,17 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) VmaJsonWriter json(GetAllocationCallbacks(), sb); json.BeginObject(); - VmaStatInfo stat = {}; - CalculateStats(stat); + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); json.WriteString("Stats"); - VmaPrintStatInfo(json, stat); + VmaPrintDetailedStatistics(json, stats); if (detailedMap) { json.WriteString("Details"); - m_Metadata->PrintDetailedMap(json); + m_Metadata->PrintDetailedMap(json, + UINT32_MAX); // mapRefCount } json.EndObject(); @@ -11159,6 +11269,7 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) #endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS #endif // _VMA_VIRTUAL_BLOCK_T + // Main allocator object. struct VmaAllocator_T { @@ -11253,6 +11364,11 @@ public: VkMemoryRequirements& memReq, bool& requiresDedicatedAllocation, bool& prefersDedicatedAllocation) const; + VkResult FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown. + uint32_t* pMemoryTypeIndex) const; // Main allocation function. VkResult AllocateMemory( @@ -11260,8 +11376,8 @@ public: bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, // UINT32_MAX when unknown. VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, // UINT32_MAX if unknown. const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -11272,7 +11388,7 @@ public: size_t allocationCount, const VmaAllocation* pAllocations); - void CalculateStats(VmaStats* pStats); + void CalculateStatistics(VmaTotalStatistics* pStats); void GetHeapBudgets( VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); @@ -11281,24 +11397,12 @@ public: void PrintDetailedMap(class VmaJsonWriter& json); #endif - VkResult DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext); - VkResult DefragmentationEnd( - VmaDefragmentationContext context); - - VkResult DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context); - VkResult DefragmentationPassEnd( - VmaDefragmentationContext context); - void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); void DestroyPool(VmaPool pool); - void GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); void SetCurrentFrameIndex(uint32_t frameIndex); uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } @@ -11397,18 +11501,16 @@ private: void ValidateVulkanFunctions(); -public: // I'm sorry VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); -private: VkResult AllocateMemoryOfType( VmaPool pool, VkDeviceSize size, VkDeviceSize alignment, bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -11426,6 +11528,7 @@ private: const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation); @@ -11438,12 +11541,13 @@ private: uint32_t memTypeIndex, bool map, bool isUserDataString, + bool isMappingAllowed, bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, const void* pNextChain = nullptr); @@ -11561,19 +11665,11 @@ void VmaDeviceMemoryBlock::Init( m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual break; - case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Buddy)(hAllocator->GetAllocationCallbacks(), - bufferImageGranularity, false); // isVirtual - break; - case VMA_POOL_CREATE_TLSF_ALGORITHM_BIT: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), - bufferImageGranularity, false); // isVirtual - break; default: VMA_ASSERT(0); // Fall-through. case 0: - m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Generic)(hAllocator->GetAllocationCallbacks(), + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual } m_pMetadata->Init(newSize); @@ -11596,6 +11692,19 @@ void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) m_pMetadata = VMA_NULL; } +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + if(m_MappingHysteresis.PostFree()) + { + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + } +} + bool VmaDeviceMemoryBlock::Validate() const { VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && @@ -11627,8 +11736,10 @@ VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void return VK_SUCCESS; } - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if (m_MapCount != 0) + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + m_MappingHysteresis.PostMap(); + if (oldTotalMapCount != 0) { m_MapCount += count; VMA_ASSERT(m_pMappedData != VMA_NULL); @@ -11666,15 +11777,17 @@ void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) return; } - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); if (m_MapCount >= count) { m_MapCount -= count; - if (m_MapCount == 0) + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (totalMapCount == 0) { m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); } + m_MappingHysteresis.PostUnmap(); } else { @@ -11732,7 +11845,7 @@ VkResult VmaDeviceMemoryBlock::BindBufferMemory( "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); } @@ -11749,13 +11862,13 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); } #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS -VmaAllocation_T::VmaAllocation_T(bool userDataString) +VmaAllocation_T::VmaAllocation_T(bool userDataString, bool mappingAllowed) : m_Alignment{ 1 }, m_Size{ 0 }, m_pUserData{ VMA_NULL }, @@ -11763,8 +11876,13 @@ VmaAllocation_T::VmaAllocation_T(bool userDataString) m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, m_MapCount{ 0 }, - m_Flags{ userDataString ? (uint8_t)FLAG_USER_DATA_STRING : (uint8_t)0 } + m_Flags{ 0 } { + if(userDataString) + m_Flags |= (uint8_t)FLAG_USER_DATA_STRING; + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; + #if VMA_STATS_STRING_ENABLED m_BufferImageUsage = 0; #endif @@ -11772,10 +11890,10 @@ VmaAllocation_T::VmaAllocation_T(bool userDataString) VmaAllocation_T::~VmaAllocation_T() { - VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction."); + VMA_ASSERT(m_MapCount == 0 && "Allocation was not unmapped before destruction."); // Check if owned string was freed. - VMA_ASSERT(m_pUserData == VMA_NULL); + VMA_ASSERT((IsUserDataString() && m_pUserData == VMA_NULL) || !IsUserDataString()); } void VmaAllocation_T::InitBlockAllocation( @@ -11793,7 +11911,11 @@ void VmaAllocation_T::InitBlockAllocation( m_Alignment = alignment; m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; - m_MapCount = mapped ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } m_SuballocationType = (uint8_t)suballocationType; m_BlockAllocation.m_Block = block; m_BlockAllocation.m_AllocHandle = allocHandle; @@ -11814,7 +11936,11 @@ void VmaAllocation_T::InitDedicatedAllocation( m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; - m_MapCount = (pMappedData != VMA_NULL) ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; + if(pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } m_DedicatedAllocation.m_hParentPool = hParentPool; m_DedicatedAllocation.m_hMemory = hMemory; m_DedicatedAllocation.m_pMappedData = pMappedData; @@ -11841,32 +11967,19 @@ void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) } } -void VmaAllocation_T::ChangeBlockAllocation( - VmaAllocator hAllocator, - VmaDeviceMemoryBlock* block, - VmaAllocHandle allocHandle) +void VmaAllocation_T::SwapBlockAllocation(VmaAllocation allocation) { - VMA_ASSERT(block != VMA_NULL); + VMA_ASSERT(allocation != VMA_NULL); VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); - // Move mapping reference counter from old block to new block. - if (block != m_BlockAllocation.m_Block) - { - uint32_t mapRefCount = m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP; - if (IsPersistentMap()) - ++mapRefCount; - m_BlockAllocation.m_Block->Unmap(hAllocator, mapRefCount); - block->Map(hAllocator, mapRefCount, VMA_NULL); - } + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + VMA_SWAP(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_AllocHandle = allocHandle; -} - -void VmaAllocation_T::ChangeAllocHandle(VmaAllocHandle newAllocHandle) -{ - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - m_BlockAllocation.m_AllocHandle = newAllocHandle; +#if VMA_STATS_STRING_ENABLED + VMA_SWAP(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif } VmaAllocHandle VmaAllocation_T::GetAllocHandle() const @@ -11930,7 +12043,7 @@ void* VmaAllocation_T::GetMappedData() const switch (m_Type) { case ALLOCATION_TYPE_BLOCK: - if (m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); VMA_ASSERT(pBlockData != VMA_NULL); @@ -11942,7 +12055,7 @@ void* VmaAllocation_T::GetMappedData() const } break; case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0)); + VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); return m_DedicatedAllocation.m_pMappedData; default: VMA_ASSERT(0); @@ -11950,24 +12063,12 @@ void* VmaAllocation_T::GetMappedData() const } } -void VmaAllocation_T::DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo) -{ - VMA_ASSERT(m_Type == ALLOCATION_TYPE_DEDICATED); - outInfo.blockCount = 1; - outInfo.allocationCount = 1; - outInfo.unusedRangeCount = 0; - outInfo.usedBytes = m_Size; - outInfo.unusedBytes = 0; - outInfo.allocationSizeMin = outInfo.allocationSizeMax = m_Size; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; -} - void VmaAllocation_T::BlockAllocMap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { ++m_MapCount; } @@ -11981,7 +12082,7 @@ void VmaAllocation_T::BlockAllocUnmap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; } @@ -11994,10 +12095,11 @@ void VmaAllocation_T::BlockAllocUnmap() VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if (m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); *ppData = m_DedicatedAllocation.m_pMappedData; @@ -12032,10 +12134,10 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; - if (m_MapCount == 0) + if (m_MapCount == 0 && !IsPersistentMap()) { m_DedicatedAllocation.m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( @@ -12121,7 +12223,6 @@ VmaBlockVector::VmaBlockVector( m_Priority(priority), m_MinAllocationAlignment(minAllocationAlignment), m_pMemoryAllocateNext(pMemoryAllocateNext), - m_HasEmptyBlock(false), m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())), m_NextBlockId(0) {} @@ -12147,19 +12248,31 @@ VkResult VmaBlockVector::CreateMinBlocks() return VK_SUCCESS; } -void VmaBlockVector::AddPoolStats(VmaPoolStats* pStats) +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) { VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); const size_t blockCount = m_Blocks.size(); - pStats->blockCount += blockCount; + for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + { + const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VMA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + const size_t blockCount = m_Blocks.size(); for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) { const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; VMA_ASSERT(pBlock); VMA_HEAVY_ASSERT(pBlock->Validate()); - pBlock->m_pMetadata->AddPoolStats(*pStats); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); } } @@ -12217,14 +12330,8 @@ VkResult VmaBlockVector::Allocate( if (res != VK_SUCCESS) { // Free all already created allocations. - const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); while (allocIndex--) - { - VmaAllocation_T* const alloc = pAllocations[allocIndex]; - const VkDeviceSize allocSize = alloc->GetSize(); - Free(alloc); - m_hAllocator->m_Budget.RemoveAllocation(heapIndex, allocSize); - } + Free(pAllocations[allocIndex]); memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); } @@ -12239,8 +12346,6 @@ VkResult VmaBlockVector::AllocatePage( VmaAllocation* pAllocation) { const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; VkDeviceSize freeMemory; { @@ -12280,17 +12385,11 @@ VkResult VmaBlockVector::AllocatePage( VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); VMA_ASSERT(pCurrBlock); VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } } @@ -12299,24 +12398,55 @@ VkResult VmaBlockVector::AllocatePage( { if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default { - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + const bool isHostVisible = + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + if(isHostVisible) { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if (res == VK_SUCCESS) + const bool isMappingAllowed = (createInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + /* + For non-mappable allocations, check blocks that are not mapped first. + For mappable allocations, check blocks that are already mapped first. + This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); - return VK_SUCCESS; + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } } } } @@ -12327,18 +12457,11 @@ VkResult VmaBlockVector::AllocatePage( { VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } } @@ -12401,17 +12524,11 @@ VkResult VmaBlockVector::AllocatePage( VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); res = AllocateFromBlock( - pBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); return VK_SUCCESS; } else @@ -12426,7 +12543,8 @@ VkResult VmaBlockVector::AllocatePage( } void VmaBlockVector::Free( - const VmaAllocation hAllocation) + const VmaAllocation hAllocation, + bool incrementalSort) { VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; @@ -12455,7 +12573,9 @@ void VmaBlockVector::Free( pBlock->Unmap(m_hAllocator, 1); } + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); VMA_HEAVY_ASSERT(pBlock->Validate()); VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", m_MemoryTypeIndex); @@ -12464,17 +12584,17 @@ void VmaBlockVector::Free( // pBlock became empty after this deallocation. if (pBlock->m_pMetadata->IsEmpty()) { - // Already has empty block. We don't want to have two, so delete this one. - if ((m_HasEmptyBlock || budgetExceeded) && canDeleteBlock) + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) { pBlockToDelete = pBlock; Remove(pBlock); } - // else: We now have an empty block - leave it. + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. } // pBlock didn't become empty, but we have another empty block - find and free that one. // (This is optional, heuristics.) - else if (m_HasEmptyBlock && canDeleteBlock) + else if (hadEmptyBlockBeforeFree && canDeleteBlock) { VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); if (pLastBlock->m_pMetadata->IsEmpty()) @@ -12484,8 +12604,8 @@ void VmaBlockVector::Free( } } - UpdateHasEmptyBlock(); - IncrementallySortBlocks(); + if (incrementalSort) + IncrementallySortBlocks(); } // Destruction of a free block. Deferred until this point, outside of mutex @@ -12496,6 +12616,9 @@ void VmaBlockVector::Free( pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const @@ -12541,6 +12664,15 @@ void VmaBlockVector::IncrementallySortBlocks() } } +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](auto* b1, auto* b2) + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + VkResult VmaBlockVector::AllocateFromBlock( VmaDeviceMemoryBlock* pBlock, VkDeviceSize size, @@ -12552,8 +12684,6 @@ VkResult VmaBlockVector::AllocateFromBlock( VmaAllocation* pAllocation) { const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; VmaAllocationRequest currRequest = {}; if (pBlock->m_pMetadata->CreateAllocationRequest( @@ -12564,42 +12694,59 @@ VkResult VmaBlockVector::AllocateFromBlock( strategy, &currRequest)) { - // Allocate from pCurrBlock. - if (mapped) - { - VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); - if (res != VK_SUCCESS) - { - return res; - } - } + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); + } + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString); - pBlock->m_pMetadata->Alloc(currRequest, suballocType, *pAllocation); - UpdateHasEmptyBlock(); - (*pAllocation)->InitBlockAllocation( - pBlock, - currRequest.allocHandle, - alignment, - currRequest.size, // Not size, as actual allocation size may be larger than requested! - m_MemoryTypeIndex, - suballocType, - mapped); - VMA_HEAVY_ASSERT(pBlock->Validate()); - (*pAllocation)->SetUserData(m_hAllocator, pUserData); - m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), currRequest.size); - if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) - { - m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); - } - if (IsCorruptionDetectionEnabled()) +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(); + // Allocate from pCurrBlock. + if (mapped) + { + VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); + if (res != VK_SUCCESS) { - VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), currRequest.size); - VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + return res; } - return VK_SUCCESS; } - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); + pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); + (*pAllocation)->InitBlockAllocation( + pBlock, + allocRequest.allocHandle, + alignment, + allocRequest.size, // Not size, as actual allocation size may be larger than requested! + m_MemoryTypeIndex, + suballocType, + mapped); + VMA_HEAVY_ASSERT(pBlock->Validate()); + (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); + if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + } + return VK_SUCCESS; } VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) @@ -12668,241 +12815,17 @@ VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIn return VK_SUCCESS; } -void VmaBlockVector::ApplyDefragmentationMovesCpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - const VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves) +bool VmaBlockVector::HasEmptyBlock() { - const size_t blockCount = m_Blocks.size(); - const bool isNonCoherent = m_hAllocator->IsMemoryTypeNonCoherent(m_MemoryTypeIndex); - - enum BLOCK_FLAG - { - BLOCK_FLAG_USED = 0x00000001, - BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION = 0x00000002, - }; - - struct BlockInfo - { - uint32_t flags; - void* pMappedData; - }; - VmaVector< BlockInfo, VmaStlAllocator<BlockInfo> > - blockInfo(blockCount, BlockInfo(), VmaStlAllocator<BlockInfo>(m_hAllocator->GetAllocationCallbacks())); - memset(blockInfo.data(), 0, blockCount * sizeof(BlockInfo)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - blockInfo[move.srcBlockIndex].flags |= BLOCK_FLAG_USED; - blockInfo[move.dstBlockIndex].flags |= BLOCK_FLAG_USED; - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. Get mapped pointer or map if necessary. - for (size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - BlockInfo& currBlockInfo = blockInfo[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if ((currBlockInfo.flags & BLOCK_FLAG_USED) != 0) - { - currBlockInfo.pMappedData = pBlock->GetMappedData(); - // It is not originally mapped - map it. - if (currBlockInfo.pMappedData == VMA_NULL) - { - pDefragCtx->res = pBlock->Map(m_hAllocator, 1, &currBlockInfo.pMappedData); - if (pDefragCtx->res == VK_SUCCESS) - { - currBlockInfo.flags |= BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION; - } - } - } - } - - // Go over all moves. Do actual data transfer. - if (pDefragCtx->res == VK_SUCCESS) - { - const VkDeviceSize nonCoherentAtomSize = m_hAllocator->m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const BlockInfo& srcBlockInfo = blockInfo[move.srcBlockIndex]; - const BlockInfo& dstBlockInfo = blockInfo[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockInfo.pMappedData && dstBlockInfo.pMappedData); - - // Invalidate source. - if (isNonCoherent) - { - VmaDeviceMemoryBlock* const pSrcBlock = m_Blocks[move.srcBlockIndex]; - memRange.memory = pSrcBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.srcOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.srcOffset - memRange.offset), nonCoherentAtomSize), - pSrcBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - - // THE PLACE WHERE ACTUAL DATA COPY HAPPENS. - memmove( - reinterpret_cast<char*>(dstBlockInfo.pMappedData) + move.dstOffset, - reinterpret_cast<char*>(srcBlockInfo.pMappedData) + move.srcOffset, - static_cast<size_t>(move.size)); - - if (IsCorruptionDetectionEnabled()) - { - VmaWriteMagicValue(dstBlockInfo.pMappedData, move.dstOffset + move.size); - } - - // Flush destination. - if (isNonCoherent) - { - VmaDeviceMemoryBlock* const pDstBlock = m_Blocks[move.dstBlockIndex]; - memRange.memory = pDstBlock->GetDeviceMemory(); - memRange.offset = VmaAlignDown(move.dstOffset, nonCoherentAtomSize); - memRange.size = VMA_MIN( - VmaAlignUp(move.size + (move.dstOffset - memRange.offset), nonCoherentAtomSize), - pDstBlock->m_pMetadata->GetSize() - memRange.offset); - (*m_hAllocator->GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hAllocator->m_hDevice, 1, &memRange); - } - } - } - - // Go over all blocks in reverse order. Unmap those that were mapped just for defragmentation. - // Regardless of pCtx->res == VK_SUCCESS. - for (size_t blockIndex = blockCount; blockIndex--; ) - { - const BlockInfo& currBlockInfo = blockInfo[blockIndex]; - if ((currBlockInfo.flags & BLOCK_FLAG_MAPPED_FOR_DEFRAGMENTATION) != 0) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - pBlock->Unmap(m_hAllocator, 1); - } - } -} - -void VmaBlockVector::ApplyDefragmentationMovesGpu( - VmaBlockVectorDefragmentationContext* pDefragCtx, - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkCommandBuffer commandBuffer) -{ - const size_t blockCount = m_Blocks.size(); - - pDefragCtx->blockContexts.resize(blockCount); - memset(pDefragCtx->blockContexts.data(), 0, blockCount * sizeof(VmaBlockDefragmentationContext)); - - // Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED. - const size_t moveCount = moves.size(); - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - //if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN) - { - // Old school move still require us to map the whole block - pDefragCtx->blockContexts[move.srcBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - pDefragCtx->blockContexts[move.dstBlockIndex].flags |= VmaBlockDefragmentationContext::BLOCK_FLAG_USED; - } - } - - VMA_ASSERT(pDefragCtx->res == VK_SUCCESS); - - // Go over all blocks. Create and bind buffer for whole block if necessary. - { - VkBufferCreateInfo bufCreateInfo; - VmaFillGpuDefragmentationBufferCreateInfo(bufCreateInfo); - - for (size_t blockIndex = 0; pDefragCtx->res == VK_SUCCESS && blockIndex < blockCount; ++blockIndex) - { - VmaBlockDefragmentationContext& currBlockCtx = pDefragCtx->blockContexts[blockIndex]; - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if ((currBlockCtx.flags & VmaBlockDefragmentationContext::BLOCK_FLAG_USED) != 0) - { - bufCreateInfo.size = pBlock->m_pMetadata->GetSize(); - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkCreateBuffer)( - m_hAllocator->m_hDevice, &bufCreateInfo, m_hAllocator->GetAllocationCallbacks(), &currBlockCtx.hBuffer); - if (pDefragCtx->res == VK_SUCCESS) - { - pDefragCtx->res = (*m_hAllocator->GetVulkanFunctions().vkBindBufferMemory)( - m_hAllocator->m_hDevice, currBlockCtx.hBuffer, pBlock->GetDeviceMemory(), 0); - } - } - } - } - - // Go over all moves. Post data transfer commands to command buffer. - if (pDefragCtx->res == VK_SUCCESS) - { - for (size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex) - { - const VmaDefragmentationMove& move = moves[moveIndex]; - - const VmaBlockDefragmentationContext& srcBlockCtx = pDefragCtx->blockContexts[move.srcBlockIndex]; - const VmaBlockDefragmentationContext& dstBlockCtx = pDefragCtx->blockContexts[move.dstBlockIndex]; - - VMA_ASSERT(srcBlockCtx.hBuffer && dstBlockCtx.hBuffer); - - VkBufferCopy region = { - move.srcOffset, - move.dstOffset, - move.size }; - (*m_hAllocator->GetVulkanFunctions().vkCmdCopyBuffer)( - commandBuffer, srcBlockCtx.hBuffer, dstBlockCtx.hBuffer, 1, ®ion); - } - } - - // Save buffers to defrag context for later destruction. - if (pDefragCtx->res == VK_SUCCESS && moveCount > 0) - { - pDefragCtx->res = VK_NOT_READY; - } -} - -void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats) -{ - for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if (pBlock->m_pMetadata->IsEmpty()) - { - if (m_Blocks.size() > m_MinBlockCount) - { - if (pDefragmentationStats != VMA_NULL) - { - ++pDefragmentationStats->deviceMemoryBlocksFreed; - pDefragmentationStats->bytesFreed += pBlock->m_pMetadata->GetSize(); - } - - VmaVectorRemove(m_Blocks, blockIndex); - pBlock->Destroy(m_hAllocator); - vma_delete(m_hAllocator, pBlock); - } - else - { - break; - } - } - } - UpdateHasEmptyBlock(); -} - -void VmaBlockVector::UpdateHasEmptyBlock() -{ - m_HasEmptyBlock = false; for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) { VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; if (pBlock->m_pMetadata->IsEmpty()) { - m_HasEmptyBlock = true; - break; + return true; } } + return false; } #if VMA_STATS_STRING_ENABLED @@ -12961,234 +12884,12 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) json.ContinueString(m_Blocks[i]->GetId()); json.EndString(); - m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json, m_Blocks[i]->GetMapRefCount()); } json.EndObject(); } #endif // VMA_STATS_STRING_ENABLED -void VmaBlockVector::Defragment( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags, - VkDeviceSize& maxCpuBytesToMove, uint32_t& maxCpuAllocationsToMove, - VkDeviceSize& maxGpuBytesToMove, uint32_t& maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer) -{ - pCtx->res = VK_SUCCESS; - - const VkMemoryPropertyFlags memPropFlags = - m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags; - const bool isHostVisible = (memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; - - const bool canDefragmentOnCpu = maxCpuBytesToMove > 0 && maxCpuAllocationsToMove > 0 && - isHostVisible; - const bool canDefragmentOnGpu = maxGpuBytesToMove > 0 && maxGpuAllocationsToMove > 0 && - !IsCorruptionDetectionEnabled() && - ((1u << m_MemoryTypeIndex) & m_hAllocator->GetGpuDefragmentationMemoryTypeBits()) != 0; - - // There are options to defragment this memory type. - if (canDefragmentOnCpu || canDefragmentOnGpu) - { - bool defragmentOnGpu; - // There is only one option to defragment this memory type. - if (canDefragmentOnGpu != canDefragmentOnCpu) - { - defragmentOnGpu = canDefragmentOnGpu; - } - // Both options are available: Heuristics to choose the best one. - else - { - defragmentOnGpu = (memPropFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0 || - m_hAllocator->IsIntegratedGpu(); - } - - bool overlappingMoveSupported = !defragmentOnGpu; - - if (m_hAllocator->m_UseMutex) - { - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if (!m_Mutex.TryLockWrite()) - { - pCtx->res = VK_ERROR_INITIALIZATION_FAILED; - return; - } - } - else - { - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - } - - pCtx->Begin(overlappingMoveSupported, flags); - - // Defragment. - - const VkDeviceSize maxBytesToMove = defragmentOnGpu ? maxGpuBytesToMove : maxCpuBytesToMove; - const uint32_t maxAllocationsToMove = defragmentOnGpu ? maxGpuAllocationsToMove : maxCpuAllocationsToMove; - VmaDefragmentationAlgorithm* algo = pCtx->GetAlgorithm(); - pCtx->res = algo->Defragment(pCtx->defragmentationMoves, maxBytesToMove, maxAllocationsToMove, flags); - - // Accumulate statistics. - if (pStats != VMA_NULL) - { - const VkDeviceSize bytesMoved = algo->GetBytesMoved(); - const uint32_t allocationsMoved = algo->GetAllocationsMoved(); - pStats->bytesMoved += bytesMoved; - pStats->allocationsMoved += allocationsMoved; - VMA_ASSERT(bytesMoved <= maxBytesToMove); - VMA_ASSERT(allocationsMoved <= maxAllocationsToMove); - if (defragmentOnGpu) - { - maxGpuBytesToMove -= bytesMoved; - maxGpuAllocationsToMove -= allocationsMoved; - } - else - { - maxCpuBytesToMove -= bytesMoved; - maxCpuAllocationsToMove -= allocationsMoved; - } - } - - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - if (m_hAllocator->m_UseMutex) - m_Mutex.UnlockWrite(); - - if (pCtx->res >= VK_SUCCESS && !pCtx->defragmentationMoves.empty()) - pCtx->res = VK_NOT_READY; - - return; - } - - if (pCtx->res >= VK_SUCCESS) - { - if (defragmentOnGpu) - { - ApplyDefragmentationMovesGpu(pCtx, pCtx->defragmentationMoves, commandBuffer); - } - else - { - ApplyDefragmentationMovesCpu(pCtx, pCtx->defragmentationMoves); - } - } - } -} - -void VmaBlockVector::DefragmentationEnd( - class VmaBlockVectorDefragmentationContext* pCtx, - uint32_t flags, - VmaDefragmentationStats* pStats) -{ - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL && m_hAllocator->m_UseMutex) - { - VMA_ASSERT(pCtx->mutexLocked == false); - - // Incremental defragmentation doesn't hold the lock, so when we enter here we don't actually have any - // lock protecting us. Since we mutate state here, we have to take the lock out now - m_Mutex.LockWrite(); - pCtx->mutexLocked = true; - } - - // If the mutex isn't locked we didn't do any work and there is nothing to delete. - if (pCtx->mutexLocked || !m_hAllocator->m_UseMutex) - { - // Destroy buffers. - for (size_t blockIndex = pCtx->blockContexts.size(); blockIndex--;) - { - VmaBlockDefragmentationContext& blockCtx = pCtx->blockContexts[blockIndex]; - if (blockCtx.hBuffer) - { - (*m_hAllocator->GetVulkanFunctions().vkDestroyBuffer)(m_hAllocator->m_hDevice, blockCtx.hBuffer, m_hAllocator->GetAllocationCallbacks()); - } - } - - if (pCtx->res >= VK_SUCCESS) - { - FreeEmptyBlocks(pStats); - } - } - - if (pCtx->mutexLocked) - { - VMA_ASSERT(m_hAllocator->m_UseMutex); - m_Mutex.UnlockWrite(); - } -} - -uint32_t VmaBlockVector::ProcessDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationPassMoveInfo* pMove, uint32_t maxMoves) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - const uint32_t moveCount = VMA_MIN(uint32_t(pCtx->defragmentationMoves.size()) - pCtx->defragmentationMovesProcessed, maxMoves); - - for (uint32_t i = 0; i < moveCount; ++i) - { - VmaDefragmentationMove& move = pCtx->defragmentationMoves[pCtx->defragmentationMovesProcessed + i]; - - pMove->allocation = move.hAllocation; - pMove->memory = move.pDstBlock->GetDeviceMemory(); - pMove->offset = move.dstOffset; - - ++pMove; - } - - pCtx->defragmentationMovesProcessed += moveCount; - - return moveCount; -} - -void VmaBlockVector::CommitDefragmentations( - class VmaBlockVectorDefragmentationContext* pCtx, - VmaDefragmentationStats* pStats) -{ - VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); - - for (uint32_t i = pCtx->defragmentationMovesCommitted; i < pCtx->defragmentationMovesProcessed; ++i) - { - const VmaDefragmentationMove& move = pCtx->defragmentationMoves[i]; - - move.pSrcBlock->m_pMetadata->Free(move.hAllocation->GetAllocHandle()); - move.hAllocation->ChangeBlockAllocation(m_hAllocator, move.pDstBlock, move.dstHandle); - } - - pCtx->defragmentationMovesCommitted = pCtx->defragmentationMovesProcessed; - FreeEmptyBlocks(pStats); -} - -size_t VmaBlockVector::CalcAllocationCount() const -{ - size_t result = 0; - for (size_t i = 0; i < m_Blocks.size(); ++i) - { - result += m_Blocks[i]->m_pMetadata->GetAllocationCount(); - } - return result; -} - -bool VmaBlockVector::IsBufferImageGranularityConflictPossible() const -{ - if (m_BufferImageGranularity == 1) - { - return false; - } - VmaSuballocationType lastSuballocType = VMA_SUBALLOCATION_TYPE_FREE; - for (size_t i = 0, count = m_Blocks.size(); i < count; ++i) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[i]; - VMA_ASSERT(m_Algorithm == 0); - VmaBlockMetadata_Generic* const pMetadata = (VmaBlockMetadata_Generic*)pBlock->m_pMetadata; - if (pMetadata->IsBufferImageGranularityConflictPossible(m_BufferImageGranularity, lastSuballocType)) - { - return true; - } - } - return false; -} - VkResult VmaBlockVector::CheckCorruption() { if (!IsCorruptionDetectionEnabled()) @@ -13210,1332 +12911,827 @@ VkResult VmaBlockVector::CheckCorruption() return VK_SUCCESS; } -void VmaBlockVector::AddStats(VmaStats* pStats) -{ - const uint32_t memTypeIndex = m_MemoryTypeIndex; - const uint32_t memHeapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(memTypeIndex); - - VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); - - for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VmaStatInfo allocationStatInfo; - pBlock->m_pMetadata->CalcAllocationStatInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } -} #endif // _VMA_BLOCK_VECTOR_FUNCTIONS -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC_FUNCTIONS -VmaDefragmentationAlgorithm_Generic::VmaDefragmentationAlgorithm_Generic( +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported) - : VmaDefragmentationAlgorithm(hAllocator, pBlockVector), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_Blocks(VmaStlAllocator<BlockInfo*>(hAllocator->GetAllocationCallbacks())) -{ - // Create block info for each block. - const size_t blockCount = m_pBlockVector->m_Blocks.size(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks()); - pBlockInfo->m_OriginalBlockIndex = blockIndex; - pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex]; - m_Blocks.push_back(pBlockInfo); - } - - // Sort them by m_pBlock pointer value. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess()); -} - -VmaDefragmentationAlgorithm_Generic::~VmaDefragmentationAlgorithm_Generic() + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass), + m_MoveAllocator(hAllocator->GetAllocationCallbacks()), + m_Moves(m_MoveAllocator) { - for (size_t i = m_Blocks.size(); i--; ) - { - vma_delete(m_hAllocator, m_Blocks[i]); - } -} + m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; -void VmaDefragmentationAlgorithm_Generic::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - VmaDeviceMemoryBlock* pBlock = hAlloc->GetBlock(); - BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess()); - if (it != m_Blocks.end() && (*it)->m_pBlock == pBlock) + if (info.pool != VMA_NULL) { - AllocationInfo allocInfo = AllocationInfo(hAlloc, pChanged); - (*it)->m_Allocations.push_back(allocInfo); + m_BlockVectorCount = 1; + m_PoolBlockVector = &info.pool->m_BlockVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SortByFreeSize(); } else { - VMA_ASSERT(0); - } - - ++m_AllocationCount; -} - -VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound( - VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - bool freeOldAllocations) -{ - if (m_Blocks.empty()) - { - return VK_SUCCESS; - } - - // This is a choice based on research. - // Option 1: - uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT; - // Option 2: - //uint32_t strategy = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT; - - size_t srcBlockMinIndex = 0; - // When FAST_ALGORITHM, move allocations from only last out of blocks that contain non-movable allocations. - /* - if(m_AlgorithmFlags & VMA_DEFRAGMENTATION_FAST_ALGORITHM_BIT) - { - const size_t blocksWithNonMovableCount = CalcBlocksWithNonMovableCount(); - if(blocksWithNonMovableCount > 0) + m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); + m_PoolBlockVector = VMA_NULL; + m_pBlockVectors = hAllocator->m_pBlockVectors; + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) { - srcBlockMinIndex = blocksWithNonMovableCount - 1; + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SortByFreeSize(); } } - */ - - size_t srcBlockIndex = m_Blocks.size() - 1; - size_t srcAllocIndex = SIZE_MAX; - for (;;) + + switch (m_Algorithm) { - // 1. Find next allocation to move. - // 1.1. Start from last to first m_Blocks - they are sorted from most "destination" to most "source". - // 1.2. Then start from last to first m_Allocations. - while (srcAllocIndex >= m_Blocks[srcBlockIndex]->m_Allocations.size()) - { - if (m_Blocks[srcBlockIndex]->m_Allocations.empty()) - { - // Finished: no more allocations to process. - if (srcBlockIndex == srcBlockMinIndex) - { - return VK_SUCCESS; - } - else - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - } - else - { - srcAllocIndex = m_Blocks[srcBlockIndex]->m_Allocations.size() - 1; - } - } - - BlockInfo* pSrcBlockInfo = m_Blocks[srcBlockIndex]; - AllocationInfo& allocInfo = pSrcBlockInfo->m_Allocations[srcAllocIndex]; - - const VkDeviceSize size = allocInfo.m_hAllocation->GetSize(); - const VkDeviceSize srcOffset = allocInfo.m_hAllocation->GetOffset(); - const VkDeviceSize alignment = allocInfo.m_hAllocation->GetAlignment(); - const VmaSuballocationType suballocType = allocInfo.m_hAllocation->GetSuballocationType(); - - // 2. Try to find new place for this allocation in preceding or current block. - for (size_t dstBlockIndex = 0; dstBlockIndex <= srcBlockIndex; ++dstBlockIndex) - { - BlockInfo* pDstBlockInfo = m_Blocks[dstBlockIndex]; - VmaBlockMetadata* pMetadata = pDstBlockInfo->m_pBlock->m_pMetadata; - VmaAllocationRequest dstAllocRequest; - if (pMetadata->CreateAllocationRequest( - size, - alignment, - false, // upperAddress - suballocType, - strategy, - &dstAllocRequest) && - MoveMakesSense( - dstBlockIndex, pMetadata->GetAllocationOffset(dstAllocRequest.allocHandle), srcBlockIndex, srcOffset)) - { - // Reached limit on number of allocations or bytes to move. - if ((m_AllocationsMoved + 1 > maxAllocationsToMove) || - (m_BytesMoved + size > maxBytesToMove)) - { - return VK_SUCCESS; - } - - VmaDefragmentationMove move = {}; - move.srcBlockIndex = pSrcBlockInfo->m_OriginalBlockIndex; - move.dstBlockIndex = pDstBlockInfo->m_OriginalBlockIndex; - move.srcOffset = srcOffset; - move.dstOffset = pMetadata->GetAllocationOffset(dstAllocRequest.allocHandle); - move.size = size; - move.hAllocation = allocInfo.m_hAllocation; - move.pSrcBlock = pSrcBlockInfo->m_pBlock; - move.pDstBlock = pDstBlockInfo->m_pBlock; - move.dstHandle = dstAllocRequest.allocHandle; - - moves.push_back(move); - - pDstBlockInfo->m_pBlock->m_pMetadata->Alloc(dstAllocRequest, suballocType, allocInfo.m_hAllocation); - - if (freeOldAllocations) - { - pSrcBlockInfo->m_pBlock->m_pMetadata->Free(allocInfo.m_hAllocation->GetAllocHandle()); - allocInfo.m_hAllocation->ChangeBlockAllocation(m_hAllocator, pDstBlockInfo->m_pBlock, dstAllocRequest.allocHandle); - } - - if (allocInfo.m_pChanged != VMA_NULL) - { - *allocInfo.m_pChanged = VK_TRUE; - } - - ++m_AllocationsMoved; - m_BytesMoved += size; - - VmaVectorRemove(pSrcBlockInfo->m_Allocations, srcAllocIndex); - - break; - } - } - - // If not processed, this allocInfo remains in pBlockInfo->m_Allocations for next round. - - if (srcAllocIndex > 0) + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (hAllocator->GetBufferImageGranularity() > 1) { - --srcAllocIndex; + m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); } - else - { - if (srcBlockIndex > 0) - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - else - { - return VK_SUCCESS; - } - } - } -} - -bool VmaDefragmentationAlgorithm_Generic::AllocationInfoSizeGreater::operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const -{ - return lhs.m_hAllocation->GetSize() > rhs.m_hAllocation->GetSize(); -} - -bool VmaDefragmentationAlgorithm_Generic::AllocationInfoOffsetGreater::operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const -{ - return lhs.m_hAllocation->GetOffset() > rhs.m_hAllocation->GetOffset(); -} - -VmaDefragmentationAlgorithm_Generic::BlockInfo::BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks) - : m_OriginalBlockIndex(SIZE_MAX), - m_pBlock(VMA_NULL), - m_HasNonMovableAllocations(true), - m_Allocations(pAllocationCallbacks) {} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::CalcHasNonMovableAllocations() -{ - const size_t blockAllocCount = m_pBlock->m_pMetadata->GetAllocationCount(); - const size_t defragmentAllocCount = m_Allocations.size(); - m_HasNonMovableAllocations = blockAllocCount != defragmentAllocCount; -} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::SortAllocationsBySizeDescending() -{ - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoSizeGreater()); -} - -void VmaDefragmentationAlgorithm_Generic::BlockInfo::SortAllocationsByOffsetDescending() -{ - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoOffsetGreater()); -} - -bool VmaDefragmentationAlgorithm_Generic::BlockPointerLess::operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const -{ - return pLhsBlockInfo->m_pBlock < pRhsBlock; -} -bool VmaDefragmentationAlgorithm_Generic::BlockPointerLess::operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const -{ - return pLhsBlockInfo->m_pBlock < pRhsBlockInfo->m_pBlock; -} - -bool VmaDefragmentationAlgorithm_Generic::BlockInfoCompareMoveDestination::operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const -{ - if (pLhsBlockInfo->m_HasNonMovableAllocations && !pRhsBlockInfo->m_HasNonMovableAllocations) - { - return true; - } - if (!pLhsBlockInfo->m_HasNonMovableAllocations && pRhsBlockInfo->m_HasNonMovableAllocations) - { - return false; - } - if (pLhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize() < pRhsBlockInfo->m_pBlock->m_pMetadata->GetSumFreeSize()) - { - return true; - } - return false; -} - -bool VmaDefragmentationAlgorithm_Generic::MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset) -{ - if (dstBlockIndex < srcBlockIndex) - { - return true; - } - if (dstBlockIndex > srcBlockIndex) - { - return false; + break; } - if (dstOffset < srcOffset) - { - return true; } - return false; } -size_t VmaDefragmentationAlgorithm_Generic::CalcBlocksWithNonMovableCount() const +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() { - size_t result = 0; - for (size_t i = 0; i < m_Blocks.size(); ++i) + if (m_AlgorithmState) { - if (m_Blocks[i]->m_HasNonMovableAllocations) + switch (m_Algorithm) { - ++result; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount); + break; + default: + VMA_ASSERT(0); } } - return result; } -VkResult VmaDefragmentationAlgorithm_Generic::Defragment( - VmaVector< VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove> >& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) { - if (!m_AllAllocations && m_AllocationCount == 0) + if (m_PoolBlockVector != VMA_NULL) { - return VK_SUCCESS; + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); } - - const size_t blockCount = m_Blocks.size(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + else { - BlockInfo* pBlockInfo = m_Blocks[blockIndex]; - - if (m_AllAllocations) + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) { - VmaBlockMetadata_Generic* pMetadata = (VmaBlockMetadata_Generic*)pBlockInfo->m_pBlock->m_pMetadata; - VMA_ASSERT(!pMetadata->IsVirtual()); - for (VmaSuballocationList::const_iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) + if (m_pBlockVectors[i] != VMA_NULL) { - if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + if (m_pBlockVectors[i]->GetBlockCount() > 1) { - AllocationInfo allocInfo = AllocationInfo((VmaAllocation)it->userData, VMA_NULL); - pBlockInfo->m_Allocations.push_back(allocInfo); + if (ComputeDefragmentation(*m_pBlockVectors[i], i)) + break; + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) + break; } } } - - pBlockInfo->CalcHasNonMovableAllocations(); - - // This is a choice based on research. - // Option 1: - pBlockInfo->SortAllocationsByOffsetDescending(); - // Option 2: - //pBlockInfo->SortAllocationsBySizeDescending(); } - // Sort m_Blocks this time by the main criterium, from most "destination" to most "source" blocks. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockInfoCompareMoveDestination()); - - // This is a choice based on research. - const uint32_t roundCount = 2; - - // Execute defragmentation rounds (the main part). - VkResult result = VK_SUCCESS; - for (uint32_t round = 0; (round < roundCount) && (result == VK_SUCCESS); ++round) + moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size()); + if (moveInfo.moveCount > 0) { - result = DefragmentRound(moves, maxBytesToMove, maxAllocationsToMove, !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)); + moveInfo.pMoves = m_Moves.data(); + return VK_INCOMPLETE; } - return result; -} -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_GENERIC_FUNCTIONS - -#ifndef _VMA_DEFRAGMENTATION_ALGORITHM_FAST_FUNCTIONS -VmaDefragmentationAlgorithm_Fast::VmaDefragmentationAlgorithm_Fast( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - bool overlappingMoveSupported) - : VmaDefragmentationAlgorithm(hAllocator, pBlockVector), - m_OverlappingMoveSupported(overlappingMoveSupported), - m_AllocationCount(0), - m_AllAllocations(false), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_BlockInfos(VmaStlAllocator<BlockInfo>(hAllocator->GetAllocationCallbacks())) -{ - VMA_ASSERT(VMA_DEBUG_MARGIN == 0); + moveInfo.pMoves = VMA_NULL; + return VK_SUCCESS; } -VkResult VmaDefragmentationAlgorithm_Fast::Defragment( - VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>>& moves, - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove, - VmaDefragmentationFlags flags) +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) { - VMA_ASSERT(m_AllAllocations || m_pBlockVector->CalcAllocationCount() == m_AllocationCount); - - const size_t blockCount = m_pBlockVector->GetBlockCount(); - if (blockCount == 0 || maxBytesToMove == 0 || maxAllocationsToMove == 0) - { - return VK_SUCCESS; - } - - PreprocessMetadata(); - - // Sort blocks in order from most destination. + VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true); - m_BlockInfos.resize(blockCount); - for (size_t i = 0; i < blockCount; ++i) + VkResult result = VK_SUCCESS; + VmaVector<ImmovableBlock, VmaStlAllocator<ImmovableBlock>> immovableBlocks(VmaStlAllocator<ImmovableBlock>(m_MoveAllocator.m_pCallbacks)); + for (uint32_t i = 0; i < moveInfo.moveCount; ++i) { - m_BlockInfos[i].origBlockIndex = i; - } - - VMA_SORT(m_BlockInfos.begin(), m_BlockInfos.end(), [this](const BlockInfo& lhs, const BlockInfo& rhs) -> bool { - return m_pBlockVector->GetBlock(lhs.origBlockIndex)->m_pMetadata->GetSumFreeSize() < - m_pBlockVector->GetBlock(rhs.origBlockIndex)->m_pMetadata->GetSumFreeSize(); - }); + VmaDefragmentationMove& move = moveInfo.pMoves[i]; + size_t prevCount = 0, currentCount = 0; + VkDeviceSize freedBlockSize = 0; - // THE MAIN ALGORITHM - - FreeSpaceDatabase freeSpaceDb; + uint32_t vectorIndex; + VmaBlockVector* vector; + if (m_PoolBlockVector != VMA_NULL) + { + vectorIndex = 0; + vector = m_PoolBlockVector; + } + else + { + vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); + vector = m_pBlockVectors[vectorIndex]; + VMA_ASSERT(vector != VMA_NULL); + } - size_t dstBlockInfoIndex = 0; - size_t dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - VmaBlockMetadata_Generic* pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - VkDeviceSize dstBlockSize = pDstMetadata->GetSize(); - VkDeviceSize dstOffset = 0; + VmaAllocation dst = reinterpret_cast<VmaAllocation>(move.internalData); + switch (move.operation) + { + case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: + { + move.srcAllocation->SwapBlockAllocation(dst); + prevCount = vector->GetBlockCount(); + freedBlockSize = dst->GetBlock()->m_pMetadata->GetSize(); + vector->Free(dst, false); + currentCount = vector->GetBlockCount(); - bool end = false; - for (size_t srcBlockInfoIndex = 0; !end && srcBlockInfoIndex < blockCount; ++srcBlockInfoIndex) - { - const size_t srcOrigBlockIndex = m_BlockInfos[srcBlockInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* const pSrcBlock = m_pBlockVector->GetBlock(srcOrigBlockIndex); - VmaBlockMetadata_Generic* const pSrcMetadata = (VmaBlockMetadata_Generic*)pSrcBlock->m_pMetadata; - for (VmaSuballocationList::iterator srcSuballocIt = pSrcMetadata->m_Suballocations.begin(); - !end && srcSuballocIt != pSrcMetadata->m_Suballocations.end(); ) + result = VK_INCOMPLETE; + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: { - VmaAllocation const pAlloc = (VmaAllocation)srcSuballocIt->userData; - const VkDeviceSize srcAllocAlignment = pAlloc->GetAlignment(); - const VkDeviceSize srcAllocSize = srcSuballocIt->size; - if (m_AllocationsMoved == maxAllocationsToMove || - m_BytesMoved + srcAllocSize > maxBytesToMove) - { - end = true; - break; - } - const VkDeviceSize srcAllocOffset = srcSuballocIt->offset; - - VmaDefragmentationMove move = {}; - // Try to place it in one of free spaces from the database. - size_t freeSpaceInfoIndex; - VkDeviceSize dstAllocOffset; - if (freeSpaceDb.Fetch(srcAllocAlignment, srcAllocSize, - freeSpaceInfoIndex, dstAllocOffset)) - { - size_t freeSpaceOrigBlockIndex = m_BlockInfos[freeSpaceInfoIndex].origBlockIndex; - VmaDeviceMemoryBlock* pFreeSpaceBlock = m_pBlockVector->GetBlock(freeSpaceOrigBlockIndex); - VmaBlockMetadata_Generic* pFreeSpaceMetadata = (VmaBlockMetadata_Generic*)pFreeSpaceBlock->m_pMetadata; + m_Stats.bytesMoved -= move.srcAllocation->GetSize(); + vector->Free(dst, false); - // Same block - if (freeSpaceInfoIndex == srcBlockInfoIndex) + VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (const ImmovableBlock& block : immovableBlocks) + { + if (block.block == newBlock) { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeAllocHandle((VmaAllocHandle)(dstAllocOffset + 1)); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - InsertSuballoc(pFreeSpaceMetadata, suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); + notPresent = false; + break; } - // Different block - else - { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(freeSpaceInfoIndex < srcBlockInfoIndex); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeBlockAllocation(m_hAllocator, pFreeSpaceBlock, (VmaAllocHandle)(dstAllocOffset + 1)); - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; + } + if (notPresent) + immovableBlocks.push_back({ vectorIndex, newBlock }); + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: + { + prevCount = vector->GetBlockCount(); + freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); + vector->Free(move.srcAllocation, false); + currentCount = vector->GetBlockCount(); + freedBlockSize *= prevCount - currentCount; - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; + VkDeviceSize dstBlockSize = dst->GetBlock()->m_pMetadata->GetSize(); + vector->Free(dst, false); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); - InsertSuballoc(pFreeSpaceMetadata, suballoc); + result = VK_INCOMPLETE; + break; + } + default: + VMA_ASSERT(0); + } - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = freeSpaceOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; + if (prevCount > currentCount) + { + size_t freedBlocks = prevCount - currentCount; + m_Stats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); + m_Stats.bytesFreed += freedBlockSize; + } - moves.push_back(move); - } - } - else + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (m_AlgorithmState != VMA_NULL) { - dstAllocOffset = VmaAlignUp(dstOffset, srcAllocAlignment); - - // If the allocation doesn't fit before the end of dstBlock, forward to next block. - while (dstBlockInfoIndex < srcBlockInfoIndex && - dstAllocOffset + srcAllocSize > dstBlockSize) + // Avoid unnecessary tries to allocate when new free block is avaiable + StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex]; + if (state.firstFreeBlock != SIZE_MAX) { - // But before that, register remaining free space at the end of dst block. - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, dstBlockSize - dstOffset); - - ++dstBlockInfoIndex; - dstOrigBlockIndex = m_BlockInfos[dstBlockInfoIndex].origBlockIndex; - pDstBlock = m_pBlockVector->GetBlock(dstOrigBlockIndex); - pDstMetadata = (VmaBlockMetadata_Generic*)pDstBlock->m_pMetadata; - dstBlockSize = pDstMetadata->GetSize(); - dstOffset = 0; - dstAllocOffset = 0; + state.firstFreeBlock -= prevCount - currentCount; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); } + } + } + } + } + moveInfo.moveCount = 0; + moveInfo.pMoves = VMA_NULL; + m_Moves.clear(); - // Same block - if (dstBlockInfoIndex == srcBlockInfoIndex) + // Move blocks with immovable allocations according to algorithm + if (immovableBlocks.size() > 0) + { + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + { + if (m_AlgorithmState != VMA_NULL) + { + bool swapped = false; + // Move to the start of free blocks range + for (const ImmovableBlock& block : immovableBlocks) { - VMA_ASSERT(dstAllocOffset <= srcAllocOffset); - - const bool overlap = dstAllocOffset + srcAllocSize > srcAllocOffset; - - bool skipOver = overlap; - if (overlap && m_OverlappingMoveSupported && dstAllocOffset < srcAllocOffset) - { - // If destination and source place overlap, skip if it would move it - // by only < 1/64 of its size. - skipOver = (srcAllocOffset - dstAllocOffset) * 64 < srcAllocSize; - } - - if (skipOver) - { - freeSpaceDb.Register(dstBlockInfoIndex, dstOffset, srcAllocOffset - dstOffset); - - dstOffset = srcAllocOffset + srcAllocSize; - ++srcSuballocIt; - } - // MOVE OPTION 1: Move the allocation inside the same block by decreasing offset. - else + StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.vectorIndex]; + if (state.operation != StateExtensive::Operation::Cleanup) { - srcSuballocIt->offset = dstAllocOffset; - ((VmaAllocation)(srcSuballocIt->userData))->ChangeAllocHandle((VmaAllocHandle)(dstAllocOffset + 1)); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - ++srcSuballocIt; - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); + VmaBlockVector* vector = m_pBlockVectors[block.vectorIndex]; + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) + { + if (vector->GetBlock(i) == block.block) + { + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); + if (state.firstFreeBlock != SIZE_MAX) + { + if (i < state.firstFreeBlock - 1) + { + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + } + } + swapped = true; + break; + } + } } } - // Different block - else + if (swapped) + result = VK_INCOMPLETE; + break; + } + } + default: + { + // Move to the begining + for (const ImmovableBlock& block : immovableBlocks) + { + VmaBlockVector* vector = m_pBlockVectors[block.vectorIndex]; + for (size_t i = m_ImmovableBlockCount; vector->GetBlockCount(); ++i) { - // MOVE OPTION 2: Move the allocation to a different block. - - VMA_ASSERT(dstBlockInfoIndex < srcBlockInfoIndex); - VMA_ASSERT(dstAllocOffset + srcAllocSize <= dstBlockSize); - - VmaSuballocation suballoc = *srcSuballocIt; - suballoc.offset = dstAllocOffset; - ((VmaAllocation)(suballoc.userData))->ChangeBlockAllocation(m_hAllocator, pDstBlock, (VmaAllocHandle)(dstAllocOffset + 1)); - dstOffset = dstAllocOffset + srcAllocSize; - m_BytesMoved += srcAllocSize; - ++m_AllocationsMoved; - - VmaSuballocationList::iterator nextSuballocIt = srcSuballocIt; - ++nextSuballocIt; - pSrcMetadata->m_Suballocations.erase(srcSuballocIt); - srcSuballocIt = nextSuballocIt; - - pDstMetadata->m_Suballocations.push_back(suballoc); - - move.srcBlockIndex = srcOrigBlockIndex; - move.dstBlockIndex = dstOrigBlockIndex; - move.srcOffset = srcAllocOffset; - move.dstOffset = dstAllocOffset; - move.dstHandle = (VmaAllocHandle)(dstAllocOffset + 1); - move.size = srcAllocSize; - - moves.push_back(move); + if (vector->GetBlock(i) == block.block) + { + VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } } } + break; + } } } - - m_BlockInfos.clear(); - - PostprocessMetadata(); - - return VK_SUCCESS; + return result; } -VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::FreeSpaceDatabase() +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) { - FreeSpace s = {}; - s.blockInfoIndex = SIZE_MAX; - for (size_t i = 0; i < MAX_COUNT; ++i) + switch (m_Algorithm) { - m_FreeSpaces[i] = s; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: + return ComputeDefragmentation_Fast(vector); + default: // Default algoritm + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + return ComputeDefragmentation_Balanced(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: + return ComputeDefragmentation_Full(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + return ComputeDefragmentation_Extensive(vector, index); } } -void VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::Register(size_t blockInfoIndex, VkDeviceSize offset, VkDeviceSize size) +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( + VmaAllocHandle handle, VmaBlockMetadata* metadata) { - // Find first invalid or the smallest structure. - size_t bestIndex = SIZE_MAX; - for (size_t i = 0; i < MAX_COUNT; ++i) - { - // Empty structure. - if (m_FreeSpaces[i].blockInfoIndex == SIZE_MAX) - { - bestIndex = i; - break; - } - if (m_FreeSpaces[i].size < size && - (bestIndex == SIZE_MAX || m_FreeSpaces[bestIndex].size > m_FreeSpaces[i].size)) - { - bestIndex = i; - } - } + MoveAllocationData moveData; + moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); + moveData.size = moveData.move.srcAllocation->GetSize(); + moveData.alignment = moveData.move.srcAllocation->GetAlignment(); + moveData.type = moveData.move.srcAllocation->GetSuballocationType(); + moveData.flags = 0; - if (bestIndex != SIZE_MAX) - { - m_FreeSpaces[bestIndex].blockInfoIndex = blockInfoIndex; - m_FreeSpaces[bestIndex].offset = offset; - m_FreeSpaces[bestIndex].size = size; - } + if (moveData.move.srcAllocation->IsPersistentMap()) + moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + if (moveData.move.srcAllocation->IsMappingAllowed()) + moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + + return moveData; } -bool VmaDefragmentationAlgorithm_Fast::FreeSpaceDatabase::Fetch(VkDeviceSize alignment, VkDeviceSize size, - size_t& outBlockInfoIndex, VkDeviceSize& outDstOffset) +bool VmaDefragmentationContext_T::IncrementCounters(uint32_t& allocations, VkDeviceSize bytes) { - size_t bestIndex = SIZE_MAX; - VkDeviceSize bestFreeSpaceAfter = 0; - for (size_t i = 0; i < MAX_COUNT; ++i) + if (++allocations >= m_MaxPassAllocations || bytes >= m_MaxPassBytes) { - // Structure is valid. - if (m_FreeSpaces[i].blockInfoIndex != SIZE_MAX) - { - const VkDeviceSize dstOffset = VmaAlignUp(m_FreeSpaces[i].offset, alignment); - // Allocation fits into this structure. - if (dstOffset + size <= m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - { - const VkDeviceSize freeSpaceAfter = (m_FreeSpaces[i].offset + m_FreeSpaces[i].size) - - (dstOffset + size); - if (bestIndex == SIZE_MAX || freeSpaceAfter > bestFreeSpaceAfter) - { - bestIndex = i; - bestFreeSpaceAfter = freeSpaceAfter; - } - } - } - } - - if (bestIndex != SIZE_MAX) - { - outBlockInfoIndex = m_FreeSpaces[bestIndex].blockInfoIndex; - outDstOffset = VmaAlignUp(m_FreeSpaces[bestIndex].offset, alignment); - - // Leave this structure for remaining empty space. - const VkDeviceSize alignmentPlusSize = (outDstOffset - m_FreeSpaces[bestIndex].offset) + size; - m_FreeSpaces[bestIndex].offset += alignmentPlusSize; - m_FreeSpaces[bestIndex].size -= alignmentPlusSize; - + m_Stats.bytesMoved += bytes; + m_Stats.allocationsMoved += allocations; return true; } - return false; } -void VmaDefragmentationAlgorithm_Fast::PreprocessMetadata() +bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block) { - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - pMetadata->m_FreeCount = 0; - pMetadata->m_SumFreeSize = pMetadata->GetSize(); - pMetadata->m_FreeSuballocationsBySize.clear(); - for (VmaSuballocationList::iterator it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); ) - { - if (it->type == VMA_SUBALLOCATION_TYPE_FREE) - { - VmaSuballocationList::iterator nextIt = it; - ++nextIt; - pMetadata->m_Suballocations.erase(it); - it = nextIt; - } - else - { - ++it; - } - } - } -} + VkDeviceSize currentBytesMoved = 0; + uint32_t currentAllocsMoved = 0; + VmaBlockMetadata* metadata = block->m_pMetadata; -void VmaDefragmentationAlgorithm_Fast::PostprocessMetadata() -{ - const size_t blockCount = m_pBlockVector->GetBlockCount(); - for (size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - VmaBlockMetadata_Generic* const pMetadata = - (VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata; - const VkDeviceSize blockSize = pMetadata->GetSize(); - - // No allocations in this block - entire area is free. - if (pMetadata->m_Suballocations.empty()) - { - pMetadata->m_FreeCount = 1; - //pMetadata->m_SumFreeSize is already set to blockSize. - VmaSuballocation suballoc = { - 0, // offset - blockSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - pMetadata->m_Suballocations.push_back(suballoc); - pMetadata->RegisterFreeSuballocation(pMetadata->m_Suballocations.begin()); - } - // There are some allocations in this block. - else - { - VkDeviceSize offset = 0; - VmaSuballocationList::iterator it; - for (it = pMetadata->m_Suballocations.begin(); - it != pMetadata->m_Suballocations.end(); - ++it) + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); + + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - VMA_ASSERT(it->type != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(it->offset >= offset); - - // Need to insert preceding free space. - if (it->offset > offset) + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = it->offset - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VmaSuballocationList::iterator precedingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - pMetadata->m_FreeSuballocationsBySize.push_back(precedingFreeIt); - } - - pMetadata->m_SumFreeSize -= it->size; - offset = it->offset + it->size; - } + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &dst) == VK_SUCCESS) + { + moveData.move.dstMemory = dst->GetMemory(); + moveData.move.dstOffset = dst->GetOffset(); + m_Moves.push_back(moveData.move); + currentBytesMoved += moveData.size; - // Need to insert trailing free space. - if (offset < blockSize) - { - ++pMetadata->m_FreeCount; - const VkDeviceSize freeSize = blockSize - offset; - VmaSuballocation suballoc = { - offset, // offset - freeSize, // size - VMA_NULL, // hAllocation - VMA_SUBALLOCATION_TYPE_FREE }; - VMA_ASSERT(it == pMetadata->m_Suballocations.end()); - VmaSuballocationList::iterator trailingFreeIt = pMetadata->m_Suballocations.insert(it, suballoc); - pMetadata->m_FreeSuballocationsBySize.push_back(trailingFreeIt); + if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + return true; + } + } } - - VMA_SORT( - pMetadata->m_FreeSuballocationsBySize.begin(), - pMetadata->m_FreeSuballocationsBySize.end(), - VmaSuballocationItemSizeLess()); } - - VMA_HEAVY_ASSERT(pMetadata->Validate()); } + + m_Stats.bytesMoved += currentBytesMoved; + m_Stats.allocationsMoved += currentAllocsMoved; + return false; } -void VmaDefragmentationAlgorithm_Fast::InsertSuballoc(VmaBlockMetadata_Generic* pMetadata, const VmaSuballocation& suballoc) +bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector) { - VmaSuballocationList& suballocs = pMetadata->m_Suballocations; - VmaSuballocationList::iterator elementAfter; - const VkDeviceSize last = suballocs.rbegin()->offset; - const VkDeviceSize first = suballocs.begin()->offset; + VkDeviceSize currentBytesMoved = 0; + uint32_t currentAllocsMoved = 0; + VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(data.move.internalData); - if (last <= suballoc.offset) - elementAfter = suballocs.end(); - else if (first >= suballoc.offset) - elementAfter = suballocs.begin(); - else + for (; start < end; ++start) { - const size_t suballocCount = suballocs.size(); - const VkDeviceSize step = (last - first + suballocs.begin()->size) / suballocCount; - // If offset to be inserted is closer to the end of range, search from the end - if ((suballoc.offset - first) / step > suballocCount / 2) + VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) { - elementAfter = suballocs.begin(); - for (VmaSuballocationList::reverse_iterator suballocItem = ++suballocs.rbegin(); - suballocItem != suballocs.rend(); - ++suballocItem) - { - if (suballocItem->offset <= suballoc.offset) - { - elementAfter = --suballocItem; - break; - } - } - } - else - { - elementAfter = suballocs.end(); - for (VmaSuballocationList::iterator suballocItem = ++suballocs.begin(); - suballocItem != suballocs.end(); - ++suballocItem) + if (vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + data.type, + 0, + &dst) == VK_SUCCESS) { - if (suballocItem->offset >= suballoc.offset) - { - elementAfter = suballocItem; - break; - } + data.move.dstMemory = dst->GetMemory(); + data.move.dstOffset = dst->GetOffset(); + m_Moves.push_back(data.move); + currentBytesMoved += data.size; + + if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + return true; + break; } } } - pMetadata->m_Suballocations.insert(elementAfter, suballoc); -} -#endif // _VMA_DEFRAGMENTATION_ALGORITHM_FAST_FUNCTIONS -#ifndef _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT_FUNCTIONS -VmaBlockVectorDefragmentationContext::VmaBlockVectorDefragmentationContext( - VmaAllocator hAllocator, - VmaPool hCustomPool, - VmaBlockVector* pBlockVector) - : res(VK_SUCCESS), - mutexLocked(false), - blockContexts(VmaStlAllocator<VmaBlockDefragmentationContext>(hAllocator->GetAllocationCallbacks())), - defragmentationMoves(VmaStlAllocator<VmaDefragmentationMove>(hAllocator->GetAllocationCallbacks())), - defragmentationMovesProcessed(0), - defragmentationMovesCommitted(0), - hasDefragmentationPlan(0), - m_hAllocator(hAllocator), - m_hCustomPool(hCustomPool), - m_pBlockVector(pBlockVector), - m_pAlgorithm(VMA_NULL), - m_Allocations(VmaStlAllocator<AllocInfo>(hAllocator->GetAllocationCallbacks())), - m_AllAllocations(false) {} - -VmaBlockVectorDefragmentationContext::~VmaBlockVectorDefragmentationContext() -{ - vma_delete(m_hAllocator, m_pAlgorithm); -} - -void VmaBlockVectorDefragmentationContext::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - AllocInfo info = { hAlloc, pChanged }; - m_Allocations.push_back(info); + m_Stats.bytesMoved += currentBytesMoved; + m_Stats.allocationsMoved += currentAllocsMoved; + return false; } -void VmaBlockVectorDefragmentationContext::Begin(bool overlappingMoveSupported, VmaDefragmentationFlags flags) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) { - const bool allAllocations = m_AllAllocations || - m_Allocations.size() == m_pBlockVector->CalcAllocationCount(); + // Move only between blocks - /******************************** - HERE IS THE CHOICE OF DEFRAGMENTATION ALGORITHM. - ********************************/ - - /* - Fast algorithm is supported only when certain criteria are met: - - VMA_DEBUG_MARGIN is 0. - - All allocations in this block vector are movable. - - There is no possibility of image/buffer granularity conflict. - - The defragmentation is not incremental - */ - if (VMA_DEBUG_MARGIN == 0 && - allAllocations && - !m_pBlockVector->IsBufferImageGranularityConflictPossible() && - !(flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL)) + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Fast)( - m_hAllocator, m_pBlockVector, overlappingMoveSupported); - } - else - { - m_pAlgorithm = vma_new(m_hAllocator, VmaDefragmentationAlgorithm_Generic)( - m_hAllocator, m_pBlockVector, overlappingMoveSupported); - } + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; - if (allAllocations) - { - m_pAlgorithm->AddAll(); - } - else - { - for (size_t i = 0, count = m_Allocations.size(); i < count; ++i) + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - m_pAlgorithm->AddAllocation(m_Allocations[i].hAlloc, m_Allocations[i].pChanged); + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; } } + return false; } -#endif // _VMA_BLOCK_VECTOR_DEFRAGMENTATION_CONTEXT_FUNCTIONS -#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS -VmaDefragmentationContext_T::VmaDefragmentationContext_T( - VmaAllocator hAllocator, - uint32_t flags, - VmaDefragmentationStats* pStats) - : m_hAllocator(hAllocator), - m_Flags(flags), - m_pStats(pStats), - m_CustomPoolContexts(VmaStlAllocator<VmaBlockVectorDefragmentationContext*>(hAllocator->GetAllocationCallbacks())) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector) { - memset(m_DefaultPoolContexts, 0, sizeof(m_DefaultPoolContexts)); -} + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticable gaps between them (some heuristic, ex. average size of allocation in block) -VmaDefragmentationContext_T::~VmaDefragmentationContext_T() -{ - for (size_t i = m_CustomPoolContexts.size(); i--; ) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[i]; - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); - } - for (size_t i = m_hAllocator->m_MemProps.memoryTypeCount; i--; ) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[i]; - if (pBlockVectorCtx) - { - pBlockVectorCtx->GetBlockVector()->DefragmentationEnd(pBlockVectorCtx, m_Flags, m_pStats); - vma_delete(m_hAllocator, pBlockVectorCtx); - } - } -} + VkDeviceSize currentBytesMoved = 0; + uint32_t currentAllocsMoved = 0; -void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, const VmaPool* pPools) -{ - for (uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) { - VmaPool pool = pPools[poolIndex]; - VMA_ASSERT(pool); - for(uint32_t memTypeIndex = 0; memTypeIndex < m_hAllocator->GetMemoryTypeCount(); ++memTypeIndex) + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - if(pool->m_pBlockVectors[memTypeIndex]) + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) { - // Pools with algorithm other than default are not defragmented. - if (pool->m_pBlockVectors[memTypeIndex]->GetAlgorithm() == 0) + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; - - for (size_t i = m_CustomPoolContexts.size(); i--; ) + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - if (m_CustomPoolContexts[i]->GetCustomPool() == pool) + VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &dst) == VK_SUCCESS) { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; - } - } + moveData.move.dstMemory = dst->GetMemory(); + moveData.move.dstOffset = dst->GetOffset(); + m_Moves.push_back(moveData.move); + currentBytesMoved += moveData.size; - if (!pBlockVectorDefragCtx) - { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - pool, - pool->m_pBlockVectors[memTypeIndex]); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); + if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + return true; + } } - - pBlockVectorDefragCtx->AddAll(); } } } } + + m_Stats.bytesMoved += currentBytesMoved; + m_Stats.allocationsMoved += currentAllocsMoved; + return false; } -void VmaDefragmentationContext_T::AddAllocations( - uint32_t allocationCount, - const VmaAllocation* pAllocations, - VkBool32* pAllocationsChanged) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) { - // Dispatch pAllocations among defragmentators. Create them when necessary. - for (uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + VkDeviceSize currentBytesMoved = 0; + uint32_t currentAllocsMoved = 0; + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) { - const VmaAllocation hAlloc = pAllocations[allocIndex]; - VMA_ASSERT(hAlloc); - const uint32_t memTypeIndex = hAlloc->GetMemoryTypeIndex(); - // DedicatedAlloc cannot be defragmented. - if (hAlloc->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK) + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - VmaBlockVectorDefragmentationContext* pBlockVectorDefragCtx = VMA_NULL; + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; - const VmaPool hAllocPool = hAlloc->GetBlock()->GetParentPool(); - // This allocation belongs to custom pool. - if (hAllocPool != VK_NULL_HANDLE) + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) { - // Pools with algorithm other than default are not defragmented. - if (hAllocPool->m_pBlockVectors[memTypeIndex]->GetAlgorithm() == 0) + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) { - for (size_t i = m_CustomPoolContexts.size(); i--; ) + if (metadata->GetAllocationOffset(request.allocHandle) < offset) { - if (m_CustomPoolContexts[i]->GetCustomPool() == hAllocPool) + VmaAllocation& dst = reinterpret_cast<VmaAllocation&>(moveData.move.internalData); + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &dst) == VK_SUCCESS) { - pBlockVectorDefragCtx = m_CustomPoolContexts[i]; - break; + moveData.move.dstMemory = dst->GetMemory(); + moveData.move.dstOffset = dst->GetOffset(); + m_Moves.push_back(moveData.move); + currentBytesMoved += moveData.size; + + if (IncrementCounters(currentAllocsMoved, currentBytesMoved)) + return true; } } - if (!pBlockVectorDefragCtx) - { - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - hAllocPool, - hAllocPool->m_pBlockVectors[memTypeIndex]); - m_CustomPoolContexts.push_back(pBlockVectorDefragCtx); - } } } - // This allocation belongs to default pool. - else - { - pBlockVectorDefragCtx = m_DefaultPoolContexts[memTypeIndex]; - if (!pBlockVectorDefragCtx) - { - VMA_ASSERT(m_hAllocator->m_pBlockVectors[memTypeIndex] && "Trying to use unsupported memory type!"); - - pBlockVectorDefragCtx = vma_new(m_hAllocator, VmaBlockVectorDefragmentationContext)( - m_hAllocator, - VMA_NULL, // hCustomPool - m_hAllocator->m_pBlockVectors[memTypeIndex]); - m_DefaultPoolContexts[memTypeIndex] = pBlockVectorDefragCtx; - } - } - - if (pBlockVectorDefragCtx) - { - VkBool32* const pChanged = (pAllocationsChanged != VMA_NULL) ? - &pAllocationsChanged[allocIndex] : VMA_NULL; - pBlockVectorDefragCtx->AddAllocation(hAlloc, pChanged); - } } } + + m_Stats.bytesMoved += currentBytesMoved; + m_Stats.allocationsMoved += currentAllocsMoved; + return false; } -VkResult VmaDefragmentationContext_T::Defragment( - VkDeviceSize maxCpuBytesToMove, uint32_t maxCpuAllocationsToMove, - VkDeviceSize maxGpuBytesToMove, uint32_t maxGpuAllocationsToMove, - VkCommandBuffer commandBuffer, VmaDefragmentationStats* pStats, VmaDefragmentationFlags flags) +bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) { - if (pStats) - { - memset(pStats, 0, sizeof(VmaDefragmentationStats)); - } + // First free single block, then populate it to the brim, then free another block, and so on - if (flags & VMA_DEFRAGMENTATION_FLAG_INCREMENTAL) - { - // For incremental defragmetnations, we just earmark how much we can move - // The real meat is in the defragmentation steps - m_MaxCpuBytesToMove = maxCpuBytesToMove; - m_MaxCpuAllocationsToMove = maxCpuAllocationsToMove; + // Fallback to previous algorithm since without granularity conflicts it can achieve max packing + if (vector.m_BufferImageGranularity == 1) + return ComputeDefragmentation_Full(vector); - m_MaxGpuBytesToMove = maxGpuBytesToMove; - m_MaxGpuAllocationsToMove = maxGpuAllocationsToMove; + VMA_ASSERT(m_AlgorithmState != VMA_NULL); - if (m_MaxCpuBytesToMove == 0 && m_MaxCpuAllocationsToMove == 0 && - m_MaxGpuBytesToMove == 0 && m_MaxGpuAllocationsToMove == 0) - return VK_SUCCESS; + StateExtensive& vectorState = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[index]; - return VK_NOT_READY; - } - - if (commandBuffer == VK_NULL_HANDLE) + bool texturePresent = false, bufferPresent = false, otherPresent = false; + switch (vectorState.operation) { - maxGpuBytesToMove = 0; - maxGpuAllocationsToMove = 0; - } + case StateExtensive::Operation::Done: // Vector defragmented + return false; + case StateExtensive::Operation::FindFreeBlockBuffer: + case StateExtensive::Operation::FindFreeBlockTexture: + case StateExtensive::Operation::FindFreeBlockAll: + { + // No free blocks, have to clear last one + size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; - VkResult res = VK_SUCCESS; + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount() && res >= VK_SUCCESS; - ++memTypeIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) - { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if (pBlockVectorCtx->res != VK_SUCCESS) + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) { - res = pBlockVectorCtx->res; + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + reinterpret_cast<size_t*>(m_AlgorithmState)[index] = last; + return true; } } - } - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount && res >= VK_SUCCESS; - ++customCtxIndex) - { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - pStats, flags, - maxCpuBytesToMove, maxCpuAllocationsToMove, - maxGpuBytesToMove, maxGpuAllocationsToMove, - commandBuffer); - if (pBlockVectorCtx->res != VK_SUCCESS) + if (prevMoveCount == m_Moves.size()) { - res = pBlockVectorCtx->res; + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); + } + } + else + { + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = StateExtensive::Operation::MoveBuffers; + break; + default: + VMA_ASSERT(0); + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + if (prevMoveCount == m_Moves.size()) + return ComputeDefragmentation_Extensive(vector, index); } + break; } - - return res; -} - -VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassInfo* pInfo) -{ - VmaDefragmentationPassMoveInfo* pCurrentMove = pInfo->pMoves; - uint32_t movesLeft = pInfo->moveCount; - - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) + case StateExtensive::Operation::MoveTextures: { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) + if (texturePresent) { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); - - if (pBlockVectorCtx->res < VK_SUCCESS) - continue; - - pBlockVectorCtx->hasDefragmentationPlan = true; + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); } - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } - movesLeft -= processed; - pCurrentMove += processed; + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; } + else + break; } - - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) + case StateExtensive::Operation::MoveBuffers: { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) { - pBlockVectorCtx->GetBlockVector()->Defragment( - pBlockVectorCtx, - m_pStats, m_Flags, - m_MaxCpuBytesToMove, m_MaxCpuAllocationsToMove, - m_MaxGpuBytesToMove, m_MaxGpuAllocationsToMove, - VK_NULL_HANDLE); + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } - if (pBlockVectorCtx->res < VK_SUCCESS) - continue; + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } - pBlockVectorCtx->hasDefragmentationPlan = true; + // No more buffers to move, check all others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; } - - const uint32_t processed = pBlockVectorCtx->GetBlockVector()->ProcessDefragmentations( - pBlockVectorCtx, - pCurrentMove, movesLeft); - - movesLeft -= processed; - pCurrentMove += processed; + else + break; } - - pInfo->moveCount = pInfo->moveCount - movesLeft; - - return VK_SUCCESS; -} - -VkResult VmaDefragmentationContext_T::DefragmentPassEnd() -{ - VkResult res = VK_SUCCESS; - - // Process default pools. - for (uint32_t memTypeIndex = 0; - memTypeIndex < m_hAllocator->GetMemoryTypeCount(); - ++memTypeIndex) + case StateExtensive::Operation::MoveAll: { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_DefaultPoolContexts[memTypeIndex]; - if (pBlockVectorCtx) + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) { - VMA_ASSERT(pBlockVectorCtx->GetBlockVector()); - - if (!pBlockVectorCtx->hasDefragmentationPlan) + if (otherPresent) { - res = VK_NOT_READY; - continue; + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + } - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); - - if (pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; } + return false; +} - // Process custom pools. - for (size_t customCtxIndex = 0, customCtxCount = m_CustomPoolContexts.size(); - customCtxIndex < customCtxCount; - ++customCtxIndex) +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) { - VmaBlockVectorDefragmentationContext* pBlockVectorCtx = m_CustomPoolContexts[customCtxIndex]; - VMA_ASSERT(pBlockVectorCtx && pBlockVectorCtx->GetBlockVector()); + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; - if (!pBlockVectorCtx->hasDefragmentationPlan) + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) { - res = VK_NOT_READY; - continue; - } + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; - pBlockVectorCtx->GetBlockVector()->CommitDefragmentations( - pBlockVectorCtx, m_pStats); + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } - if (pBlockVectorCtx->defragmentationMoves.size() != pBlockVectorCtx->defragmentationMovesCommitted) - res = VK_NOT_READY; + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } } - - return res; + return prevMoveCount == m_Moves.size(); } #endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS #ifndef _VMA_POOL_T_FUNCTIONS VmaPool_T::VmaPool_T( VmaAllocator hAllocator, - const VmaPoolCreateInfo& createInfo) : - m_hAllocator(hAllocator), - m_pBlockVectors{}, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize) + : m_BlockVector( + hAllocator, + this, // hParentPool + createInfo.memoryTypeIndex, + createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, + createInfo.minBlockCount, + createInfo.maxBlockCount, + (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), + createInfo.blockSize != 0, // explicitBlockSize + createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm + createInfo.priority, + VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), + createInfo.pMemoryAllocateNext), m_Id(0), - m_Name(VMA_NULL) -{ - for(uint32_t memTypeIndex = 0; memTypeIndex < hAllocator->GetMemoryTypeCount(); ++memTypeIndex) - { - // Create only supported types - if((hAllocator->GetGlobalMemoryTypeBits() & (1u << memTypeIndex)) != 0) - { - m_pBlockVectors[memTypeIndex] = vma_new(hAllocator, VmaBlockVector)( - hAllocator, - this, // hParentPool - memTypeIndex, - createInfo.blockSize != 0 ? createInfo.blockSize : hAllocator->CalcPreferredBlockSize(memTypeIndex), - createInfo.minBlockCount, - createInfo.maxBlockCount, - (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), - false, // explicitBlockSize - createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm - createInfo.priority, - VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(memTypeIndex), createInfo.minAllocationAlignment), - createInfo.pMemoryAllocateNext); - } - } -} + m_Name(VMA_NULL) {} VmaPool_T::~VmaPool_T() { VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); - for(uint32_t memTypeIndex = 0; memTypeIndex < m_hAllocator->GetMemoryTypeCount(); ++memTypeIndex) - { - vma_delete(m_hAllocator, m_pBlockVectors[memTypeIndex]); - } } void VmaPool_T::SetName(const char* pName) { - for(uint32_t memTypeIndex = 0; memTypeIndex < m_hAllocator->GetMemoryTypeCount(); ++memTypeIndex) - { - if(m_pBlockVectors[memTypeIndex]) - { - const VkAllocationCallbacks* allocs = m_pBlockVectors[memTypeIndex]->GetAllocator()->GetAllocationCallbacks(); - VmaFreeString(allocs, m_Name); + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); - if (pName != VMA_NULL) - { - m_Name = VmaCreateStringCopy(allocs, pName); - } - else - { - m_Name = VMA_NULL; - } - } + if (pName != VMA_NULL) + { + m_Name = VmaCreateStringCopy(allocs, pName); + } + else + { + m_Name = VMA_NULL; } } #endif // _VMA_POOL_T_FUNCTIONS @@ -14784,6 +13980,14 @@ void VmaAllocator_T::ImportVulkanFunctions_Static() m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; } #endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; + m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; + } +#endif } #endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 @@ -14829,6 +14033,11 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); #endif +#if VMA_VULKAN_VERSION >= 1003000 + VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif + #undef VMA_COPY_IF_NOT_NULL } @@ -14902,6 +14111,14 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() } #endif // #if VMA_MEMORY_BUDGET +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); + } +#endif + #undef VMA_FETCH_DEVICE_FUNC #undef VMA_FETCH_INSTANCE_FUNC } @@ -14950,6 +14167,14 @@ void VmaAllocator_T::ValidateVulkanFunctions() VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); } #endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); + } +#endif } VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) @@ -14966,8 +14191,8 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( VkDeviceSize alignment, bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -14998,12 +14223,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15039,12 +14266,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15078,12 +14307,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15108,12 +14339,13 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( uint32_t memTypeIndex, bool map, bool isUserDataString, + bool isMappingAllowed, bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, const void* pNextChain) @@ -15153,8 +14385,8 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( bool canContainBufferWithDeviceAddress = true; if(dedicatedBuffer != VK_NULL_HANDLE) { - canContainBufferWithDeviceAddress = dedicatedBufferUsage == UINT32_MAX || // Usage flags unknown - (dedicatedBufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == UINT32_MAX || // Usage flags unknown + (dedicatedBufferImageUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; } else if(dedicatedImage != VK_NULL_HANDLE) { @@ -15199,6 +14431,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( allocInfo, map, isUserDataString, + isMappingAllowed, pUserData, pAllocations + allocIndex); if(res != VK_SUCCESS) @@ -15235,7 +14468,6 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); - currAlloc->SetUserData(this, VMA_NULL); m_AllocationObjectAllocator.Free(currAlloc); } @@ -15253,6 +14485,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation) { @@ -15282,7 +14515,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString); + *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); (*pAllocation)->SetUserData(this, pUserData); m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); @@ -15358,6 +14591,62 @@ void VmaAllocator_T::GetImageMemoryRequirements( } } +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkFlags bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. + if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + + VmaCountBitsSet(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. + if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + VkResult VmaAllocator_T::CalcMemTypeParams( VmaAllocationCreateInfo& inoutCreateInfo, uint32_t memTypeIndex, @@ -15390,29 +14679,38 @@ VkResult VmaAllocator_T::CalcAllocationParams( bool dedicatedRequired, bool dedicatedPreferred) { + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. if(dedicatedRequired || - // If memory is lazily allocated, it should be always dedicated. inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) { inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; } - if(inoutCreateInfo.pool != VK_NULL_HANDLE && (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + if(inoutCreateInfo.pool != VK_NULL_HANDLE) { - // Assuming here every block has the same block size and priority. - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) { - if(inoutCreateInfo.pool->m_pBlockVectors[memTypeIndex]) - { - if(inoutCreateInfo.pool->m_pBlockVectors[memTypeIndex]->HasExplicitBlockSize()) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); - return VK_ERROR_FEATURE_NOT_PRESENT; - } - inoutCreateInfo.priority = inoutCreateInfo.pool->m_pBlockVectors[memTypeIndex]->GetPriority(); - break; - } + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); + return VK_ERROR_FEATURE_NOT_PRESENT; } + inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); } if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && @@ -15427,6 +14725,21 @@ VkResult VmaAllocator_T::CalcAllocationParams( { inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. + // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. + if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + return VK_SUCCESS; } @@ -15435,8 +14748,8 @@ VkResult VmaAllocator_T::AllocateMemory( bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -15456,46 +14769,67 @@ VkResult VmaAllocator_T::AllocateMemory( if(res != VK_SUCCESS) return res; - // Bit mask of memory Vulkan types acceptable for this allocation. - uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; - uint32_t memTypeIndex = UINT32_MAX; - res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfoFinal, &memTypeIndex); - // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. - if(res != VK_SUCCESS) - return res; - do + if(createInfoFinal.pool != VK_NULL_HANDLE) { - VmaBlockVector* blockVector = createInfoFinal.pool == VK_NULL_HANDLE ? m_pBlockVectors[memTypeIndex] : createInfoFinal.pool->m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); - VmaDedicatedAllocationList& dedicatedAllocations = createInfoFinal.pool == VK_NULL_HANDLE ? m_DedicatedAllocations[memTypeIndex] : createInfoFinal.pool->m_DedicatedAllocations[memTypeIndex]; - res = AllocateMemoryOfType( + VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; + return AllocateMemoryOfType( createInfoFinal.pool, vkMemReq.size, vkMemReq.alignment, - requiresDedicatedAllocation || prefersDedicatedAllocation, + prefersDedicatedAllocation, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, createInfoFinal, - memTypeIndex, + blockVector.GetMemoryTypeIndex(), suballocType, - dedicatedAllocations, - *blockVector, + createInfoFinal.pool->m_DedicatedAllocations, + blockVector, allocationCount, pAllocations); - // Allocation succeeded - if(res == VK_SUCCESS) - return VK_SUCCESS; + } + else + { + // Bit mask of memory Vulkan types acceptable for this allocation. + uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; + uint32_t memTypeIndex = UINT32_MAX; + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. + if(res != VK_SUCCESS) + return res; + do + { + VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); + res = AllocateMemoryOfType( + VK_NULL_HANDLE, + vkMemReq.size, + vkMemReq.alignment, + requiresDedicatedAllocation || prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + memTypeIndex, + suballocType, + m_DedicatedAllocations[memTypeIndex], + *blockVector, + allocationCount, + pAllocations); + // Allocation succeeded + if(res == VK_SUCCESS) + return VK_SUCCESS; - // Remove old memTypeIndex from list of possibilities. - memoryTypeBits &= ~(1u << memTypeIndex); - // Find alternative memTypeIndex. - res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfoFinal, &memTypeIndex); - } while(res == VK_SUCCESS); + // Remove old memTypeIndex from list of possibilities. + memoryTypeBits &= ~(1u << memTypeIndex); + // Find alternative memTypeIndex. + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + } while(res == VK_SUCCESS); - // No other matching memory type index could be found. - // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + // No other matching memory type index could be found. + // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } } void VmaAllocator_T::FreeMemory( @@ -15521,16 +14855,16 @@ void VmaAllocator_T::FreeMemory( { VmaBlockVector* pBlockVector = VMA_NULL; VmaPool hPool = allocation->GetParentPool(); - const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); if(hPool != VK_NULL_HANDLE) { - pBlockVector = hPool->m_pBlockVectors[memTypeIndex]; + pBlockVector = &hPool->m_BlockVector; } else { + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); pBlockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); } - VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); pBlockVector->Free(allocation); } break; @@ -15540,29 +14874,25 @@ void VmaAllocator_T::FreeMemory( default: VMA_ASSERT(0); } - - m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); - allocation->SetUserData(this, VMA_NULL); - m_AllocationObjectAllocator.Free(allocation); } } } -void VmaAllocator_T::CalculateStats(VmaStats* pStats) +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) { // Initialize. - VmaInitStatInfo(pStats->total); - for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) - VmaInitStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - VmaInitStatInfo(pStats->memoryHeap[i]); + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); // Process default pools. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; if (pBlockVector != VMA_NULL) - pBlockVector->AddStats(pStats); + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } // Process custom pools. @@ -15570,33 +14900,34 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats) VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) { - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if (pool->m_pBlockVectors[memTypeIndex]) - { - VmaBlockVector& blockVector = *pool->m_pBlockVectors[memTypeIndex]; - blockVector.AddStats(pStats); - const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - pool->m_DedicatedAllocations[memTypeIndex].AddStats(pStats, memTypeIndex, memHeapIndex); - } - } + VmaBlockVector& blockVector = pool->m_BlockVector; + const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); } } // Process dedicated allocations. for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) { - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - m_DedicatedAllocations[memTypeIndex].AddStats(pStats, memTypeIndex, memHeapIndex); + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); } - // Postprocess. - VmaPostprocessCalcStatInfo(pStats->total); - for(size_t i = 0; i < GetMemoryTypeCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < GetMemoryHeapCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]); + // Sum from memory heaps to total. + for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) + VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + + VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || + pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); + VMA_ASSERT(pStats->total.unusedRangeCount == 0 || + pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); } void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) @@ -15611,13 +14942,15 @@ void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, u { const uint32_t heapIndex = firstHeap + i; - outBudgets->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudgets->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) { outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + - outBudgets->blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; } else { @@ -15642,66 +14975,17 @@ void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, u { const uint32_t heapIndex = firstHeap + i; - outBudgets->blockBytes = m_Budget.m_BlockBytes[heapIndex]; - outBudgets->allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; - outBudgets->usage = outBudgets->blockBytes; + outBudgets->usage = outBudgets->statistics.blockBytes; outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. } } } -VkResult VmaAllocator_T::DefragmentationBegin( - const VmaDefragmentationInfo2& info, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext* pContext) -{ - if(info.pAllocationsChanged != VMA_NULL) - { - memset(info.pAllocationsChanged, 0, info.allocationCount * sizeof(VkBool32)); - } - - *pContext = vma_new(this, VmaDefragmentationContext_T)( - this, info.flags, pStats); - - (*pContext)->AddPools(info.poolCount, info.pPools); - (*pContext)->AddAllocations( - info.allocationCount, info.pAllocations, info.pAllocationsChanged); - - VkResult res = (*pContext)->Defragment( - info.maxCpuBytesToMove, info.maxCpuAllocationsToMove, - info.maxGpuBytesToMove, info.maxGpuAllocationsToMove, - info.commandBuffer, pStats, info.flags); - - if(res != VK_NOT_READY) - { - vma_delete(this, *pContext); - *pContext = VMA_NULL; - } - - return res; -} - -VkResult VmaAllocator_T::DefragmentationEnd( - VmaDefragmentationContext context) -{ - vma_delete(this, context); - return VK_SUCCESS; -} - -VkResult VmaAllocator_T::DefragmentationPassBegin( - VmaDefragmentationPassInfo* pInfo, - VmaDefragmentationContext context) -{ - return context->DefragmentPassBegin(pInfo); -} - -VkResult VmaAllocator_T::DefragmentationPassEnd( - VmaDefragmentationContext context) -{ - return context->DefragmentPassEnd(); -} - void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) { pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); @@ -15732,26 +15016,27 @@ VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPoo { return VK_ERROR_INITIALIZATION_FAILED; } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } if(newCreateInfo.minAllocationAlignment > 0) { VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); } - *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo); + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + + VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); + if(res != VK_SUCCESS) { - // Create only supported types - if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) - { - VkResult res = (*pPool)->m_pBlockVectors[memTypeIndex]->CreateMinBlocks(); - if(res != VK_SUCCESS) - { - vma_delete(this, *pPool); - *pPool = VMA_NULL; - return res; - } - } + vma_delete(this, *pPool); + *pPool = VMA_NULL; + return res; } // Add to m_Pools. @@ -15775,22 +15060,18 @@ void VmaAllocator_T::DestroyPool(VmaPool pool) vma_delete(this, pool); } -void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats) +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) { - pPoolStats->size = 0; - pPoolStats->unusedSize = 0; - pPoolStats->allocationCount = 0; - pPoolStats->unusedRangeCount = 0; - pPoolStats->blockCount = 0; + VmaClearStatistics(*pPoolStats); + pool->m_BlockVector.AddStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) - { - pool->m_pBlockVectors[memTypeIndex]->AddPoolStats(pPoolStats); - pool->m_DedicatedAllocations[memTypeIndex].AddPoolStats(pPoolStats); - } - } +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ + VmaClearDetailedStatistics(*pPoolStats); + pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); } void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) @@ -15807,13 +15088,7 @@ void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) { - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) - { - return hPool->m_pBlockVectors[memTypeIndex]->CheckCorruption(); - } - } + return hPool->m_BlockVector.CheckCorruption(); } VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) @@ -15845,21 +15120,18 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) { - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) { - if(pool->m_pBlockVectors[memTypeIndex] && ((1u << memTypeIndex) & memoryTypeBits) != 0) + VkResult localRes = pool->m_BlockVector.CheckCorruption(); + switch(localRes) { - VkResult localRes = pool->m_pBlockVectors[memTypeIndex]->CheckCorruption(); - switch(localRes) - { - case VK_ERROR_FEATURE_NOT_PRESENT: - break; - case VK_SUCCESS: - finalRes = VK_SUCCESS; - break; - default: - return localRes; - } + case VK_ERROR_FEATURE_NOT_PRESENT: + break; + case VK_SUCCESS: + finalRes = VK_SUCCESS; + break; + default: + return localRes; } } } @@ -15903,6 +15175,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc { m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; } + ++m_Budget.m_BlockCount[heapIndex]; // VULKAN CALL vkAllocateMemory. VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); @@ -15923,6 +15196,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc } else { + --m_Budget.m_BlockCount[heapIndex]; m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; } @@ -15940,7 +15214,9 @@ void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, Vk // VULKAN CALL vkFreeMemory. (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - m_Budget.m_BlockBytes[MemoryTypeIndexToHeapIndex(memoryType)] -= size; + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; --m_DeviceMemoryCount; } @@ -16181,7 +15457,7 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) else { // Custom pool - parentPool->m_DedicatedAllocations[memTypeIndex].Unregister(allocation); + parentPool->m_DedicatedAllocations.Unregister(allocation); } VkDeviceMemory hMemory = allocation->GetMemory(); @@ -16198,6 +15474,9 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + m_AllocationObjectAllocator.Free(allocation); + VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); } @@ -16456,18 +15735,12 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) json.EndString(); json.BeginObject(); - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if (pool->m_pBlockVectors[memTypeIndex]) - { - pool->m_pBlockVectors[memTypeIndex]->PrintDetailedMap(json); - } + pool->m_BlockVector.PrintDetailedMap(json); - if (!pool->m_DedicatedAllocations[memTypeIndex].IsEmpty()) - { - json.WriteString("DedicatedAllocations"); - pool->m_DedicatedAllocations->BuildStatsString(json); - } + if (!pool->m_DedicatedAllocations.IsEmpty()) + { + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); } json.EndObject(); } @@ -16554,13 +15827,13 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( allocator->SetCurrentFrameIndex(frameIndex); } -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStats( +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( VmaAllocator allocator, - VmaStats* pStats) + VmaTotalStatistics* pStats) { VMA_ASSERT(allocator && pStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->CalculateStats(pStats); + allocator->CalculateStatistics(pStats); } VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( @@ -16590,11 +15863,11 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); - VmaStats stats; - allocator->CalculateStats(&stats); + VmaTotalStatistics stats; + allocator->CalculateStatistics(&stats); json.WriteString("Total"); - VmaPrintStatInfo(json, stats.total); + VmaPrintDetailedStatistics(json, stats.total); for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) { @@ -16618,9 +15891,13 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( json.BeginObject(); { json.WriteString("BlockBytes"); - json.WriteNumber(budgets[heapIndex].blockBytes); + json.WriteNumber(budgets[heapIndex].statistics.blockBytes); json.WriteString("AllocationBytes"); - json.WriteNumber(budgets[heapIndex].allocationBytes); + json.WriteNumber(budgets[heapIndex].statistics.allocationBytes); + json.WriteString("BlockCount"); + json.WriteNumber(budgets[heapIndex].statistics.blockCount); + json.WriteString("AllocationCount"); + json.WriteNumber(budgets[heapIndex].statistics.allocationCount); json.WriteString("Usage"); json.WriteNumber(budgets[heapIndex].usage); json.WriteString("Budget"); @@ -16628,10 +15905,10 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( } json.EndObject(); - if(stats.memoryHeap[heapIndex].blockCount > 0) + if(stats.memoryHeap[heapIndex].statistics.blockCount > 0) { json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryHeap[heapIndex]); + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); } for(uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) @@ -16685,10 +15962,10 @@ VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( #endif // #if VK_AMD_device_coherent_memory json.EndArray(); - if(stats.memoryType[typeIndex].blockCount > 0) + if(stats.memoryType[typeIndex].statistics.blockCount > 0) { json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryType[typeIndex]); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); } json.EndObject(); @@ -16734,91 +16011,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); - memoryTypeBits &= allocator->GetGlobalMemoryTypeBits(); - - if(pAllocationCreateInfo->memoryTypeBits != 0) - { - memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; - } - - uint32_t requiredFlags = pAllocationCreateInfo->requiredFlags; - uint32_t preferredFlags = pAllocationCreateInfo->preferredFlags; - uint32_t notPreferredFlags = 0; - - // Convert usage to requiredFlags and preferredFlags. - switch(pAllocationCreateInfo->usage) - { - case VMA_MEMORY_USAGE_UNKNOWN: - break; - case VMA_MEMORY_USAGE_GPU_ONLY: - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_CPU_ONLY: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - break; - case VMA_MEMORY_USAGE_CPU_TO_GPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_GPU_TO_CPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - break; - case VMA_MEMORY_USAGE_CPU_COPY: - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: - requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; - break; - default: - VMA_ASSERT(0); - break; - } - - // Avoid DEVICE_COHERENT unless explicitly requested. - if(((pAllocationCreateInfo->requiredFlags | pAllocationCreateInfo->preferredFlags) & - (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) - { - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY; - } - - *pMemoryTypeIndex = UINT32_MAX; - uint32_t minCost = UINT32_MAX; - for(uint32_t memTypeIndex = 0, memTypeBit = 1; - memTypeIndex < allocator->GetMemoryTypeCount(); - ++memTypeIndex, memTypeBit <<= 1) - { - // This memory type is acceptable according to memoryTypeBits bitmask. - if((memTypeBit & memoryTypeBits) != 0) - { - const VkMemoryPropertyFlags currFlags = - allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; - // This memory type contains requiredFlags. - if((requiredFlags & ~currFlags) == 0) - { - // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + - VmaCountBitsSet(currFlags & notPreferredFlags); - // Remember memory type with lowest cost. - if(currCost < minCost) - { - *pMemoryTypeIndex = memTypeIndex; - if(currCost == 0) - { - return VK_SUCCESS; - } - minCost = currCost; - } - } - } - } - return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, UINT32_MAX, pMemoryTypeIndex); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( @@ -16833,24 +16026,40 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); const VkDevice hDev = allocator->m_hDevice; - VkBuffer hBuffer = VK_NULL_HANDLE; const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); - VkResult res = funcs->vkCreateBuffer( - hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); - if(res == VK_SUCCESS) + VkResult res; + +#if VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceBufferMemoryRequirements) { - VkMemoryRequirements memReq = {}; - funcs->vkGetBufferMemoryRequirements( - hDev, hBuffer, &memReq); + // Can query straight from VkBufferCreateInfo :) + VkDeviceBufferMemoryRequirements devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS}; + devBufMemReq.pCreateInfo = pBufferCreateInfo; - res = vmaFindMemoryTypeIndex( - allocator, - memReq.memoryTypeBits, - pAllocationCreateInfo, - pMemoryTypeIndex); + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); - funcs->vkDestroyBuffer( - hDev, hBuffer, allocator->GetAllocationCallbacks()); + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, pBufferCreateInfo->usage, pMemoryTypeIndex); + } + else +#endif // #if VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy buffer to query :( + VkBuffer hBuffer = VK_NULL_HANDLE; + res = funcs->vkCreateBuffer( + hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, pBufferCreateInfo->usage, pMemoryTypeIndex); + + funcs->vkDestroyBuffer( + hDev, hBuffer, allocator->GetAllocationCallbacks()); + } } return res; } @@ -16867,24 +16076,42 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); const VkDevice hDev = allocator->m_hDevice; - VkImage hImage = VK_NULL_HANDLE; const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); - VkResult res = funcs->vkCreateImage( - hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); - if(res == VK_SUCCESS) + VkResult res; + +#if VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceImageMemoryRequirements) + { + // Can query straight from VkImageCreateInfo :) + VkDeviceImageMemoryRequirements devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS}; + devImgMemReq.pCreateInfo = pImageCreateInfo; + VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && + "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, pImageCreateInfo->usage, pMemoryTypeIndex); + } + else +#endif // #if VMA_VULKAN_VERSION >= 1003000 { - VkMemoryRequirements memReq = {}; - funcs->vkGetImageMemoryRequirements( - hDev, hImage, &memReq); + // Must create a dummy image to query :( + VkImage hImage = VK_NULL_HANDLE; + res = funcs->vkCreateImage( + hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); - res = vmaFindMemoryTypeIndex( - allocator, - memReq.memoryTypeBits, - pAllocationCreateInfo, - pMemoryTypeIndex); + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, pImageCreateInfo->usage, pMemoryTypeIndex); - funcs->vkDestroyImage( - hDev, hImage, allocator->GetAllocationCallbacks()); + funcs->vkDestroyImage( + hDev, hImage, allocator->GetAllocationCallbacks()); + } } return res; } @@ -16921,16 +16148,28 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( allocator->DestroyPool(pool); } -VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStats( +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( VmaAllocator allocator, VmaPool pool, - VmaPoolStats* pPoolStats) + VmaDetailedStatistics* pPoolStats) { VMA_ASSERT(allocator && pool && pPoolStats); VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->GetPoolStats(pool, pPoolStats); + allocator->CalculatePoolStatistics(pool, pPoolStats); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) @@ -16990,8 +16229,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, 1, // allocationCount @@ -17029,8 +16268,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, allocationCount, @@ -17072,8 +16311,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, buffer, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17111,8 +16350,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage image, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, 1, // allocationCount @@ -17319,123 +16558,64 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( return allocator->CheckCorruption(memoryTypeBits); } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragment( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( VmaAllocator allocator, - const VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats) -{ - // Deprecated interface, reimplemented using new one. - - VmaDefragmentationInfo2 info2 = {}; - info2.allocationCount = (uint32_t)allocationCount; - info2.pAllocations = pAllocations; - info2.pAllocationsChanged = pAllocationsChanged; - if(pDefragmentationInfo != VMA_NULL) - { - info2.maxCpuAllocationsToMove = pDefragmentationInfo->maxAllocationsToMove; - info2.maxCpuBytesToMove = pDefragmentationInfo->maxBytesToMove; - } - else - { - info2.maxCpuAllocationsToMove = UINT32_MAX; - info2.maxCpuBytesToMove = VK_WHOLE_SIZE; - } - // info2.flags, maxGpuAllocationsToMove, maxGpuBytesToMove, commandBuffer deliberately left zero. - - VmaDefragmentationContext ctx; - VkResult res = vmaDefragmentationBegin(allocator, &info2, pDefragmentationStats, &ctx); - if(res == VK_NOT_READY) - { - res = vmaDefragmentationEnd( allocator, ctx); - } - return res; -} - -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationBegin( - VmaAllocator allocator, - const VmaDefragmentationInfo2* pInfo, - VmaDefragmentationStats* pStats, - VmaDefragmentationContext *pContext) + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) { VMA_ASSERT(allocator && pInfo && pContext); - // Degenerate case: Nothing to defragment. - if(pInfo->allocationCount == 0 && pInfo->poolCount == 0) - { - return VK_SUCCESS; - } - - VMA_ASSERT(pInfo->allocationCount == 0 || pInfo->pAllocations != VMA_NULL); - VMA_ASSERT(pInfo->poolCount == 0 || pInfo->pPools != VMA_NULL); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->allocationCount, pInfo->pAllocations)); - VMA_HEAVY_ASSERT(VmaValidatePointerArray(pInfo->poolCount, pInfo->pPools)); - - VMA_DEBUG_LOG("vmaDefragmentationBegin"); + VMA_DEBUG_LOG("vmaBeginDefragmentation"); VMA_DEBUG_GLOBAL_MUTEX_LOCK - VkResult res = allocator->DefragmentationBegin(*pInfo, pStats, pContext); - - return res; + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; } -VMA_CALL_PRE VkResult VMA_CALL_POST vmaDefragmentationEnd( +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentation( VmaAllocator allocator, - VmaDefragmentationContext context) + VmaDefragmentationContext context, + VmaDefragmentationStats* pStats) { - VMA_ASSERT(allocator); + VMA_ASSERT(allocator && context); - VMA_DEBUG_LOG("vmaDefragmentationEnd"); + VMA_DEBUG_LOG("vmaEndDefragmentation"); - if(context != VK_NULL_HANDLE) - { - VMA_DEBUG_GLOBAL_MUTEX_LOCK - return allocator->DefragmentationEnd(context); - } - else - { - return VK_SUCCESS; - } + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); + return VK_SUCCESS; } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context, - VmaDefragmentationPassInfo* pInfo - ) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); - VMA_ASSERT(pInfo); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - { - pInfo->moveCount = 0; - return VK_SUCCESS; - } - - return allocator->DefragmentationPassBegin(pInfo, context); + return context->DefragmentPassBegin(*pPassInfo); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( - VmaAllocator allocator, - VmaDefragmentationContext context) + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) { - VMA_ASSERT(allocator); + VMA_ASSERT(context && pPassInfo); VMA_DEBUG_LOG("vmaEndDefragmentationPass"); - VMA_DEBUG_GLOBAL_MUTEX_LOCK - if(context == VK_NULL_HANDLE) - return VK_SUCCESS; + VMA_DEBUG_GLOBAL_MUTEX_LOCK - return allocator->DefragmentationPassEnd(context); + return context->DefragmentPassEnd(*pPassInfo); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( @@ -17547,8 +16727,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer - pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + pBufferCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17642,8 +16822,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( requiresDedicatedAllocation, prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer - pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + pBufferCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17763,8 +16943,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage *pImage, // dedicatedImage + pImageCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, suballocType, 1, // allocationCount @@ -17917,13 +17097,22 @@ VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock virtualBlock->SetAllocationUserData(allocation, pUserData); } -VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStats(VmaVirtualBlock VMA_NOT_NULL virtualBlock, - VmaStatInfo* VMA_NOT_NULL pStatInfo) +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) { - VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStatInfo != VMA_NULL); - VMA_DEBUG_LOG("vmaCalculateVirtualBlockStats"); + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); VMA_DEBUG_GLOBAL_MUTEX_LOCK; - virtualBlock->CalculateStats(*pStatInfo); + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); } #if VMA_STATS_STRING_ENABLED @@ -18054,7 +17243,7 @@ bufferInfo.size = 65536; bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer buffer; VmaAllocation allocation; @@ -18078,8 +17267,8 @@ appropriate members of VmaAllocationCreateInfo structure, as described below. You can also combine multiple methods. -# If you just want to find memory type index that meets your requirements, you - can use function: vmaFindMemoryTypeIndex(), vmaFindMemoryTypeIndexForBufferInfo(), - vmaFindMemoryTypeIndexForImageInfo(). + can use function: vmaFindMemoryTypeIndexForBufferInfo(), + vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). -# If you want to allocate a region of device memory without association with any specific image or buffer, you can use function vmaAllocateMemory(). Usage of this function is not recommended and usually not needed. @@ -18090,9 +17279,10 @@ You can also combine multiple methods. vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). --# If you want to create a buffer or an image, allocate memory for it and bind +-# **This is the easiest and recommended way to use this library:** + If you want to create a buffer or an image, allocate memory for it and bind them together, all in one call, you can use function vmaCreateBuffer(), - vmaCreateImage(). This is the easiest and recommended way to use this library. + vmaCreateImage(). When using 3. or 4., the library internally queries Vulkan for memory types supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) @@ -18110,11 +17300,12 @@ It is valid, although not very useful. The easiest way to specify memory requirements is to fill member VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. It defines high level, common usage types. -For more details, see description of this enum. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. For example, if you want to create a uniform buffer that will be filled using -transfer only once or infrequently and used for rendering every frame, you can -do it using following code: +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. \code VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -18122,13 +17313,56 @@ bufferInfo.size = 65536; bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer buffer; VmaAllocation allocation; vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); \endcode +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. + +When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory, +you also need to specify one of the host access flags: +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +so you can map it. + +For example, a staging buffer that will be filled via mapped pointer and then +used as a source of transfer to the buffer decribed previously can be created like this. +It will likely and up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT` +but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM). + +\code +VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +stagingBufferInfo.size = 65536; +stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo stagingAllocInfo = {}; +stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO; +stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer stagingBuffer; +VmaAllocation stagingAllocation; +vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr); +\endcode + +For more examples of creating different kinds of resources, see chapter \ref usage_patterns. + +Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows +about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, +so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc. +If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting +memory type, as decribed below. + +\note +Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, +`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) +are still available and work same way as in previous versions of the library +for backward compatibility, but they are not recommended. + \section choosing_memory_type_required_preferred_flags Required and preferred flags You can specify more detailed requirements by filling members @@ -18142,7 +17376,7 @@ use following code: VmaAllocationCreateInfo allocInfo = {}; allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; -allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; +allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer buffer; VmaAllocation allocation; @@ -18152,8 +17386,8 @@ vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullpt A memory type is chosen that has all the required flags and as many preferred flags set as possible. -If you use VmaAllocationCreateInfo::usage, it is just internally converted to -a set of required and preferred flags. +Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags, +plus some extra "magic" (heuristics). \section choosing_memory_type_explicit_memory_types Explicit memory types @@ -18220,6 +17454,13 @@ Mapping the same `VkDeviceMemory` block multiple times is illegal - only one map This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. Because of this, Vulkan Memory Allocator provides following facilities: +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but they can still be used for consistency. + \section memory_mapping_mapping_functions Mapping functions The library provides following functions for mapping of a specific #VmaAllocation: vmaMapMemory(), vmaUnmapMemory(). @@ -18232,16 +17473,15 @@ Example: \code // Having these objects initialized: - struct ConstantBuffer { ... }; -ConstantBuffer constantBufferData; +ConstantBuffer constantBufferData = ... -VmaAllocator allocator; -VkBuffer constantBuffer; -VmaAllocation constantBufferAllocation; +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... // You can map and fill your buffer using following code: @@ -18278,8 +17518,9 @@ bufCreateInfo.size = sizeof(ConstantBuffer); bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer buf; VmaAllocation alloc; @@ -18290,18 +17531,12 @@ vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allo memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); \endcode -There are some exceptions though, when you should consider mapping memory only for a short period of time: - -- When operating system is Windows 7 or 8.x (Windows 10 is not affected because it uses WDDM2), - device is discrete AMD GPU, - and memory type is the special 256 MiB pool of `DEVICE_LOCAL + HOST_VISIBLE` memory - (selected when you use #VMA_MEMORY_USAGE_CPU_TO_GPU), - then whenever a memory block allocated from this memory type stays mapped - for the time of any call to `vkQueueSubmit()` or `vkQueuePresentKHR()`, this - block is migrated by WDDM to system RAM, which degrades performance. It doesn't - matter if that particular memory block is actually used by the command buffer - being submitted. -- Keeping many large memory blocks mapped may impact performance or stability of some debugging tools. +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. \section memory_mapping_cache_control Cache flush and invalidate @@ -18322,86 +17557,9 @@ In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocatio within blocks are aligned to this value, so their offsets are always multiply of `nonCoherentAtomSize` and two different allocations never share same "line" of this size. -Please note that memory allocated with #VMA_MEMORY_USAGE_CPU_ONLY is guaranteed to be `HOST_COHERENT`. - -Also, Windows drivers from all 3 **PC** GPU vendors (AMD, Intel, NVIDIA) +Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA) currently provide `HOST_COHERENT` flag on all memory types that are -`HOST_VISIBLE`, so on this platform you may not need to bother. - -\section memory_mapping_finding_if_memory_mappable Finding out if memory is mappable - -It may happen that your allocation ends up in memory that is `HOST_VISIBLE` (available for mapping) -despite it wasn't explicitly requested. -For example, application may work on integrated graphics with unified memory (like Intel) or -allocation from video memory might have failed, so the library chose system memory as fallback. - -You can detect this case and map such allocation to access its memory on CPU directly, -instead of launching a transfer operation. -In order to do that: call vmaGetAllocationMemoryProperties() -and look for `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - -VkBuffer buf; -VmaAllocation alloc; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); - -VkMemoryPropertyFlags memFlags; -vmaGetAllocationMemoryProperties(allocator, alloc, &memFlags); -if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) -{ - // Allocation ended up in mappable memory. You can map it and access it directly. - void* mappedData; - vmaMapMemory(allocator, alloc, &mappedData); - memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); - vmaUnmapMemory(allocator, alloc); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode - -You can even use #VMA_ALLOCATION_CREATE_MAPPED_BIT flag while creating allocations -that are not necessarily `HOST_VISIBLE` (e.g. using #VMA_MEMORY_USAGE_GPU_ONLY). -If the allocation ends up in memory type that is `HOST_VISIBLE`, it will be persistently mapped and you can use it directly. -If not, the flag is just ignored. -Example: - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -if(allocInfo.pMappedData != nullptr) -{ - // Allocation ended up in mappable memory. - // It is persistently mapped. You can access it directly. - memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode +`HOST_VISIBLE`, so on PC you may not need to bother. \page staying_within_budget Staying within budget @@ -18426,8 +17584,8 @@ To query for current memory usage and available budget, use function vmaGetHeapB Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. Please note that this function returns different information and works faster than -vmaCalculateStats(). vmaGetHeapBudgets() can be called every frame or even before every -allocation, while vmaCalculateStats() is intended to be used rarely, +vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every +allocation, while vmaCalculateStatistics() is intended to be used rarely, only to obtain statistical information, e.g. for debugging purposes. It is recommended to use <b>VK_EXT_memory_budget</b> device extension to obtain information @@ -18457,20 +17615,27 @@ budget, by default the library still tries to create it, leaving it to the Vulka implementation whether the allocation succeeds or fails. You can change this behavior by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is not made if it would exceed the budget or if the budget is already exceeded. -The allocation then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +VMA then tries to make the allocation from the next eligible Vulkan memory type. +The all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag when creating resources that are not essential for the application (e.g. the texture of a specific object) and not to pass it when creating critically important resources (e.g. render targets). +On AMD graphics cards there is a custom vendor extension available: <b>VK_AMD_memory_overallocation_behavior</b> +that allows to control the behavior of the Vulkan implementation in out-of-memory cases - +whether it should fail with an error code or still allow the allocation. +Usage of this extension involves only passing extra structure on Vulkan device creation, +so it is out of scope of this library. + Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure a new allocation is created only when it fits inside one of the existing memory blocks. If it would require to allocate a new block, if fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. This also ensures that the function call is very fast because it never goes to Vulkan to obtain a new block. -Please note that creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount -set to more than 0 will try to allocate memory blocks without checking whether they +\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount +set to more than 0 will currently try to allocate memory blocks without checking whether they fit within budget. @@ -18544,7 +17709,7 @@ finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBi // Validate if(finalMemReq.memoryTypeBits != 0) VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; VmaAllocation alloc; res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); @@ -18573,7 +17738,7 @@ See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ - You can create more complex layout where different images and buffers are bound at different offsets inside one large allocation. For example, one can imagine a big texture used in some render passes, aliasing with a set of many small buffers -used between in some further passes. To bind a resource at non-zero offset of an allocation, +used between in some further passes. To bind a resource at non-zero offset in an allocation, use vmaBindBufferMemory2() / vmaBindImageMemory2(). - Before allocating memory for the resources you want to alias, check `memoryTypeBits` returned in memory requirements of each resource to make sure the bits overlap. @@ -18598,6 +17763,7 @@ It can be useful if you want to: - Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. - Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. To use custom memory pools: @@ -18644,6 +17810,14 @@ It is supported only when VmaPoolCreateInfo::blockSize = 0. To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +\note Excessive use of custom pools is a common mistake when using this library. +Custom pools may be useful for special purposes - when you want to +keep certain type of resources separate e.g. to reserve minimum amount of memory +for them or limit maximum amount of memory they can occupy. For most +resources this is not needed and so it is not recommended to create #VmaPool +objects and allocations out of them. Allocating from the default pool is sufficient. + + \section custom_memory_pools_MemTypeIndex Choosing memory type index When creating a pool, you must explicitly specify memory type index. @@ -18654,11 +17828,11 @@ that you are going to create in that pool. \code VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -exampleBufCreateInfo.size = 1024; // Whatever. -exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; // Change if needed. +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; // Change if needed. +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; uint32_t memTypeIndex; vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); @@ -18759,39 +17933,6 @@ you can achieve behavior of a ring buffer / queue. Ring buffer is available only in pools with one memory block - VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. -\section buddy_algorithm Buddy allocation algorithm - -There is another allocation algorithm that can be used with custom pools, called -"buddy". Its internal data structure is based on a binary tree of blocks, each having -size that is a power of two and a half of its parent's size. When you want to -allocate memory of certain size, a free node in the tree is located. If it is too -large, it is recursively split into two halves (called "buddies"). However, if -requested allocation size is not a power of two, the size of the allocation is -aligned up to the nearest power of two and the remaining space is wasted. When -two buddy nodes become free, they are merged back into one larger node. - -![Buddy allocator](../gfx/Buddy_allocator.png) - -The advantage of buddy allocation algorithm over default algorithm is faster -allocation and deallocation, as well as smaller external fragmentation. The -disadvantage is more wasted space (internal fragmentation). -For more information, please search the Internet for "Buddy memory allocation" - -sources that describe this concept in general. - -To use buddy allocation algorithm with a custom pool, add flag -#VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating -#VmaPool object. - -Several limitations apply to pools that use buddy algorithm: - -- It is recommended to use VmaPoolCreateInfo::blockSize that is a power of two. - Otherwise, only largest power of two smaller than the size is used for - allocations. The remaining space always stays unused. -- [Margins](@ref debugging_memory_usage_margins) and - [corruption detection](@ref debugging_memory_usage_corruption_detection) - don't work in such pools. -- [Defragmentation](@ref defragmentation) doesn't work with allocations made from - such pool. \page defragmentation Defragmentation @@ -18801,236 +17942,114 @@ to find a continuous range of free memory for a new allocation despite there is enough free space, just scattered across many small free ranges between existing allocations. -To mitigate this problem, you can use defragmentation feature: -structure #VmaDefragmentationInfo2, function vmaDefragmentationBegin(), vmaDefragmentationEnd(). -Given set of allocations, -this function can move them to compact used memory, ensure more continuous free -space and possibly also free some `VkDeviceMemory` blocks. - -What the defragmentation does is: - -- Updates #VmaAllocation objects to point to new `VkDeviceMemory` and offset. - After allocation has been moved, its VmaAllocationInfo::deviceMemory and/or - VmaAllocationInfo::offset changes. You must query them again using - vmaGetAllocationInfo() if you need them. -- Moves actual data in memory. - -What it doesn't do, so you need to do it yourself: - -- Recreate buffers and images that were bound to allocations that were defragmented and - bind them with their new places in memory. - You must use `vkDestroyBuffer()`, `vkDestroyImage()`, - `vkCreateBuffer()`, `vkCreateImage()`, vmaBindBufferMemory(), vmaBindImageMemory() - for that purpose and NOT vmaDestroyBuffer(), - vmaDestroyImage(), vmaCreateBuffer(), vmaCreateImage(), because you don't need to - destroy or create allocation objects! -- Recreate views and update descriptors that point to these buffers and images. - -\section defragmentation_cpu Defragmenting CPU memory - -Following example demonstrates how you can run defragmentation on CPU. -Only allocations created in memory types that are `HOST_VISIBLE` can be defragmented. -Others are ignored. - -The way it works is: - -- It temporarily maps entire memory blocks when necessary. -- It moves data using `memmove()` function. - -\code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -std::vector<VkBuffer> buffers; -std::vector<VmaAllocation> allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector<VkBool32> allocationsChanged(allocCount); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE; // No limit. -defragInfo.maxCpuAllocationsToMove = UINT32_MAX; // No limit. - -VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) -{ - if(allocationsChanged[i]) - { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); - } -} -\endcode - -Setting VmaDefragmentationInfo2::pAllocationsChanged is optional. -This output array tells whether particular allocation in VmaDefragmentationInfo2::pAllocations at the same index -has been modified during defragmentation. -You can pass null, but you then need to query every allocation passed to defragmentation -for new parameters using vmaGetAllocationInfo() if you might need to recreate and rebind a buffer or image associated with it. - -If you use [Custom memory pools](@ref choosing_memory_type_custom_memory_pools), -you can fill VmaDefragmentationInfo2::poolCount and VmaDefragmentationInfo2::pPools -instead of VmaDefragmentationInfo2::allocationCount and VmaDefragmentationInfo2::pAllocations -to defragment all allocations in given pools. -You cannot use VmaDefragmentationInfo2::pAllocationsChanged in that case. -You can also combine both methods. - -\section defragmentation_gpu Defragmenting GPU memory - -It is also possible to defragment allocations created in memory types that are not `HOST_VISIBLE`. -To do that, you need to pass a command buffer that meets requirements as described in -VmaDefragmentationInfo2::commandBuffer. The way it works is: - -- It creates temporary buffers and binds them to entire memory blocks when necessary. -- It issues `vkCmdCopyBuffer()` to passed command buffer. +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. Example: \code -// Given following variables already initialized: -VkDevice device; -VmaAllocator allocator; -VkCommandBuffer commandBuffer; -std::vector<VkBuffer> buffers; -std::vector<VmaAllocation> allocations; - - -const uint32_t allocCount = (uint32_t)allocations.size(); -std::vector<VkBool32> allocationsChanged(allocCount); - -VkCommandBufferBeginInfo cmdBufBeginInfo = ...; -vkBeginCommandBuffer(commandBuffer, &cmdBufBeginInfo); - -VmaDefragmentationInfo2 defragInfo = {}; -defragInfo.allocationCount = allocCount; -defragInfo.pAllocations = allocations.data(); -defragInfo.pAllocationsChanged = allocationsChanged.data(); -defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE; // Notice it is "GPU" this time. -defragInfo.maxGpuAllocationsToMove = UINT32_MAX; // Notice it is "GPU" this time. -defragInfo.commandBuffer = commandBuffer; +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; VmaDefragmentationContext defragCtx; -vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx); - -vkEndCommandBuffer(commandBuffer); +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... -// Submit commandBuffer. -// Wait for a fence that ensures commandBuffer execution finished. - -vmaDefragmentationEnd(allocator, defragCtx); - -for(uint32_t i = 0; i < allocCount; ++i) +for(;;) { - if(allocationsChanged[i]) + VmaDefragmentationPassMoveInfo pass; + res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res == VK_INCOMPLETE) { - // Destroy buffer that is immutably bound to memory region which is no longer valid. - vkDestroyBuffer(device, buffers[i], nullptr); - - // Create new buffer with same parameters. - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - // Bind new buffer to new memory region. Data contained in it is already moved. - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - vmaBindBufferMemory(allocator, allocations[i], buffers[i]); + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + //- Inspect pass.pMoves[i].srcAllocation, identify what buffer or image it represents. + //- Recreate this buffer or image at pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. + //- Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. + } + //- Make sure the copy commands finished executing. + //- Update appropriate descriptors to point to the new places. + res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... } + else + // Handle error... } -\endcode - -You can combine these two methods by specifying non-zero `maxGpu*` as well as `maxCpu*` parameters. -The library automatically chooses best method to defragment each memory pool. - -You may try not to block your entire program to wait until defragmentation finishes, -but do it in the background, as long as you carefully fullfill requirements described -in function vmaDefragmentationBegin(). -\section defragmentation_additional_notes Additional notes - -It is only legal to defragment allocations bound to: - -- buffers -- images created with `VK_IMAGE_CREATE_ALIAS_BIT`, `VK_IMAGE_TILING_LINEAR`, and - being currently in `VK_IMAGE_LAYOUT_GENERAL` or `VK_IMAGE_LAYOUT_PREINITIALIZED`. - -Defragmentation of images created with `VK_IMAGE_TILING_OPTIMAL` or in any other -layout may give undefined results. - -If you defragment allocations bound to images, new images to be bound to new -memory region after defragmentation should be created with `VK_IMAGE_LAYOUT_PREINITIALIZED` -and then transitioned to their original layout from before defragmentation if -needed using an image memory barrier. +vmaEndDefragmentation(allocator, defragCtx, nullptr); +\endcode -While using defragmentation, you may experience validation layer warnings, which you just need to ignore. -See [Validation layer warnings](@ref general_considerations_validation_layer_warnings). +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. -Please don't expect memory to be fully compacted after defragmentation. -Algorithms inside are based on some heuristics that try to maximize number of Vulkan -memory blocks to make totally empty to release them, as well as to maximize continuous -empty space inside remaining blocks, while minimizing the number and size of allocations that -need to be moved. Some fragmentation may still remain - this is normal. +Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter. +Defragmentation algorithm tries to move all suitable allocations. +You can, however, refuse to move some of them inside a defragmentation pass, by setting +`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +However, this is not recommended and may result in suboptimal packing of the allocations after defragmentation. +If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. -\section defragmentation_custom_algorithm Writing custom defragmentation algorithm +You can also decide to destroy an allocation instead of moving it. +You should then set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. -If you want to implement your own, custom defragmentation algorithm, -there is infrastructure prepared for that, -but it is not exposed through the library API - you need to hack its source code. -Here are steps needed to do this: +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. --# Main thing you need to do is to define your own class derived from base abstract - class `VmaDefragmentationAlgorithm` and implement your version of its pure virtual methods. - See definition and comments of this class for details. --# Your code needs to interact with device memory block metadata. - If you need more access to its data than it is provided by its public interface, - declare your new class as a friend class e.g. in class `VmaBlockMetadata_Generic`. --# If you want to create a flag that would enable your algorithm or pass some additional - flags to configure it, add them to `VmaDefragmentationFlagBits` and use them in - VmaDefragmentationInfo2::flags. --# Modify function `VmaBlockVectorDefragmentationContext::Begin` to create object - of your new class whenever needed. +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. \page statistics Statistics -This library contains functions that return information about its internal state, +This library contains several functions that return information about its internal state, especially the amount of memory allocated from Vulkan. -Please keep in mind that these functions need to traverse all internal data structures -to gather these information, so they may be quite time-consuming. -Don't call them too often. \section statistics_numeric_statistics Numeric statistics -You can query for overall statistics of the allocator using function vmaCalculateStats(). -Information are returned using structure #VmaStats. -It contains #VmaStatInfo - number of allocated blocks, number of allocations -(occupied ranges in these blocks), number of unused (free) ranges in these blocks, -number of bytes used and unused (but still allocated from Vulkan) and other information. -They are summed across memory heaps, memory types and total for whole allocator. +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay withing budget +(see also \ref staying_within_budget). +Example: -You can query for statistics of a custom pool using function vmaGetPoolStats(). -Information are returned using structure #VmaPoolStats. +\code +uint32_t heapIndex = ... + +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", + budgets[heapIndex].statistics.allocationCount, + budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", + budgets[heapIndex].statistics.blockCount, + budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", + budgets[heapIndex].usage, + budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. -You can query for information about specific allocation using function vmaGetAllocationInfo(). +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). It fill structure #VmaAllocationInfo. \section statistics_json_dump JSON dump @@ -19047,7 +18066,7 @@ The format of this JSON string is not part of official documentation of the libr but it will not change in backward-incompatible way without increasing library major version number and appropriate mention in changelog. -The JSON string contains all the data that can be obtained using vmaCalculateStats(). +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). It can also contain detailed map of allocated memory blocks and their regions - free and occupied by allocations. This allows e.g. to visualize the memory or assess fragmentation. @@ -19064,18 +18083,17 @@ some handle, index, key, ordinal number or any other value that would associate the allocation with your custom metadata. \code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -// Fill bufferInfo... +VkBufferCreateInfo bufCreateInfo = ... MyBufferMetadata* pMetadata = CreateBufferMetadata(); VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.pUserData = pMetadata; VkBuffer buffer; VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); \endcode The pointer may be later retrieved as VmaAllocationInfo::pUserData: @@ -19089,7 +18107,7 @@ MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; It can also be changed using function vmaSetAllocationUserData(). Values of (non-zero) allocations' `pUserData` are printed in JSON report created by -vmaBuildStatsString(), in hexadecimal form. +vmaBuildStatsString() in hexadecimal form. \section allocation_names Allocation names @@ -19097,19 +18115,18 @@ There is alternative mode available where `pUserData` pointer is used to point t a null-terminated string, giving a name to the allocation. To use this mode, set #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag in VmaAllocationCreateInfo::flags. Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or pointer to a null-terminated string. +vmaSetAllocationUserData() must be either null or a pointer to a null-terminated string. The library creates internal copy of the string, so the pointer you pass doesn't need to be valid for whole lifetime of the allocation. You can free it after the call. \code -VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -// Fill imageInfo... +VkImageCreateInfo imageInfo = ... std::string imageName = "Texture: "; imageName += fileName; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; allocCreateInfo.pUserData = imageName.c_str(); @@ -19265,15 +18282,17 @@ It might be more convenient, but you need to make sure to use this new unit cons \section virtual_allocator_statistics Statistics -You can obtain statistics of a virtual block using vmaCalculateVirtualBlockStats(). -The function fills structure #VmaStatInfo - same as used by the normal Vulkan memory allocator. +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. Example: \code -VmaStatInfo statInfo; -vmaCalculateVirtualBlockStats(block, &statInfo); +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); printf("My virtual block has %llu bytes used by %u virtual allocations\n", - statInfo.usedBytes, statInfo.allocationCount); + stats.allocationBytes, stats.allocationCount); \endcode You can also request a full list of allocations and free regions as a string in JSON format by calling @@ -19353,7 +18372,6 @@ allocations, which have their own memory block of specific size. It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag or those automatically decided to put into dedicated allocations, e.g. due to its large size or recommended by VK_KHR_dedicated_allocation extension. -Margins are also not active in custom pools created with #VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT flag. Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. @@ -19439,33 +18457,14 @@ Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entir \page usage_patterns Recommended usage patterns +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. + See also slides from talk: [Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) -\section usage_patterns_common_mistakes Common mistakes - -<b>Use of CPU_TO_GPU instead of CPU_ONLY memory</b> - -#VMA_MEMORY_USAGE_CPU_TO_GPU is recommended only for resources that will be -mapped and written by the CPU, as well as read directly by the GPU - like some -buffers or textures updated every frame (dynamic). If you create a staging copy -of a resource to be written by CPU and then used as a source of transfer to -another resource placed in the GPU memory, that staging resource should be -created with #VMA_MEMORY_USAGE_CPU_ONLY. Please read the descriptions of these -enums carefully for details. - -<b>Unnecessary use of custom pools</b> - -\ref custom_memory_pools may be useful for special purposes - when you want to -keep certain type of resources separate e.g. to reserve minimum amount of memory -for them or limit maximum amount of memory they can occupy. For most -resources this is not needed and so it is not recommended to create #VmaPool -objects and allocations out of them. Allocating from the default pool is sufficient. - -\section usage_patterns_simple Simple patterns - -\subsection usage_patterns_simple_render_targets Render targets +\section usage_patterns_gpu_only GPU-only resource <b>When:</b> Any resources that you frequently write and read on GPU, @@ -19473,123 +18472,216 @@ e.g. images used as color attachments (aka "render targets"), depth-stencil atta images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). <b>What to do:</b> -Create them in video memory that is fastest to access from GPU using -#VMA_MEMORY_USAGE_GPU_ONLY. +Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. -Consider using [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension -and/or manually creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, -especially if they are large or if you plan to destroy and recreate them e.g. when -display resolution changes. +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +<b>Also consider:</b> +Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, +especially if they are large or if you plan to destroy and recreate them with different sizes +e.g. when display resolution changes. Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. -\subsection usage_patterns_simple_immutable_resources Immutable resources + +\section usage_patterns_staging_copy_upload Staging copy for upload <b>When:</b> -Any resources that you fill on CPU only once (aka "immutable") or infrequently -and then read frequently on GPU, -e.g. textures, vertex and index buffers, constant buffers that don't change often. +A "staging" buffer than you want to map and fill from CPU code, then use as a source od transfer +to some GPU resource. <b>What to do:</b> -Create them in video memory that is fastest to access from GPU using -#VMA_MEMORY_USAGE_GPU_ONLY. +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`. -To initialize content of such resource, create a CPU-side (aka "staging") copy of it -in system memory - #VMA_MEMORY_USAGE_CPU_ONLY, map it, fill it, -and submit a transfer from it to the GPU resource. -You can keep the staging copy if you need it for another upload transfer in the future. -If you don't, you can destroy it or reuse this buffer for uploading different resource -after the transfer finishes. +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; -Prefer to create just buffers in system memory rather than images, even for uploading textures. -Use `vkCmdCopyBufferToImage()`. -Dont use images with `VK_IMAGE_TILING_LINEAR`. +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -\subsection usage_patterns_dynamic_resources Dynamic resources +... -<b>When:</b> -Any resources that change frequently (aka "dynamic"), e.g. every frame or every draw call, -written on CPU, read on GPU. +memcpy(allocInfo.pMappedData, myData, myDataSize); +\endcode -<b>What to do:</b> -Create them using #VMA_MEMORY_USAGE_CPU_TO_GPU. -You can map it and write to it directly on CPU, as well as read from it on GPU. +<b>Also consider:</b> +You can map the allocation using vmaMapMemory() or you can create it as persistenly mapped +using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above. -This is a more complex situation. Different solutions are possible, -and the best one depends on specific GPU type, but you can use this simple approach for the start. -Prefer to write to such resource sequentially (e.g. using `memcpy`). -Don't perform random access or any reads from it on CPU, as it may be very slow. -Also note that textures written directly from the host through a mapped pointer need to be in LINEAR not OPTIMAL layout. -\subsection usage_patterns_readback Readback +\section usage_patterns_readback Readback <b>When:</b> -Resources that contain data written by GPU that you want to read back on CPU, +Buffers for data written by or transferred from the GPU that you want to read back on the CPU, e.g. results of some computations. <b>What to do:</b> -Create them using #VMA_MEMORY_USAGE_GPU_TO_CPU. -You can write to them directly on GPU, as well as map and read them on CPU. - -\section usage_patterns_advanced Advanced patterns +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. -\subsection usage_patterns_integrated_graphics Detecting integrated graphics - -You can support integrated graphics (like Intel HD Graphics, AMD APU) better -by detecting it in Vulkan. -To do it, call `vkGetPhysicalDeviceProperties()`, inspect -`VkPhysicalDeviceProperties::deviceType` and look for `VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU`. -When you find it, you can assume that memory is unified and all memory types are comparably fast -to access from GPU, regardless of `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; -You can then sum up sizes of all available memory heaps and treat them as useful for -your GPU resources, instead of only `DEVICE_LOCAL` ones. -You can also prefer to create your resources in memory types that are `HOST_VISIBLE` to map them -directly instead of submitting explicit transfer (see below). +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; -\subsection usage_patterns_direct_vs_transfer Direct access versus transfer +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -For resources that you frequently write on CPU and read on GPU, many solutions are possible: +... --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit transfer each time. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU, - read it directly on GPU. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU, - read it directly on GPU. +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode -Which solution is the most efficient depends on your resource and especially on the GPU. -It is best to measure it and then make the decision. -Some general recommendations: -- On integrated graphics use (2) or (3) to avoid unnecessary time and memory overhead - related to using a second copy and making transfer. -- For small resources (e.g. constant buffers) use (2). - Discrete AMD cards have special 256 MiB pool of video memory that is directly mappable. - Even if the resource ends up in system memory, its data may be cached on GPU after first - fetch over PCIe bus. -- For larger resources (e.g. textures), decide between (1) and (2). - You may want to differentiate NVIDIA and AMD, e.g. by looking for memory type that is - both `DEVICE_LOCAL` and `HOST_VISIBLE`. When you find it, use (2), otherwise use (1). +\section usage_patterns_advanced_data_uploading Advanced data uploading + +For resources that you frequently write on CPU via mapped pointer and +freqnently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: + +-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, + even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, + and make the device reach out to that resource directly. + - Reads performed by the device will then go through PCI Express bus. + The performace of this access may be limited, but it may be fine depending on the size + of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity + of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), + a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` + (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose + a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). + If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) + that is available to CPU for mapping. + - Writes performed by the host to that memory go through PCI Express bus. + The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, + as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, + a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. + +Thankfully, VMA offers an aid to create and use such resources in the the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers. -Similarly, for resources that you frequently write on GPU and read on CPU, multiple -solutions are possible: +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_GPU_TO_CPU and submit explicit tranfer each time. --# Create just single copy using #VMA_MEMORY_USAGE_GPU_TO_CPU, write to it directly on GPU, - map it and read it on CPU. +VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); -You should take some measurements to decide which option is faster in case of your specific -resource. +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ + // Allocation ended up in a mappable memory and is already mapped - write to it directly. -Note that textures accessed directly from the host through a mapped pointer need to be in LINEAR layout, -which may slow down their usage on the device. -Textures accessed only by the device and transfer operations can use OPTIMAL layout. + // [Executed in runtime]: + memcpy(allocInfo.pMappedData, myData, myDataSize); +} +else +{ + // Allocation ended up in a non-mappable memory - need to transfer. + VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + stagingBufCreateInfo.size = 65536; + stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + VmaAllocationCreateInfo stagingAllocCreateInfo = {}; + stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VkBuffer stagingBuf; + VmaAllocation stagingAlloc; + VmaAllocationInfo stagingAllocInfo; + vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, stagingAllocInfo); + + // [Executed in runtime]: + memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + VkBufferCopy bufCopy = { + 0, // srcOffset + 0, // dstOffset, + myDataSize); // size + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); +} +\endcode -If you don't want to specialize your code for specific types of GPUs, you can still make -an simple optimization for cases when your resource ends up in mappable memory to use it -directly in this case instead of creating CPU-side staging copy. -For details see [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable). +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, + as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, + for temporal antialiasing or other temporal effects. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed + in the system RAM despite it doesn't need to be mapped, because it serves as a "swap" copy to evict + least recently used textures from VRAM. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU + (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or + host memory due to its large size. + - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST + - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT \page configuration Configuration @@ -19628,6 +18720,8 @@ by using a helper library like [volk](https://github.com/zeux/volk). Third, VMA tries to fetch remaining pointers that are still null by calling `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. Finally, all the function pointers required by the library (considering selected @@ -19652,7 +18746,7 @@ VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. \section heap_memory_limit Device heap memory limit When device memory of certain heap runs out of free space, new allocations may -fail (returning error code) or they may succeed, silently pushing some existing +fail (returning error code) or they may succeed, silently pushing some existing_ memory blocks from GPU VRAM to system RAM (which degrades performance). This behavior is implementation-dependent - it depends on GPU vendor and graphics driver. @@ -19673,10 +18767,14 @@ VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve performance on some GPUs. It augments Vulkan API with possibility to query driver whether it prefers particular buffer or image to have its own, dedicated allocation (separate `VkDeviceMemory` block) for better efficiency - to be able -to do some internal optimizations. +to do some internal optimizations. The extension is supported by this library. +It will be used automatically when enabled. + +It has been promoted to core Vulkan 1.1, so if you use eligible Vulkan version +and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion, +you are all set. -The extension is supported by this library. It will be used automatically when -enabled. To enable it: +Otherwise, if you want to use it as an extension: 1 . When creating Vulkan device, check if following 2 device extensions are supported (call `vkEnumerateDeviceExtensionProperties()`). @@ -19688,7 +18786,7 @@ If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). If you enabled these extensions: 2 . Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating -your #VmaAllocator`to inform the library that you enabled required extensions +your #VmaAllocator to inform the library that you enabled required extensions and you want the library to use them. \code @@ -19703,7 +18801,7 @@ buffer using vmaCreateBuffer() or image using vmaCreateImage(). When using the extension together with Vulkan Validation Layer, you will receive warnings like this: - vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer. +_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._ It is OK, you should just ignore it. It happens because you use function `vkGetBufferMemoryRequirements2KHR()` instead of standard @@ -19760,7 +18858,7 @@ out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligi devices. There are multiple ways to do it, for example: - You can request or prefer to allocate out of such memory types by adding - `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags + `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. - If you manually found memory type index to use for this purpose, force allocation @@ -19778,7 +18876,7 @@ accompanying this library. Device extension VK_KHR_buffer_device_address allow to fetch raw GPU pointer to a buffer and pass it for usage in a shader code. -It is promoted to core Vulkan 1.2. +It has been promoted to core Vulkan 1.2. If you want to use this feature in connection with VMA, follow these steps: @@ -19839,7 +18937,7 @@ accompanying this library. you must not call vmaGetAllocationInfo() and vmaMapMemory() from different threads at the same time if you pass the same #VmaAllocation object to these functions. -- #VmaVirtualBlock is also not safe to be used from multiple threads simultaneously. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. \section general_considerations_validation_layer_warnings Validation layer warnings @@ -19863,7 +18961,7 @@ The library uses following algorithm for allocation, in order: -# Try to find free range of memory in existing blocks. -# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. --# If failed, try to create such block with size/2, size/4, size/8. +-# If failed, try to create such block with size / 2, size / 4, size / 8. -# If failed, try to allocate separate `VkDeviceMemory` for this allocation, just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. -# If failed, choose other memory type that meets the requirements specified in @@ -19874,28 +18972,29 @@ The library uses following algorithm for allocation, in order: Features deliberately excluded from the scope of this library: -- **Data transfer.** Uploading (streaming) and downloading data of buffers and images - between CPU and GPU memory and related synchronization is responsibility of the user. - Defining some "texture" object that would automatically stream its data from a - staging copy in CPU memory to GPU memory would rather be a feature of another, - higher-level library implemented on top of VMA. -- **Recreation of buffers and images.** Although the library has functions for - buffer and image creation (vmaCreateBuffer(), vmaCreateImage()), you need to - recreate these objects yourself after defragmentation. That is because the big - structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in - #VmaAllocation object. -- **Handling CPU memory allocation failures.** When dynamically creating small C++ - objects in CPU memory (not Vulkan memory), allocation failures are not checked - and handled gracefully, because that would complicate code significantly and - is usually not needed in desktop PC applications anyway. - Success of an allocation is just checked with an assert. -- **Code free of any compiler warnings.** Maintaining the library to compile and - work correctly on so many different platforms is hard enough. Being free of - any warnings, on any version of any compiler, is simply not feasible. - There are many preprocessor macros that make some variables unused, function parameters unreferenced, - or conditional expressions constant in some configurations. - The code of this library should not be bigger or more complicated just to silence these warnings. - It is recommended to disable such warnings instead. -- This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but - are not going to be included into this repository. +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images + between CPU and GPU memory and related synchronization is responsibility of the user. + Defining some "texture" object that would automatically stream its data from a + staging copy in CPU memory to GPU memory would rather be a feature of another, + higher-level library implemented on top of VMA. + VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory. +-# **Recreation of buffers and images.** Although the library has functions for + buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to + recreate these objects yourself after defragmentation. That is because the big + structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in + #VmaAllocation object. +-# **Handling CPU memory allocation failures.** When dynamically creating small C++ + objects in CPU memory (not Vulkan memory), allocation failures are not checked + and handled gracefully, because that would complicate code significantly and + is usually not needed in desktop PC applications anyway. + Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and + work correctly on so many different platforms is hard enough. Being free of + any warnings, on any version of any compiler, is simply not feasible. + There are many preprocessor macros that make some variables unused, function parameters unreferenced, + or conditional expressions constant in some configurations. + The code of this library should not be bigger or more complicated just to silence these warnings. + It is recommended to disable such warnings instead. +-# This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but + are not going to be included into this repository. */ |