Index: src/gpu/vk/GrVkMemory.cpp
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index e0ab3a6c8ca7b141cf46b19b97b017ad3cab1647..fa0bcb5cd7b25fc5fbc16eaec4487e23d49cb596 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -29,38 +29,26 @@ static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDev
     return false;
 }
-static bool alloc_device_memory(const GrVkGpu* gpu,
-                                VkMemoryRequirements* memReqs,
-                                const VkMemoryPropertyFlags flags,
-                                VkDeviceMemory* memory) {
-    uint32_t typeIndex;
-    if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
-                                     memReqs->memoryTypeBits,
-                                     flags,
-                                     &typeIndex)) {
-        return false;
-    }
-
-    VkMemoryAllocateInfo allocInfo = {
-        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
-        NULL,                                        // pNext
-        memReqs->size,                               // allocationSize
-        typeIndex,                                   // memoryTypeIndex
+static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) {
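+    // table mapping each GrVkBuffer::Type to the GrVkGpu heap it allocates from;
+    // the static asserts below keep the table order in sync with the enum values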
+    const GrVkGpu::Heap kBufferToHeap[]{
+        GrVkGpu::kVertexBuffer_Heap,
+        GrVkGpu::kIndexBuffer_Heap,
+        GrVkGpu::kUniformBuffer_Heap,
+        GrVkGpu::kCopyReadBuffer_Heap,
+        GrVkGpu::kCopyWriteBuffer_Heap,
     };
+    GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type);
+    GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type);
+    GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type);
+    GR_STATIC_ASSERT(3 == GrVkBuffer::kCopyRead_Type);
+    GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyWrite_Type);
-    VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
-                                                                 &allocInfo,
-                                                                 nullptr,
-                                                                 memory));
-    if (err) {
-        return false;
-    }
-    return true;
+    return kBufferToHeap[type];
 }
 bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
                                           VkBuffer buffer,
-                                          const VkMemoryPropertyFlags flags,
+                                          GrVkBuffer::Type type,
                                           GrVkAlloc* alloc) {
     const GrVkInterface* iface = gpu->vkInterface();
     VkDevice device = gpu->device();
@@ -68,30 +56,61 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
     VkMemoryRequirements memReqs;
     GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
-    if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) {
+    VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
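+    // HOST_CACHED is only a preference; the fallback below drops it if no memory type
+    // with all three flags exists on this device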
+    uint32_t typeIndex;
+    if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                     memReqs.memoryTypeBits,
+                                     desiredMemProps,
+                                     &typeIndex)) {
+        // this memory type should always be available
+        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                     memReqs.memoryTypeBits,
+                                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                     &typeIndex));
+    }
+
+    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
+        SkDebugf("Failed to alloc buffer\n");
         return false;
     }
-    // for now, offset is always 0
-    alloc->fOffset = 0;
     // Bind Memory to device
     VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer,
                                                       alloc->fMemory, alloc->fOffset));
     if (err) {
-        GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr));
+        SkASSERT_RELEASE(heap->free(*alloc));
         return false;
     }
+
     return true;
 }
-void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
-    const GrVkInterface* iface = gpu->vkInterface();
-    GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type,
+                                  const GrVkAlloc& alloc) {
+
+    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+    SkASSERT_RELEASE(heap->free(alloc));
+}
+
+// for debugging
+static uint64_t gTotalImageMemory = 0;
+static uint64_t gTotalImageMemoryFullPage = 0;
+
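+// images whose memory requirements are at or below kMaxSmallImageSize are packed into the
+// shared small-image heap; kMinVulkanPageSize is only used by the debug counters above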
+const VkDeviceSize kMaxSmallImageSize = 16 * 1024;
+const VkDeviceSize kMinVulkanPageSize = 16 * 1024;
+
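+// rounds size up to the next multiple of alignment (assumes alignment is a power of two)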
+static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) {
+    return (size + alignment - 1) & ~(alignment - 1);
 }
 bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
                                          VkImage image,
-                                         const VkMemoryPropertyFlags flags,
+                                         bool linearTiling,
                                          GrVkAlloc* alloc) {
     const GrVkInterface* iface = gpu->vkInterface();
     VkDevice device = gpu->device();
@@ -99,25 +118,76 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
     VkMemoryRequirements memReqs;
     GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
-    if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) {
+    uint32_t typeIndex;
+    GrVkHeap* heap;
+    if (linearTiling) {
+        VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                                                VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+        if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                         memReqs.memoryTypeBits,
+                                         desiredMemProps,
+                                         &typeIndex)) {
+            // this memory type should always be available
+            SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                         memReqs.memoryTypeBits,
+                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                         &typeIndex));
+        }
+        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+    } else {
+        // this memory type should always be available
+        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                     memReqs.memoryTypeBits,
+                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                                                     &typeIndex));
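+        // small optimal images are grouped together in their own heap; everything larger
+        // goes to the general optimal-image heap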
+        if (memReqs.size <= kMaxSmallImageSize) {
+            heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+        } else {
+            heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+        }
+    }
+
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
+        SkDebugf("Failed to alloc image\n");
         return false;
     }
-    // for now, offset is always 0
-    alloc->fOffset = 0;
     // Bind Memory to device
     VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
                                                      alloc->fMemory, alloc->fOffset));
     if (err) {
-        GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr));
+        SkASSERT_RELEASE(heap->free(*alloc));
         return false;
     }
+
+    gTotalImageMemory += alloc->fSize;
+
+    VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize);
+    gTotalImageMemoryFullPage += pageAlignedSize;
+
     return true;
 }
-void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
-    const GrVkInterface* iface = gpu->vkInterface();
-    GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling,
+                                 const GrVkAlloc& alloc) {
+    GrVkHeap* heap;
+    if (linearTiling) {
+        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+    } else if (alloc.fSize <= kMaxSmallImageSize) {
+        heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+    } else {
+        heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+    }
+    if (!heap->free(alloc)) {
+        // must be an adopted allocation
+        GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+    } else {
+        gTotalImageMemory -= alloc.fSize;
+        VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize);
+        gTotalImageMemoryFullPage -= pageAlignedSize;
+    }
 }
 VkPipelineStageFlags GrVkMemory::LayoutToPipelineStageFlags(const VkImageLayout layout) {
@@ -169,3 +239,289 @@ VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) {
     }
     return flags;
 }
+
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+                         VkDeviceSize size, VkDeviceSize alignment)
+    : fGpu(gpu)
+    , fMemoryTypeIndex(memoryTypeIndex) {
+
+    VkMemoryAllocateInfo allocInfo = {
+        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
+        NULL,                                        // pNext
+        size,                                        // allocationSize
+        memoryTypeIndex,                             // memoryTypeIndex
+    };
+
+    VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
+                                                                 &allocInfo,
+                                                                 nullptr,
+                                                                 &fAlloc));
+
+    if (VK_SUCCESS == err) {
+        fSize = size;
+        fAlignment = alignment;
+        fFreeSize = size;
+        fLargestBlockSize = size;
+        fLargestBlockOffset = 0;
+
+        Block* block = fFreeList.addToTail();
+        block->fOffset = 0;
+        block->fSize = fSize;
+    } else {
+        fSize = 0;
+        fAlignment = 0;
+        fFreeSize = 0;
+        fLargestBlockSize = 0;
+    }
+}
+
+GrVkSubHeap::~GrVkSubHeap() {
+    const GrVkInterface* iface = fGpu->vkInterface();
+    GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+
+    fFreeList.reset();
+}
+
+bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, fAlignment);
+
+    // find the smallest block big enough for our allocation
+    FreeList::Iter iter = fFreeList.headIter();
+    FreeList::Iter bestFitIter;
+    VkDeviceSize bestFitSize = fSize + 1;
+    VkDeviceSize secondLargestSize = 0;
+    VkDeviceSize secondLargestOffset = 0;
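+    // walk the free list: remember the smallest block that can hold the request, and also
+    // the second-largest block in case this allocation splits the current largest one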
+    while (iter.get()) {
+        Block* block = iter.get();
+        // need to adjust size to match desired alignment
+        SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0);
+        if (block->fSize >= alignedSize && block->fSize < bestFitSize) {
+            bestFitIter = iter;
+            bestFitSize = block->fSize;
+        }
+        if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) {
+            secondLargestSize = block->fSize;
+            secondLargestOffset = block->fOffset;
+        }
+        iter.next();
+    }
+    SkASSERT(secondLargestSize <= fLargestBlockSize);
+
+    Block* bestFit = bestFitIter.get();
+    if (bestFit) {
+        alloc->fMemory = fAlloc;
+        SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset);
+        alloc->fOffset = bestFit->fOffset;
+        alloc->fSize = alignedSize;
+        // adjust or remove current block
+        VkDeviceSize originalBestFitOffset = bestFit->fOffset;
+        if (bestFit->fSize > alignedSize) {
+            bestFit->fOffset += alignedSize;
+            bestFit->fSize -= alignedSize;
+            if (fLargestBlockOffset == originalBestFitOffset) {
+                if (bestFit->fSize >= secondLargestSize) {
+                    fLargestBlockSize = bestFit->fSize;
+                    fLargestBlockOffset = bestFit->fOffset;
+                } else {
+                    fLargestBlockSize = secondLargestSize;
+                    fLargestBlockOffset = secondLargestOffset;
+                }
+            }
+#ifdef SK_DEBUG
+            VkDeviceSize largestSize = 0;
+            iter = fFreeList.headIter();
+            while (iter.get()) {
+                Block* block = iter.get();
+                if (largestSize < block->fSize) {
+                    largestSize = block->fSize;
+                }
+                iter.next();
+            }
+            SkASSERT(largestSize == fLargestBlockSize);
+#endif
+        } else {
+            SkASSERT(bestFit->fSize == alignedSize);
+            if (fLargestBlockOffset == originalBestFitOffset) {
+                fLargestBlockSize = secondLargestSize;
+                fLargestBlockOffset = secondLargestOffset;
+            }
+            fFreeList.remove(bestFit);
+#ifdef SK_DEBUG
+            VkDeviceSize largestSize = 0;
+            iter = fFreeList.headIter();
+            while (iter.get()) {
+                Block* block = iter.get();
+                if (largestSize < block->fSize) {
+                    largestSize = block->fSize;
+                }
+                iter.next();
+            }
+            SkASSERT(largestSize == fLargestBlockSize);
+#endif
+        }
+        fFreeSize -= alignedSize;
+
+        return true;
+    }
+
+    SkDebugf("Can't allocate %llu bytes, %llu bytes available, largest free block %llu\n",
+             (unsigned long long)alignedSize, (unsigned long long)fFreeSize,
+             (unsigned long long)fLargestBlockSize);
+
+    return false;
+}
+
+
+void GrVkSubHeap::free(const GrVkAlloc& alloc) {
+    SkASSERT(alloc.fMemory == fAlloc);
+
+    // find the block right after this allocation
+    FreeList::Iter iter = fFreeList.headIter();
+    while (iter.get() && iter.get()->fOffset < alloc.fOffset) {
+        iter.next();
+    }
+    FreeList::Iter prev = iter;
+    prev.prev();
+    // we have four cases:
+    // we exactly follow the previous one
+    Block* block;
+    if (prev.get() && prev.get()->fOffset + prev.get()->fSize == alloc.fOffset) {
+        block = prev.get();
+        block->fSize += alloc.fSize;
+        if (block->fOffset == fLargestBlockOffset) {
+            fLargestBlockSize = block->fSize;
+        }
+        // and additionally we may exactly precede the next one
+        if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
+            block->fSize += iter.get()->fSize;
+            if (iter.get()->fOffset == fLargestBlockOffset) {
+                fLargestBlockOffset = block->fOffset;
+                fLargestBlockSize = block->fSize;
+            }
+            fFreeList.remove(iter.get());
+        }
+    // or we only exactly precede the next one
+    } else if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
+        block = iter.get();
+        block->fSize += alloc.fSize;
+        if (block->fOffset == fLargestBlockOffset) {
+            fLargestBlockOffset = alloc.fOffset;
+            fLargestBlockSize = block->fSize;
+        }
+        block->fOffset = alloc.fOffset;
+    // or we fall somewhere in between, with gaps
+    } else {
+        block = fFreeList.addBefore(iter);
+        block->fOffset = alloc.fOffset;
+        block->fSize = alloc.fSize;
+    }
+    fFreeSize += alloc.fSize;
+    if (block->fSize > fLargestBlockSize) {
+        fLargestBlockSize = block->fSize;
+        fLargestBlockOffset = block->fOffset;
+    }
+
+#ifdef SK_DEBUG
+    VkDeviceSize largestSize = 0;
+    iter = fFreeList.headIter();
+    while (iter.get()) {
+        Block* block = iter.get();
+        if (largestSize < block->fSize) {
+            largestSize = block->fSize;
+        }
+        iter.next();
+    }
+    SkASSERT(fLargestBlockSize == largestSize);
+#endif
+}
+
+GrVkHeap::~GrVkHeap() {
+}
+
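+// allocates out of large shared subheaps (fSubHeapSize bytes each), creating a new
+// subheap when no existing one of the right memory type can fit the request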
+bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
+                        uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, alignment);
+
+    // first try to find a subheap that fits our allocation request
+    int bestFitIndex = -1;
+    VkDeviceSize bestFitSize = 0x7FFFFFFF;
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex) {
+            VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize();
+            if (heapSize >= alignedSize && heapSize < bestFitSize) {
+                bestFitIndex = i;
+                bestFitSize = heapSize;
+            }
+        }
+    }
+
+    if (bestFitIndex >= 0) {
+        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+            fUsedSize += alloc->fSize;
+            return true;
+        }
+        return false;
+    }
+
+    // need to allocate a new subheap
+    SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment));
+    fAllocSize += fSubHeapSize;
+    if (subHeap->alloc(size, alloc)) {
+        fUsedSize += alloc->fSize;
+        return true;
+    }
+
+    return false;
+}
+
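+// gives each allocation its own dedicated subheap, reusing a previously freed subheap
+// of a matching memory type when one is available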
+bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
+                           uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, alignment);
+
+    // first try to find an unallocated subheap that fits our allocation request
+    int bestFitIndex = -1;
+    VkDeviceSize bestFitSize = 0x7FFFFFFF;
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && fSubHeaps[i]->unallocated()) {
+            VkDeviceSize heapSize = fSubHeaps[i]->size();
+            if (heapSize >= alignedSize && heapSize < bestFitSize) {
+                bestFitIndex = i;
+                bestFitSize = heapSize;
+            }
+        }
+    }
+
+    if (bestFitIndex >= 0) {
+        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+            fUsedSize += alloc->fSize;
+            return true;
+        }
+        return false;
+    }
+
+    // need to allocate a new subheap
+    SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+    fAllocSize += alignedSize;
+    if (subHeap->alloc(size, alloc)) {
+        fUsedSize += alloc->fSize;
+        return true;
+    }
+
+    return false;
+}
+
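+// returns the allocation to whichever subheap owns its VkDeviceMemory; returns false
+// if the memory did not come from this heap (e.g. an adopted allocation)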
+bool GrVkHeap::free(const GrVkAlloc& alloc) {
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memory() == alloc.fMemory) {
+            fSubHeaps[i]->free(alloc);
+            fUsedSize -= alloc.fSize;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+