Chromium Code Reviews — Index: src/gpu/vk/GrVkMemory.cpp |
| diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp |
| index e0ab3a6c8ca7b141cf46b19b97b017ad3cab1647..4db2b400cc0ec7c04cb19f579352811f02c7630a 100644 |
| --- a/src/gpu/vk/GrVkMemory.cpp |
| +++ b/src/gpu/vk/GrVkMemory.cpp |
| @@ -29,38 +29,21 @@ static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDev |
| return false; |
| } |
| -static bool alloc_device_memory(const GrVkGpu* gpu, |
| - VkMemoryRequirements* memReqs, |
| - const VkMemoryPropertyFlags flags, |
| - VkDeviceMemory* memory) { |
| - uint32_t typeIndex; |
| - if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| - memReqs->memoryTypeBits, |
| - flags, |
| - &typeIndex)) { |
| - return false; |
| - } |
| - |
| - VkMemoryAllocateInfo allocInfo = { |
| - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType |
| - NULL, // pNext |
| - memReqs->size, // allocationSize |
| - typeIndex, // memoryTypeIndex |
| +static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) { |
| + const GrVkGpu::Heap kBufferToHeap[]{ |
|
bsalomon
2016/06/10 15:14:53
Should we have some static asserts somewhere that
jvanverth1
2016/06/10 15:41:20
Ok, I'll add those.
jvanverth1
2016/06/13 19:59:22
Done.
|
| + GrVkGpu::kVertexBuffer_Heap, |
| + GrVkGpu::kIndexBuffer_Heap, |
| + GrVkGpu::kUniformBuffer_Heap, |
| + GrVkGpu::kCopyReadBuffer_Heap, |
| + GrVkGpu::kCopyWriteBuffer_Heap, |
| }; |
| - VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), |
| - &allocInfo, |
| - nullptr, |
| - memory)); |
| - if (err) { |
| - return false; |
| - } |
| - return true; |
| + return kBufferToHeap[type]; |
| } |
| bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, |
| VkBuffer buffer, |
| - const VkMemoryPropertyFlags flags, |
| + GrVkBuffer::Type type, |
| GrVkAlloc* alloc) { |
| const GrVkInterface* iface = gpu->vkInterface(); |
| VkDevice device = gpu->device(); |
| @@ -68,30 +51,53 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, |
| VkMemoryRequirements memReqs; |
| GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); |
| - if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) { |
| + VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
| + uint32_t typeIndex; |
| + if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + memReqs.memoryTypeBits, |
| + desiredMemProps, |
| + &typeIndex)) { |
| + // this memory type should always be available |
| + SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + memReqs.memoryTypeBits, |
| + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, |
| + &typeIndex)); |
| + } |
| + |
| + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); |
| + |
| + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { |
| + SkDebugf("Failed to alloc buffer\n"); |
| return false; |
| } |
| - // for now, offset is always 0 |
| - alloc->fOffset = 0; |
| // Bind Memory to device |
| VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, |
| alloc->fMemory, alloc->fOffset)); |
| if (err) { |
| - GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr)); |
| + SkASSERT_RELEASE(heap->free(*alloc)); |
| return false; |
| } |
| + |
| return true; |
| } |
| -void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) { |
| - const GrVkInterface* iface = gpu->vkInterface(); |
| - GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr)); |
| +void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, |
| + const GrVkAlloc& alloc) { |
| + |
| + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); |
| + SkASSERT_RELEASE(heap->free(alloc)); |
| } |
// Running totals of image memory allocated through this file, kept for
// debugging memory usage (per review discussion, intentionally left in).
// gTotalImageMemoryFullPage tracks the same allocations rounded up to 16K
// pages, to estimate per-page overhead/fragmentation.
// NOTE(review): plain globals with no synchronization -- presumably fine for a
// single GrVkGpu used on one thread; confirm if multiple contexts can run
// concurrently.
static uint64_t gTotalImageMemory = 0;
static uint64_t gTotalImageMemoryFullPage = 0;
| bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, |
| VkImage image, |
| - const VkMemoryPropertyFlags flags, |
| + bool linearTiling, |
| GrVkAlloc* alloc) { |
| const GrVkInterface* iface = gpu->vkInterface(); |
| VkDevice device = gpu->device(); |
| @@ -99,25 +105,76 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, |
| VkMemoryRequirements memReqs; |
| GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); |
| - if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) { |
| + uint32_t typeIndex; |
| + GrVkHeap* heap; |
| + if (linearTiling) { |
| + VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
| + if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + memReqs.memoryTypeBits, |
| + desiredMemProps, |
| + &typeIndex)) { |
| + // this memory type should always be available |
| + SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + memReqs.memoryTypeBits, |
| + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, |
| + &typeIndex)); |
| + } |
| + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); |
| + } else { |
| + // this memory type should always be available |
| + SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + memReqs.memoryTypeBits, |
| + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
| + &typeIndex)); |
| + if (memReqs.size < 16 * 1024) { |
| + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); |
| + } else { |
| + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); |
| + } |
| + } |
| + |
| + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { |
| + SkDebugf("Failed to alloc image\n"); |
| return false; |
| } |
| - // for now, offset is always 0 |
| - alloc->fOffset = 0; |
| // Bind Memory to device |
| VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, |
| alloc->fMemory, alloc->fOffset)); |
| if (err) { |
| - GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr)); |
| + SkASSERT_RELEASE(heap->free(*alloc)); |
| return false; |
| } |
| + |
| + gTotalImageMemory += alloc->fSize; |
| + |
| + VkDeviceSize alignedSize = (alloc->fSize + 16*1024 - 1) & ~(16*1024 - 1); |
| + gTotalImageMemoryFullPage += alignedSize; |
| + |
| return true; |
| } |
| -void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) { |
| - const GrVkInterface* iface = gpu->vkInterface(); |
| - GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr)); |
| +void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, |
| + const GrVkAlloc& alloc) { |
| + GrVkHeap* heap; |
| + if (linearTiling) { |
| + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); |
| + } else if (alloc.fSize < 16 * 1024) { |
| + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); |
| + } else { |
| + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); |
| + } |
| + if (!heap->free(alloc)) { |
| + // must be an adopted allocation |
| + GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); |
| + } else { |
| + gTotalImageMemory -= alloc.fSize; |
| + VkDeviceSize alignedSize = (alloc.fSize + 16 * 1024 - 1) & ~(16 * 1024 - 1); |
| + gTotalImageMemoryFullPage -= alignedSize; |
| + } |
| } |
| VkPipelineStageFlags GrVkMemory::LayoutToPipelineStageFlags(const VkImageLayout layout) { |
| @@ -169,3 +226,309 @@ VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) { |
| } |
| return flags; |
| } |
| + |
| +GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, VkDeviceSize size) |
| + : fGpu(gpu) |
| + , fMemoryTypeIndex(memoryTypeIndex) { |
| + |
| + VkMemoryAllocateInfo allocInfo = { |
| + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType |
| + NULL, // pNext |
| + size, // allocationSize |
| + memoryTypeIndex, // memoryTypeIndex |
| + }; |
| + |
| + VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), |
| + &allocInfo, |
|
egdaniel
2016/06/13 16:44:44
align these will allocate memory?
jvanverth1
2016/06/13 19:59:22
Done.
|
| + nullptr, |
| + &fAlloc)); |
| + |
| + if (VK_SUCCESS == err) { |
| + fSize = size; |
| + fFreeSize = size; |
| + fLargestBlockSize = size; |
| + fLargestBlockOffset = 0; |
| + |
| + Block* block = fFreeList.addToTail(); |
| + block->fOffset = 0; |
| + block->fSize = fSize; |
| + } else { |
| + fSize = 0; |
| + fFreeSize = 0; |
| + fLargestBlockSize = 0; |
| + } |
| +} |
| + |
| +GrVkSubHeap::~GrVkSubHeap() { |
| + const GrVkInterface* iface = fGpu->vkInterface(); |
| + GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr)); |
| + |
| + fFreeList.reset(); |
| +} |
| + |
| +static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) { |
| + return (size + alignment - 1) & ~(alignment - 1); |
| +} |
| + |
| +bool GrVkSubHeap::alloc(VkDeviceSize size, VkDeviceSize alignment, GrVkAlloc* alloc) { |
| + VkDeviceSize alignedSize = align_size(size, alignment); |
| + |
| + // find the smallest block big enough for our allocation |
| + FreeList::Iter iter = fFreeList.headIter(); |
| + FreeList::Iter bestFitIter; |
| + VkDeviceSize bestFitSize = fSize + 1; |
| + VkDeviceSize secondLargestSize = 0; |
| + VkDeviceSize secondLargestOffset = 0; |
| + while (iter.get()) { |
| + Block* block = iter.get(); |
| + // need to adjust size to match desired alignment |
| + VkDeviceSize alignedDiff = align_size(block->fOffset, alignment) - block->fOffset; |
| + VkDeviceSize blockAlignedSize = block->fSize - alignedDiff; |
| + if (blockAlignedSize >= alignedSize && blockAlignedSize < bestFitSize) { |
|
egdaniel
2016/06/13 16:44:44
from our conversation, I think a better criteria f
jvanverth1
2016/06/13 19:59:22
I've modified it to have a fixed alignment for a g
|
| + bestFitIter = iter; |
| + bestFitSize = blockAlignedSize; |
| + } |
| + if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) { |
| + secondLargestSize = block->fSize; |
| + secondLargestOffset = block->fOffset; |
| + } |
| + iter.next(); |
| + } |
| + SkASSERT(secondLargestSize <= fLargestBlockSize); |
| + |
| + Block* bestFit = bestFitIter.get(); |
| + if (bestFit) { |
| + alloc->fMemory = fAlloc; |
| + alloc->fOffset = align_size(bestFit->fOffset, alignment); |
| + alloc->fSize = alignedSize; |
| + VkDeviceSize originalBestFitOffset = bestFit->fOffset; |
| + // if there's an unaligned area at the start of the block, |
| + // we need to add a new block to hold it |
| + VkDeviceSize padSize = 0; |
| + if (alloc->fOffset != bestFit->fOffset) { |
| + Block* pad = fFreeList.addBefore(bestFitIter); |
| + pad->fOffset = bestFit->fOffset; |
| + pad->fSize = alloc->fOffset - bestFit->fOffset; |
| + padSize = pad->fSize; |
| + bestFit->fOffset += padSize; |
| + bestFit->fSize -= padSize; |
| + } |
| + // adjust or remove current block |
| + if (bestFit->fSize > alignedSize) { |
| + bestFit->fOffset += alignedSize; |
| + bestFit->fSize -= alignedSize; |
| + if (fLargestBlockOffset == originalBestFitOffset) { |
| + if (bestFit->fSize >= secondLargestSize) { |
| + fLargestBlockSize = bestFit->fSize; |
| + fLargestBlockOffset = bestFit->fOffset; |
| + } else { |
| + fLargestBlockSize = secondLargestSize; |
| + fLargestBlockOffset = secondLargestOffset; |
| + } |
| + } |
| +#ifdef SK_DEBUG |
| + VkDeviceSize largestSize = 0; |
| + iter = fFreeList.headIter(); |
| + while (iter.get()) { |
| + Block* block = iter.get(); |
| + if (largestSize < block->fSize) { |
| + largestSize = block->fSize; |
| + } |
| + iter.next(); |
| + } |
| + SkASSERT(largestSize == fLargestBlockSize) |
| +#endif |
| + } else { |
| + SkASSERT(bestFit->fSize == alignedSize); |
| + if (fLargestBlockOffset == originalBestFitOffset) { |
| + fLargestBlockSize = secondLargestSize; |
| + fLargestBlockOffset = secondLargestOffset; |
| + } |
| + fFreeList.remove(bestFit); |
| +#ifdef SK_DEBUG |
| + VkDeviceSize largestSize = 0; |
| + iter = fFreeList.headIter(); |
| + while (iter.get()) { |
| + Block* block = iter.get(); |
| + if (largestSize < block->fSize) { |
| + largestSize = block->fSize; |
| + } |
| + iter.next(); |
| + } |
| + SkASSERT(largestSize == fLargestBlockSize); |
| +#endif |
| + } |
| + fFreeSize -= alignedSize; |
| + |
| + return true; |
| + } |
| + |
| + SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize); |
| + |
| + return false; |
| +} |
| + |
| + |
// Returns 'alloc' to the free list, coalescing with the neighboring free
// blocks where they are exactly adjacent, and keeps fFreeSize and the cached
// largest-block info up to date. The free list is kept sorted by offset.
void GrVkSubHeap::free(const GrVkAlloc& alloc) {
    SkASSERT(alloc.fMemory == fAlloc);

    // find the first free block at or after this allocation ('iter'); the
    // block before it ('prev'), if any, is the free block preceding us
    FreeList::Iter iter = fFreeList.headIter();
    while (iter.get() && iter.get()->fOffset < alloc.fOffset) {
        iter.next();
    }
    FreeList::Iter prev = iter;
    prev.prev();
    // we have four cases:
    // we exactly follow the previous one
    Block* block;
    if (prev.get() && prev.get()->fOffset + prev.get()->fSize == alloc.fOffset) {
        // merge into the previous free block
        block = prev.get();
        block->fSize += alloc.fSize;
        if (block->fOffset == fLargestBlockOffset) {
            fLargestBlockSize = block->fSize;
        }
        // and additionally we may exactly precede the next one
        if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
            // absorb the following free block as well
            block->fSize += iter.get()->fSize;
            if (iter.get()->fOffset == fLargestBlockOffset) {
                fLargestBlockOffset = block->fOffset;
                fLargestBlockSize = block->fSize;
            }
            fFreeList.remove(iter.get());
        }
    // or we only exactly precede the next one
    } else if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
        // grow the following free block backwards to cover this allocation
        block = iter.get();
        block->fSize += alloc.fSize;
        if (block->fOffset == fLargestBlockOffset) {
            fLargestBlockOffset = alloc.fOffset;
            fLargestBlockSize = block->fSize;
        }
        block->fOffset = alloc.fOffset;
    // or we fall somewhere in between, with gaps
    } else {
        // no adjacent free block: insert a new one before 'iter'
        block = fFreeList.addBefore(iter);
        block->fOffset = alloc.fOffset;
        block->fSize = alloc.fSize;
    }
    fFreeSize += alloc.fSize;
    // the (possibly merged) block may now be the largest free block
    if (block->fSize > fLargestBlockSize) {
        fLargestBlockSize = block->fSize;
        fLargestBlockOffset = block->fOffset;
    }

#ifdef SK_DEBUG
    // verify the cached largest-block size against a full scan of the list
    VkDeviceSize largestSize = 0;
    iter = fFreeList.headIter();
    while (iter.get()) {
        Block* block = iter.get();
        if (largestSize < block->fSize) {
            largestSize = block->fSize;
        }
        iter.next();
    }
    SkASSERT(fLargestBlockSize == largestSize);
#endif
}
| + |
| +GrVkHeap::~GrVkHeap() { |
| + // TODO: figure out auto delete |
| + for (auto i = 0; i < fSubHeaps.count(); ++i) { |
| + delete fSubHeaps[i]; |
| + fSubHeaps[i] = nullptr; |
| + } |
| +} |
| + |
| +bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, |
| + uint32_t memoryTypeIndex, GrVkAlloc* alloc) { |
| + VkDeviceSize alignedSize = align_size(size, alignment); |
| + |
| + // first try to find a subheap that fits our allocation request |
| + int bestFitIndex = -1; |
| + VkDeviceSize bestFitSize = 0x7FFFFFFF; |
| + for (auto i = 0; i < fSubHeaps.count(); ++i) { |
| + if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex) { |
| + VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize(); |
| + if (heapSize > alignedSize && heapSize < bestFitSize) { |
| + bestFitIndex = i; |
| + bestFitSize = heapSize; |
| + } |
| + } |
| + } |
| + |
| + // TODO: how to make sure freesize == size of largest free block? |
| + if (bestFitIndex >= 0) { |
| + if (fSubHeaps[bestFitIndex]->alloc(size, alignment, alloc)) { |
| + fUsedSize += alloc->fSize; |
| + SkASSERT(fUsedSize < 256 * 1024 * 1024); |
| + return true; |
| + } |
| + return false; |
| + } |
| + |
| + // need to allocate a new subheap |
| + GrVkSubHeap*& subHeap = fSubHeaps.push_back(); |
| + subHeap = new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize); |
| + fAllocSize += fSubHeapSize; |
| + if (subHeap->alloc(size, alignment, alloc)) { |
| + fUsedSize += alloc->fSize; |
| + SkASSERT(fUsedSize < 256 * 1024 * 1024); |
| + return true; |
| + } |
| + |
| + return false; |
| +} |
| + |
| +bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, |
| + uint32_t memoryTypeIndex, GrVkAlloc* alloc) { |
| + VkDeviceSize alignedSize = align_size(size, alignment); |
| + |
| + // first try to find an unallocated subheap that fits our allocation request |
| + int bestFitIndex = -1; |
| + VkDeviceSize bestFitSize = 0x7FFFFFFF; |
| + for (auto i = 0; i < fSubHeaps.count(); ++i) { |
| + if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && fSubHeaps[i]->unallocated()) { |
| + VkDeviceSize heapSize = fSubHeaps[i]->size(); |
| + if (heapSize > alignedSize && heapSize < bestFitSize) { |
| + bestFitIndex = i; |
| + bestFitSize = heapSize; |
| + } |
| + } |
| + } |
| + |
| + if (bestFitIndex >= 0) { |
| + if (fSubHeaps[bestFitIndex]->alloc(size, alignment, alloc)) { |
| + fUsedSize += alloc->fSize; |
| + SkASSERT(fUsedSize < 256 * 1024 * 1024); |
| + return true; |
| + } |
| + return false; |
| + } |
| + |
| + // need to allocate a new subheap |
| + GrVkSubHeap*& subHeap = fSubHeaps.push_back(); |
| + subHeap = new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize); |
| + fAllocSize += alignedSize; |
| + if (subHeap->alloc(size, alignment, alloc)) { |
| + fUsedSize += alloc->fSize; |
| + SkASSERT(fUsedSize < 256 * 1024 * 1024); |
| + return true; |
| + } |
| + |
| + return false; |
| +} |
| + |
| +bool GrVkHeap::free(const GrVkAlloc& alloc) { |
| + for (auto i = 0; i < fSubHeaps.count(); ++i) { |
| + if (fSubHeaps[i]->memory() == alloc.fMemory) { |
| + fSubHeaps[i]->free(alloc); |
| + fUsedSize -= alloc.fSize; |
| + return true; |
| + } |
| + } |
| + |
| + return false; |
| +} |
| + |
| + |