Chromium Code Reviews| Index: src/gpu/vk/GrVkMemory.cpp |
| diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp |
| index 48bea9c67ef762774e0ca1a968ae2b6656d4506c..db2e335d884b1e16c704f07b641d7b510a7e957a 100644 |
| --- a/src/gpu/vk/GrVkMemory.cpp |
| +++ b/src/gpu/vk/GrVkMemory.cpp |
| @@ -10,7 +10,7 @@ |
| #include "GrVkGpu.h" |
| #include "GrVkUtil.h" |
| -static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDevMemProps, |
| +static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps, |
| uint32_t typeBits, |
| VkMemoryPropertyFlags requestedMemFlags, |
| uint32_t* typeIndex) { |
| @@ -58,19 +58,18 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, |
| GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); |
| VkMemoryPropertyFlags desiredMemProps = dynamic ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
|
egdaniel
2016/09/15 20:42:17
Is it possible that there is no speed up because w
jvanverth1
2016/09/19 19:40:46
If there's an option that is non-coherent but cach
egdaniel
2016/09/19 19:56:21
So I definitely think we should prefer one of non-
jvanverth1
2016/09/20 14:03:55
Reading the spec, a host visible option that is ne
egdaniel
2016/09/20 14:10:26
Ah right I forgot there was a required set of poss
|
| - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
| : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
| uint32_t typeIndex = 0; |
| - if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); |
| + if (!get_valid_memory_type_index(phDevMemProps, |
| memReqs.memoryTypeBits, |
| desiredMemProps, |
| &typeIndex)) { |
| // this memory type should always be available |
| - SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
| memReqs.memoryTypeBits, |
| - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, |
| + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, |
| &typeIndex)); |
| } |
| @@ -89,6 +88,10 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, |
| return false; |
| } |
| + VkMemoryPropertyFlags prFlags = phDevMemProps.memoryTypes[typeIndex].propertyFlags; |
| + alloc->fFlags = prFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 |
| + : GrVkAlloc::kNoncoherent_Flag; |
| + |
| return true; |
| } |
| @@ -122,25 +125,24 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, |
| uint32_t typeIndex = 0; |
| GrVkHeap* heap; |
| + const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); |
| if (linearTiling) { |
| VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
| - if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + if (!get_valid_memory_type_index(phDevMemProps, |
| memReqs.memoryTypeBits, |
| desiredMemProps, |
| &typeIndex)) { |
| // this memory type should always be available |
| - SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
| memReqs.memoryTypeBits, |
| - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, |
| + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, |
| &typeIndex)); |
| } |
| heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); |
| } else { |
| // this memory type should always be available |
| - SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), |
| + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
| memReqs.memoryTypeBits, |
| VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
| &typeIndex)); |
| @@ -164,6 +166,10 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, |
| return false; |
| } |
| + VkMemoryPropertyFlags prFlags = phDevMemProps.memoryTypes[typeIndex].propertyFlags; |
| + alloc->fFlags = prFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 |
| + : GrVkAlloc::kNoncoherent_Flag; |
| + |
| gTotalImageMemory += alloc->fSize; |
| VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); |
| @@ -244,6 +250,35 @@ VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) { |
| return flags; |
| } |
| +void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { |
| + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { |
| + VkMappedMemoryRange mappedMemoryRange; |
| + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); |
| + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; |
| + mappedMemoryRange.memory = alloc.fMemory; |
| + mappedMemoryRange.offset = alloc.fOffset; |
| + mappedMemoryRange.size = alloc.fSize; |
|
egdaniel
2016/09/15 20:42:17
should we not be tracking which memory gets change
jvanverth1
2016/09/19 19:40:46
Currently we just map, the client does what they l
egdaniel
2016/09/19 19:56:21
sgtm
|
| + // TODO: batch these into a single call before command buffer submit? |
|
egdaniel
2016/09/15 20:42:17
We actually do a pretty good job of already batchi
jvanverth1
2016/09/19 19:40:46
Acknowledged.
|
| + // What does the spec mean by "the host writes have completed"? |
|
egdaniel
2016/09/15 20:42:17
My guess is this is involved if different thread i
jvanverth1
2016/09/19 19:40:46
Acknowledged.
|
| + GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), |
| + 1, &mappedMemoryRange)); |
| + } |
| +} |
| + |
| +void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { |
| + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { |
| + VkMappedMemoryRange mappedMemoryRange; |
| + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); |
| + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; |
| + mappedMemoryRange.memory = alloc.fMemory; |
| + mappedMemoryRange.offset = alloc.fOffset; |
| + mappedMemoryRange.size = alloc.fSize; |
| + // we only use this for readPixels, so probably no need to batch |
| + GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), |
| + 1, &mappedMemoryRange)); |
| + } |
| +} |
| + |
| bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, |
| VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { |
| VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); |