OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "GrVkMemory.h" | 8 #include "GrVkMemory.h" |
9 | 9 |
10 #include "GrVkGpu.h" | 10 #include "GrVkGpu.h" |
11 #include "GrVkUtil.h" | 11 #include "GrVkUtil.h" |
12 | 12 |
13 static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDevMemProps, | 13 static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps, |
14 uint32_t typeBits, | 14 uint32_t typeBits, |
15 VkMemoryPropertyFlags requestedMemFlags, | 15 VkMemoryPropertyFlags requestedMemFlags, |
16 uint32_t* typeIndex) { | 16 uint32_t* typeIndex) { |
17 uint32_t checkBit = 1; | 17 uint32_t checkBit = 1; |
18 for (uint32_t i = 0; i < 32; ++i) { | 18 for (uint32_t i = 0; i < 32; ++i) { |
19 if (typeBits & checkBit) { | 19 if (typeBits & checkBit) { |
20 uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags & | 20 uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags & |
21 requestedMemFlags; | 21 requestedMemFlags; |
22 if (supportedFlags == requestedMemFlags) { | 22 if (supportedFlags == requestedMemFlags) { |
23 *typeIndex = i; | 23 *typeIndex = i; |
(...skipping 26 matching lines...) | |
50 VkBuffer buffer, | 50 VkBuffer buffer, |
51 GrVkBuffer::Type type, | 51 GrVkBuffer::Type type, |
52 bool dynamic, | 52 bool dynamic, |
53 GrVkAlloc* alloc) { | 53 GrVkAlloc* alloc) { |
54 const GrVkInterface* iface = gpu->vkInterface(); | 54 const GrVkInterface* iface = gpu->vkInterface(); |
55 VkDevice device = gpu->device(); | 55 VkDevice device = gpu->device(); |
56 | 56 |
57 VkMemoryRequirements memReqs; | 57 VkMemoryRequirements memReqs; |
58 GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); | 58 GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); |
59 | 59 |
60 VkMemoryPropertyFlags desiredMemProps = dynamic ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | 60 VkMemoryPropertyFlags desiredMemProps = dynamic ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
egdaniel
2016/09/15 20:42:17
Is it possible that there is no speed up because w
jvanverth1
2016/09/19 19:40:46
If there's an option that is non-coherent but cach
egdaniel
2016/09/19 19:56:21
So I definitely think we should prefer one of non-
jvanverth1
2016/09/20 14:03:55
Reading the spec, a host visible option that is ne
egdaniel
2016/09/20 14:10:26
Ah right I forgot there was a required set of poss
| |
61 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | | |
62 VK_MEMORY_PROPERTY_HOST_CACHED_BIT | 61 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
63 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; | 62 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
64 uint32_t typeIndex = 0; | 63 uint32_t typeIndex = 0; |
65 if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), | 64 const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); |
65 if (!get_valid_memory_type_index(phDevMemProps, | |
66 memReqs.memoryTypeBits, | 66 memReqs.memoryTypeBits, |
67 desiredMemProps, | 67 desiredMemProps, |
68 &typeIndex)) { | 68 &typeIndex)) { |
69 // this memory type should always be available | 69 // this memory type should always be available |
70 SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), | 70 SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
71 memReqs.memoryTypeBits, | 71 memReqs.memoryTypeBits, |
72 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | 72 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, |
73 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
74 &typeIndex)); | 73 &typeIndex)); |
75 } | 74 } |
76 | 75 |
77 GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); | 76 GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); |
78 | 77 |
79 if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { | 78 if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { |
80 SkDebugf("Failed to alloc buffer\n"); | 79 SkDebugf("Failed to alloc buffer\n"); |
81 return false; | 80 return false; |
82 } | 81 } |
83 | 82 |
84 // Bind Memory to device | 83 // Bind Memory to device |
85 VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, | 84 VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, |
86 alloc->fMemory, alloc->fOffset)); | 85 alloc->fMemory, alloc->fOffset)); |
87 if (err) { | 86 if (err) { |
88 SkASSERT_RELEASE(heap->free(*alloc)); | 87 SkASSERT_RELEASE(heap->free(*alloc)); |
89 return false; | 88 return false; |
90 } | 89 } |
91 | 90 |
91 VkMemoryPropertyFlags prFlags = phDevMemProps.memoryTypes[typeIndex].propertyFlags; | |
92 alloc->fFlags = prFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 | |
93 : GrVkAlloc::kNoncoherent_Flag; | |
94 | |
92 return true; | 95 return true; |
93 } | 96 } |
94 | 97 |
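For context on the memory-type discussion in the review thread above: the new code prefers a host-visible, host-cached type for dynamic buffers (even if that type is non-coherent) and falls back to plain host-visible, which the spec requires every buffer's memoryTypeBits to include in coherent form. A minimal standalone sketch of that preference order follows; the helper names are illustrative only and not part of this CL or of Skia.

#include <vulkan/vulkan.h>

// Returns true and writes *typeIndex if some memory type allowed by typeBits
// has every flag in `wanted`.
static bool find_memory_type(const VkPhysicalDeviceMemoryProperties& props,
                             uint32_t typeBits,
                             VkMemoryPropertyFlags wanted,
                             uint32_t* typeIndex) {
    for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
        if ((typeBits & (1u << i)) &&
            (props.memoryTypes[i].propertyFlags & wanted) == wanted) {
            *typeIndex = i;
            return true;
        }
    }
    return false;
}

// Try the preferred (cached, possibly non-coherent) type first, then the
// guaranteed host-visible fallback.
static bool pick_dynamic_buffer_type(const VkPhysicalDeviceMemoryProperties& props,
                                     uint32_t typeBits,
                                     uint32_t* typeIndex) {
    const VkMemoryPropertyFlags preferences[] = {
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
    };
    for (VkMemoryPropertyFlags flags : preferences) {
        if (find_memory_type(props, typeBits, flags, typeIndex)) {
            return true;
        }
    }
    return false;
}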
95 void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, | 98 void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, |
96 const GrVkAlloc& alloc) { | 99 const GrVkAlloc& alloc) { |
97 | 100 |
98 GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); | 101 GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); |
99 SkASSERT_RELEASE(heap->free(alloc)); | 102 SkASSERT_RELEASE(heap->free(alloc)); |
100 } | 103 } |
101 | 104 |
(...skipping 13 matching lines...) | |
115 bool linearTiling, | 118 bool linearTiling, |
116 GrVkAlloc* alloc) { | 119 GrVkAlloc* alloc) { |
117 const GrVkInterface* iface = gpu->vkInterface(); | 120 const GrVkInterface* iface = gpu->vkInterface(); |
118 VkDevice device = gpu->device(); | 121 VkDevice device = gpu->device(); |
119 | 122 |
120 VkMemoryRequirements memReqs; | 123 VkMemoryRequirements memReqs; |
121 GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); | 124 GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); |
122 | 125 |
123 uint32_t typeIndex = 0; | 126 uint32_t typeIndex = 0; |
124 GrVkHeap* heap; | 127 GrVkHeap* heap; |
128 const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); | |
125 if (linearTiling) { | 129 if (linearTiling) { |
126 VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | 130 VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
127 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | | |
128 VK_MEMORY_PROPERTY_HOST_CACHED_BIT; | 131 VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
129 if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), | 132 if (!get_valid_memory_type_index(phDevMemProps, |
130 memReqs.memoryTypeBits, | 133 memReqs.memoryTypeBits, |
131 desiredMemProps, | 134 desiredMemProps, |
132 &typeIndex)) { | 135 &typeIndex)) { |
133 // this memory type should always be available | 136 // this memory type should always be available |
134 SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), | 137 SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
135 memReqs.memoryTypeBits, | 138 memReqs.memoryTypeBits, |
136 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | 139 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, |
137 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, | |
138 &typeIndex)); | 140 &typeIndex)); |
139 } | 141 } |
140 heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); | 142 heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); |
141 } else { | 143 } else { |
142 // this memory type should always be available | 144 // this memory type should always be available |
143 SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), | 145 SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, |
144 memReqs.memoryTypeBits, | 146 memReqs.memoryTypeBits, |
145 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, | 147 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
146 &typeIndex)); | 148 &typeIndex)); |
147 if (memReqs.size <= kMaxSmallImageSize) { | 149 if (memReqs.size <= kMaxSmallImageSize) { |
148 heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); | 150 heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); |
149 } else { | 151 } else { |
150 heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); | 152 heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); |
151 } | 153 } |
152 } | 154 } |
153 | 155 |
154 if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { | 156 if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { |
155 SkDebugf("Failed to alloc image\n"); | 157 SkDebugf("Failed to alloc image\n"); |
156 return false; | 158 return false; |
157 } | 159 } |
158 | 160 |
159 // Bind Memory to device | 161 // Bind Memory to device |
160 VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, | 162 VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, |
161 alloc->fMemory, alloc->fOffset)); | 163 alloc->fMemory, alloc->fOffset)); |
162 if (err) { | 164 if (err) { |
163 SkASSERT_RELEASE(heap->free(*alloc)); | 165 SkASSERT_RELEASE(heap->free(*alloc)); |
164 return false; | 166 return false; |
165 } | 167 } |
166 | 168 |
169 VkMemoryPropertyFlags prFlags = phDevMemProps.memoryTypes[typeIndex].propertyFlags; | |
170 alloc->fFlags = prFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 | |
171 : GrVkAlloc::kNoncoherent_Flag; | |
172 | |
167 gTotalImageMemory += alloc->fSize; | 173 gTotalImageMemory += alloc->fSize; |
168 | 174 |
169 VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); | 175 VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); |
170 gTotalImageMemoryFullPage += pageAlignedSize; | 176 gTotalImageMemoryFullPage += pageAlignedSize; |
171 | 177 |
172 return true; | 178 return true; |
173 } | 179 } |
174 | 180 |
175 void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, | 181 void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, |
176 const GrVkAlloc& alloc) { | 182 const GrVkAlloc& alloc) { |
(...skipping 60 matching lines...) | |
237 } else if (VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL == layout) { | 243 } else if (VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL == layout) { |
238 flags = VK_ACCESS_TRANSFER_WRITE_BIT; | 244 flags = VK_ACCESS_TRANSFER_WRITE_BIT; |
239 } else if (VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL == layout) { | 245 } else if (VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL == layout) { |
240 flags = VK_ACCESS_TRANSFER_READ_BIT; | 246 flags = VK_ACCESS_TRANSFER_READ_BIT; |
241 } else if (VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL == layout) { | 247 } else if (VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL == layout) { |
242 flags = VK_ACCESS_SHADER_READ_BIT; | 248 flags = VK_ACCESS_SHADER_READ_BIT; |
243 } | 249 } |
244 return flags; | 250 return flags; |
245 } | 251 } |
246 | 252 |
253 void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { | |
254 if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { | |
255 VkMappedMemoryRange mappedMemoryRange; | |
256 memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); | |
257 mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; | |
258 mappedMemoryRange.memory = alloc.fMemory; | |
259 mappedMemoryRange.offset = alloc.fOffset; | |
260 mappedMemoryRange.size = alloc.fSize; | |
egdaniel
2016/09/15 20:42:17
should we not be tracking which memory gets change
jvanverth1
2016/09/19 19:40:46
Currently we just map, the client does what they l
egdaniel
2016/09/19 19:56:21
sgtm
| |
261 // TODO: batch these into a single call before command buffer submit? | |
egdaniel
2016/09/15 20:42:17
We actually do a pretty good job of already batchi
jvanverth1
2016/09/19 19:40:46
Acknowledged.
| |
262 // What does the spec mean by "the host writes have completed"? | |
egdaniel
2016/09/15 20:42:17
My guess is this is involved if different thread i
jvanverth1
2016/09/19 19:40:46
Acknowledged.
| |
263 GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), | |
264 1, &mappedMemoryRange)); | |
265 } | |
266 } | |
267 | |
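On the "track which memory gets changed" question raised above: if only the written subrange were flushed rather than the whole allocation, the spec requires the range's offset to be a multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize and its size to be a multiple of that value (or to reach the end of the memory object). A rough sketch of that idea follows, using the raw Vulkan entry point rather than GR_VK_CALL and hypothetical parameter names; it is not the code in this CL.

#include <vulkan/vulkan.h>

// Flush only a dirty window of a mapped, non-coherent allocation. dirtyOffset is
// relative to the start of `memory`; assumes the padded range stays within the
// currently mapped range.
static VkResult flush_dirty_range(VkDevice device, VkDeviceMemory memory,
                                  VkDeviceSize dirtyOffset, VkDeviceSize dirtySize,
                                  VkDeviceSize nonCoherentAtomSize) {
    // Pad the window outward to the required alignment.
    VkDeviceSize start = (dirtyOffset / nonCoherentAtomSize) * nonCoherentAtomSize;
    VkDeviceSize end = ((dirtyOffset + dirtySize + nonCoherentAtomSize - 1) /
                        nonCoherentAtomSize) * nonCoherentAtomSize;

    VkMappedMemoryRange range = {};
    range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    range.memory = memory;
    range.offset = start;
    range.size = end - start;
    return vkFlushMappedMemoryRanges(device, 1, &range);
}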
268 void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { | |
269 if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { | |
270 VkMappedMemoryRange mappedMemoryRange; | |
271 memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); | |
272 mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; | |
273 mappedMemoryRange.memory = alloc.fMemory; | |
274 mappedMemoryRange.offset = alloc.fOffset; | |
275 mappedMemoryRange.size = alloc.fSize; | |
276 // we only use this for readPixels, so probably no need to batch | |
277 GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), | |
278 1, &mappedMemoryRange)); | |
279 } | |
280 } | |
281 | |
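To show where the two helpers above are meant to sit relative to map/unmap, here is an illustrative upload path for an allocation that may be non-coherent. It reuses this file's GR_VK_CALL style and assumes memcpy is available through the existing includes, but the function itself is hypothetical and not part of the CL.

static void upload_to_alloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
                            const void* src, size_t size) {
    void* mapPtr = nullptr;
    VkResult err = GR_VK_CALL(gpu->vkInterface(),
                              MapMemory(gpu->device(), alloc.fMemory,
                                        alloc.fOffset, size, 0, &mapPtr));
    if (err) {
        return;
    }
    memcpy(mapPtr, src, size);
    // Host writes are done; a no-op for coherent types, a real flush otherwise.
    GrVkMemory::FlushMappedAlloc(gpu, alloc);
    GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
}

The read-back direction is the mirror image: wait for the GPU work to finish, call InvalidateMappedAlloc, then read through the mapped pointer.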
247 bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, | 282 bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, |
248 VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { | 283 VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { |
249 VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); | 284 VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); |
250 | 285 |
251 // find the smallest block big enough for our allocation | 286 // find the smallest block big enough for our allocation |
252 FreeList::Iter iter = fFreeList.headIter(); | 287 FreeList::Iter iter = fFreeList.headIter(); |
253 FreeList::Iter bestFitIter; | 288 FreeList::Iter bestFitIter; |
254 VkDeviceSize bestFitSize = fSize + 1; | 289 VkDeviceSize bestFitSize = fSize + 1; |
255 VkDeviceSize secondLargestSize = 0; | 290 VkDeviceSize secondLargestSize = 0; |
256 VkDeviceSize secondLargestOffset = 0; | 291 VkDeviceSize secondLargestOffset = 0; |
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
553 fSubHeaps[i]->free(alloc); | 588 fSubHeaps[i]->free(alloc); |
554 fUsedSize -= alloc.fSize; | 589 fUsedSize -= alloc.fSize; |
555 return true; | 590 return true; |
556 } | 591 } |
557 } | 592 } |
558 | 593 |
559 return false; | 594 return false; |
560 } | 595 } |
561 | 596 |
562 | 597 |