turnip: divide cube map depth by 6
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 static void update_samples(struct tu_subpass *subpass,
33 VkSampleCountFlagBits samples)
34 {
35 assert(subpass->samples == 0 || subpass->samples == samples);
36 subpass->samples = samples;
37 }
38
39 #define GMEM_ALIGN 0x4000
40
41 static void
42 create_render_pass_common(struct tu_render_pass *pass,
43 const struct tu_physical_device *phys_dev)
44 {
45 /* calculate total bytes per pixel */
46 uint32_t cpp_total = 0;
47 for (uint32_t i = 0; i < pass->attachment_count; i++) {
48 struct tu_render_pass_attachment *att = &pass->attachments[i];
49 if (att->gmem_offset >= 0)
50 cpp_total += att->cpp;
51 }
52
53 /* no gmem attachments */
54 if (cpp_total == 0) {
55 /* any value non-zero value so tiling config works with no attachments */
56 pass->gmem_pixels = 1024*1024;
57 return;
58 }
59
60 /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
61 * doesn't break things. maybe there is a better solution?
62 * TODO: this algorithm isn't optimal
63 * for example, two attachments with cpp = {1, 4}
64 * result: nblocks = {12, 52}, pixels = 196608
65 * optimal: nblocks = {13, 51}, pixels = 208896
66 */
67 uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
68 uint32_t offset = 0, pixels = ~0u;
69 for (uint32_t i = 0; i < pass->attachment_count; i++) {
70 struct tu_render_pass_attachment *att = &pass->attachments[i];
71 if (att->gmem_offset < 0)
72 continue;
73
74 att->gmem_offset = offset;
75
76 /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
77 uint32_t align = MAX2(1, att->cpp / 16);
78 uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
79
80 gmem_blocks -= nblocks;
81 cpp_total -= att->cpp;
82 offset += nblocks * GMEM_ALIGN;
83 pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
84 }
85
86 pass->gmem_pixels = pixels;
87
88 for (uint32_t i = 0; i < pass->subpass_count; i++) {
89 struct tu_subpass *subpass = &pass->subpasses[i];
90
91 subpass->srgb_cntl = 0;
92 subpass->render_components = 0;
93
94 for (uint32_t i = 0; i < subpass->color_count; ++i) {
95 uint32_t a = subpass->color_attachments[i].attachment;
96 if (a == VK_ATTACHMENT_UNUSED)
97 continue;
98
99 subpass->render_components |= 0xf << (i * 4);
100
101 if (vk_format_is_srgb(pass->attachments[a].format))
102 subpass->srgb_cntl |= 1 << i;
103 }
104 }
105
106 /* disable unused attachments */
107 for (uint32_t i = 0; i < pass->attachment_count; i++) {
108 struct tu_render_pass_attachment *att = &pass->attachments[i];
109 if (att->gmem_offset < 0) {
110 att->clear_mask = 0;
111 att->load = false;
112 }
113 }
114 }
115
116 static void
117 attachment_set_ops(struct tu_render_pass_attachment *att,
118 VkAttachmentLoadOp load_op,
119 VkAttachmentLoadOp stencil_load_op,
120 VkAttachmentStoreOp store_op,
121 VkAttachmentStoreOp stencil_store_op)
122 {
123 /* load/store ops */
124 att->clear_mask =
125 (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
126 att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
127 att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
128
129 bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
130 bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
131 bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
132
133 switch (att->format) {
134 case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
135 if (att->clear_mask)
136 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
137 if (stencil_clear)
138 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
139 if (stencil_load)
140 att->load = true;
141 if (stencil_store)
142 att->store = true;
143 break;
144 case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
145 att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
146 att->load = stencil_load;
147 att->store = stencil_store;
148 break;
149 default:
150 break;
151 }
152 }
153
154 VkResult
155 tu_CreateRenderPass(VkDevice _device,
156 const VkRenderPassCreateInfo *pCreateInfo,
157 const VkAllocationCallbacks *pAllocator,
158 VkRenderPass *pRenderPass)
159 {
160 TU_FROM_HANDLE(tu_device, device, _device);
161 struct tu_render_pass *pass;
162 size_t size;
163 size_t attachments_offset;
164
165 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
166
167 size = sizeof(*pass);
168 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
169 attachments_offset = size;
170 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
171
172 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
173 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
174 if (pass == NULL)
175 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
176
177 memset(pass, 0, size);
178 pass->attachment_count = pCreateInfo->attachmentCount;
179 pass->subpass_count = pCreateInfo->subpassCount;
180 pass->attachments = (void *) pass + attachments_offset;
181
182 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
183 struct tu_render_pass_attachment *att = &pass->attachments[i];
184
185 att->format = pCreateInfo->pAttachments[i].format;
186 att->samples = pCreateInfo->pAttachments[i].samples;
187 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
188 att->gmem_offset = -1;
189
190 attachment_set_ops(att,
191 pCreateInfo->pAttachments[i].loadOp,
192 pCreateInfo->pAttachments[i].stencilLoadOp,
193 pCreateInfo->pAttachments[i].storeOp,
194 pCreateInfo->pAttachments[i].stencilStoreOp);
195 }
196
197 uint32_t subpass_attachment_count = 0;
198 struct tu_subpass_attachment *p;
199 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
200 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
201
202 subpass_attachment_count +=
203 desc->inputAttachmentCount + desc->colorAttachmentCount +
204 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
205 }
206
207 if (subpass_attachment_count) {
208 pass->subpass_attachments = vk_alloc2(
209 &device->alloc, pAllocator,
210 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
211 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
212 if (pass->subpass_attachments == NULL) {
213 vk_free2(&device->alloc, pAllocator, pass);
214 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
215 }
216 } else
217 pass->subpass_attachments = NULL;
218
219 p = pass->subpass_attachments;
220 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
221 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
222 struct tu_subpass *subpass = &pass->subpasses[i];
223
224 subpass->input_count = desc->inputAttachmentCount;
225 subpass->color_count = desc->colorAttachmentCount;
226 subpass->samples = 0;
227
228 if (desc->inputAttachmentCount > 0) {
229 subpass->input_attachments = p;
230 p += desc->inputAttachmentCount;
231
232 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
233 uint32_t a = desc->pInputAttachments[j].attachment;
234 subpass->input_attachments[j].attachment = a;
235 if (a != VK_ATTACHMENT_UNUSED)
236 pass->attachments[a].gmem_offset = 0;
237 }
238 }
239
240 if (desc->colorAttachmentCount > 0) {
241 subpass->color_attachments = p;
242 p += desc->colorAttachmentCount;
243
244 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
245 uint32_t a = desc->pColorAttachments[j].attachment;
246 subpass->color_attachments[j].attachment = a;
247
248 if (a != VK_ATTACHMENT_UNUSED) {
249 pass->attachments[a].gmem_offset = 0;
250 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
251 }
252 }
253 }
254
255 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
256 if (desc->pResolveAttachments) {
257 p += desc->colorAttachmentCount;
258 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
259 subpass->resolve_attachments[j].attachment =
260 desc->pResolveAttachments[j].attachment;
261 }
262 }
263
264 uint32_t a = desc->pDepthStencilAttachment ?
265 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
266 subpass->depth_stencil_attachment.attachment = a;
267 if (a != VK_ATTACHMENT_UNUSED) {
268 pass->attachments[a].gmem_offset = 0;
269 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
270 }
271
272 subpass->samples = subpass->samples ?: 1;
273 }
274
275 *pRenderPass = tu_render_pass_to_handle(pass);
276
277 create_render_pass_common(pass, device->physical_device);
278
279 return VK_SUCCESS;
280 }
281
282 VkResult
283 tu_CreateRenderPass2(VkDevice _device,
284 const VkRenderPassCreateInfo2KHR *pCreateInfo,
285 const VkAllocationCallbacks *pAllocator,
286 VkRenderPass *pRenderPass)
287 {
288 TU_FROM_HANDLE(tu_device, device, _device);
289 struct tu_render_pass *pass;
290 size_t size;
291 size_t attachments_offset;
292
293 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
294
295 size = sizeof(*pass);
296 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
297 attachments_offset = size;
298 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
299
300 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
301 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
302 if (pass == NULL)
303 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
304
305 memset(pass, 0, size);
306 pass->attachment_count = pCreateInfo->attachmentCount;
307 pass->subpass_count = pCreateInfo->subpassCount;
308 pass->attachments = (void *) pass + attachments_offset;
309
310 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
311 struct tu_render_pass_attachment *att = &pass->attachments[i];
312
313 att->format = pCreateInfo->pAttachments[i].format;
314 att->samples = pCreateInfo->pAttachments[i].samples;
315 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
316 att->gmem_offset = -1;
317
318 attachment_set_ops(att,
319 pCreateInfo->pAttachments[i].loadOp,
320 pCreateInfo->pAttachments[i].stencilLoadOp,
321 pCreateInfo->pAttachments[i].storeOp,
322 pCreateInfo->pAttachments[i].stencilStoreOp);
323 }
324 uint32_t subpass_attachment_count = 0;
325 struct tu_subpass_attachment *p;
326 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
327 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
328
329 subpass_attachment_count +=
330 desc->inputAttachmentCount + desc->colorAttachmentCount +
331 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
332 }
333
334 if (subpass_attachment_count) {
335 pass->subpass_attachments = vk_alloc2(
336 &device->alloc, pAllocator,
337 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
338 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
339 if (pass->subpass_attachments == NULL) {
340 vk_free2(&device->alloc, pAllocator, pass);
341 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
342 }
343 } else
344 pass->subpass_attachments = NULL;
345
346 p = pass->subpass_attachments;
347 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
348 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
349 struct tu_subpass *subpass = &pass->subpasses[i];
350
351 subpass->input_count = desc->inputAttachmentCount;
352 subpass->color_count = desc->colorAttachmentCount;
353 subpass->samples = 0;
354
355 if (desc->inputAttachmentCount > 0) {
356 subpass->input_attachments = p;
357 p += desc->inputAttachmentCount;
358
359 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
360 uint32_t a = desc->pInputAttachments[j].attachment;
361 subpass->input_attachments[j].attachment = a;
362 if (a != VK_ATTACHMENT_UNUSED)
363 pass->attachments[a].gmem_offset = 0;
364 }
365 }
366
367 if (desc->colorAttachmentCount > 0) {
368 subpass->color_attachments = p;
369 p += desc->colorAttachmentCount;
370
371 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
372 uint32_t a = desc->pColorAttachments[j].attachment;
373 subpass->color_attachments[j].attachment = a;
374
375 if (a != VK_ATTACHMENT_UNUSED) {
376 pass->attachments[a].gmem_offset = 0;
377 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
378 }
379 }
380 }
381
382 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
383 if (desc->pResolveAttachments) {
384 p += desc->colorAttachmentCount;
385 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
386 subpass->resolve_attachments[j].attachment =
387 desc->pResolveAttachments[j].attachment;
388 }
389 }
390
391
392 uint32_t a = desc->pDepthStencilAttachment ?
393 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
394 subpass->depth_stencil_attachment.attachment = a;
395 if (a != VK_ATTACHMENT_UNUSED) {
396 pass->attachments[a].gmem_offset = 0;
397 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
398 }
399
400 subpass->samples = subpass->samples ?: 1;
401 }
402
403 *pRenderPass = tu_render_pass_to_handle(pass);
404
405 create_render_pass_common(pass, device->physical_device);
406
407 return VK_SUCCESS;
408 }
409
410 void
411 tu_DestroyRenderPass(VkDevice _device,
412 VkRenderPass _pass,
413 const VkAllocationCallbacks *pAllocator)
414 {
415 TU_FROM_HANDLE(tu_device, device, _device);
416 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
417
418 if (!_pass)
419 return;
420
421 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
422 vk_free2(&device->alloc, pAllocator, pass);
423 }
424
425 void
426 tu_GetRenderAreaGranularity(VkDevice _device,
427 VkRenderPass renderPass,
428 VkExtent2D *pGranularity)
429 {
430 pGranularity->width = GMEM_ALIGN_W;
431 pGranularity->height = GMEM_ALIGN_H;
432 }