turnip: Don't bother clamping VB size.
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 static void update_samples(struct tu_subpass *subpass,
33 VkSampleCountFlagBits samples)
34 {
35 assert(subpass->samples == 0 || subpass->samples == samples);
36 subpass->samples = samples;
37 }
38
39 static void
40 create_render_pass_common(struct tu_render_pass *pass,
41 const struct tu_physical_device *phys_dev)
42 {
43 uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
44 uint32_t tile_align_w = phys_dev->tile_align_w;
45 uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;
46
47 /* calculate total bytes per pixel */
48 uint32_t cpp_total = 0;
49 for (uint32_t i = 0; i < pass->attachment_count; i++) {
50 struct tu_render_pass_attachment *att = &pass->attachments[i];
51 if (att->gmem_offset >= 0) {
52 cpp_total += att->cpp;
53 /* texture pitch must be aligned to 64, use a tile_align_w that is
54 * a multiple of 64 for cpp==1 attachment to work as input attachment
55 */
56 if (att->cpp == 1 && tile_align_w % 64 != 0) {
57 tile_align_w *= 2;
58 block_align_shift -= 1;
59 }
60 }
61 }
62
63 pass->tile_align_w = tile_align_w;
64
65 /* no gmem attachments */
66 if (cpp_total == 0) {
67 /* any value non-zero value so tiling config works with no attachments */
68 pass->gmem_pixels = 1024*1024;
69 return;
70 }
71
72 /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
73 * doesn't break things. maybe there is a better solution?
74 * TODO: this algorithm isn't optimal
75 * for example, two attachments with cpp = {1, 4}
76 * result: nblocks = {12, 52}, pixels = 196608
77 * optimal: nblocks = {13, 51}, pixels = 208896
78 */
79 uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
80 uint32_t offset = 0, pixels = ~0u;
81 for (uint32_t i = 0; i < pass->attachment_count; i++) {
82 struct tu_render_pass_attachment *att = &pass->attachments[i];
83 if (att->gmem_offset < 0)
84 continue;
85
86 att->gmem_offset = offset;
87
88 uint32_t align = MAX2(1, att->cpp >> block_align_shift);
89 uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
90
91 gmem_blocks -= nblocks;
92 cpp_total -= att->cpp;
93 offset += nblocks * gmem_align;
94 pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
95 }
96
97 pass->gmem_pixels = pixels;
98
99 for (uint32_t i = 0; i < pass->subpass_count; i++) {
100 struct tu_subpass *subpass = &pass->subpasses[i];
101
102 subpass->srgb_cntl = 0;
103
104 for (uint32_t i = 0; i < subpass->color_count; ++i) {
105 uint32_t a = subpass->color_attachments[i].attachment;
106 if (a == VK_ATTACHMENT_UNUSED)
107 continue;
108
109 if (vk_format_is_srgb(pass->attachments[a].format))
110 subpass->srgb_cntl |= 1 << i;
111 }
112 }
113
114 /* disable unused attachments */
115 for (uint32_t i = 0; i < pass->attachment_count; i++) {
116 struct tu_render_pass_attachment *att = &pass->attachments[i];
117 if (att->gmem_offset < 0) {
118 att->clear_mask = 0;
119 att->load = false;
120 }
121 }
122 }
123
124 static void
125 attachment_set_ops(struct tu_render_pass_attachment *att,
126 VkAttachmentLoadOp load_op,
127 VkAttachmentLoadOp stencil_load_op,
128 VkAttachmentStoreOp store_op,
129 VkAttachmentStoreOp stencil_store_op)
130 {
131 /* load/store ops */
132 att->clear_mask =
133 (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
134 att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
135 att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
136
137 bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
138 bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
139 bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
140
141 switch (att->format) {
142 case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
143 if (att->clear_mask)
144 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
145 if (stencil_clear)
146 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
147 if (stencil_load)
148 att->load = true;
149 if (stencil_store)
150 att->store = true;
151 break;
152 case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
153 att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
154 att->load = stencil_load;
155 att->store = stencil_store;
156 break;
157 default:
158 break;
159 }
160 }
161
162 VkResult
163 tu_CreateRenderPass(VkDevice _device,
164 const VkRenderPassCreateInfo *pCreateInfo,
165 const VkAllocationCallbacks *pAllocator,
166 VkRenderPass *pRenderPass)
167 {
168 TU_FROM_HANDLE(tu_device, device, _device);
169 struct tu_render_pass *pass;
170 size_t size;
171 size_t attachments_offset;
172
173 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
174
175 size = sizeof(*pass);
176 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
177 attachments_offset = size;
178 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
179
180 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
181 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
182 if (pass == NULL)
183 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
184
185 memset(pass, 0, size);
186 pass->attachment_count = pCreateInfo->attachmentCount;
187 pass->subpass_count = pCreateInfo->subpassCount;
188 pass->attachments = (void *) pass + attachments_offset;
189
190 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
191 struct tu_render_pass_attachment *att = &pass->attachments[i];
192
193 att->format = pCreateInfo->pAttachments[i].format;
194 att->samples = pCreateInfo->pAttachments[i].samples;
195 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
196 att->gmem_offset = -1;
197
198 attachment_set_ops(att,
199 pCreateInfo->pAttachments[i].loadOp,
200 pCreateInfo->pAttachments[i].stencilLoadOp,
201 pCreateInfo->pAttachments[i].storeOp,
202 pCreateInfo->pAttachments[i].stencilStoreOp);
203 }
204
205 uint32_t subpass_attachment_count = 0;
206 struct tu_subpass_attachment *p;
207 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
208 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
209
210 subpass_attachment_count +=
211 desc->inputAttachmentCount + desc->colorAttachmentCount +
212 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
213 }
214
215 if (subpass_attachment_count) {
216 pass->subpass_attachments = vk_alloc2(
217 &device->alloc, pAllocator,
218 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
219 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
220 if (pass->subpass_attachments == NULL) {
221 vk_free2(&device->alloc, pAllocator, pass);
222 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
223 }
224 } else
225 pass->subpass_attachments = NULL;
226
227 p = pass->subpass_attachments;
228 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
229 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
230 struct tu_subpass *subpass = &pass->subpasses[i];
231
232 subpass->input_count = desc->inputAttachmentCount;
233 subpass->color_count = desc->colorAttachmentCount;
234 subpass->samples = 0;
235
236 if (desc->inputAttachmentCount > 0) {
237 subpass->input_attachments = p;
238 p += desc->inputAttachmentCount;
239
240 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
241 uint32_t a = desc->pInputAttachments[j].attachment;
242 subpass->input_attachments[j].attachment = a;
243 if (a != VK_ATTACHMENT_UNUSED)
244 pass->attachments[a].gmem_offset = 0;
245 }
246 }
247
248 if (desc->colorAttachmentCount > 0) {
249 subpass->color_attachments = p;
250 p += desc->colorAttachmentCount;
251
252 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
253 uint32_t a = desc->pColorAttachments[j].attachment;
254 subpass->color_attachments[j].attachment = a;
255
256 if (a != VK_ATTACHMENT_UNUSED) {
257 pass->attachments[a].gmem_offset = 0;
258 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
259 }
260 }
261 }
262
263 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
264 if (desc->pResolveAttachments) {
265 p += desc->colorAttachmentCount;
266 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
267 subpass->resolve_attachments[j].attachment =
268 desc->pResolveAttachments[j].attachment;
269 }
270 }
271
272 uint32_t a = desc->pDepthStencilAttachment ?
273 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
274 subpass->depth_stencil_attachment.attachment = a;
275 if (a != VK_ATTACHMENT_UNUSED) {
276 pass->attachments[a].gmem_offset = 0;
277 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
278 }
279
280 subpass->samples = subpass->samples ?: 1;
281 }
282
283 *pRenderPass = tu_render_pass_to_handle(pass);
284
285 create_render_pass_common(pass, device->physical_device);
286
287 return VK_SUCCESS;
288 }
289
290 VkResult
291 tu_CreateRenderPass2(VkDevice _device,
292 const VkRenderPassCreateInfo2KHR *pCreateInfo,
293 const VkAllocationCallbacks *pAllocator,
294 VkRenderPass *pRenderPass)
295 {
296 TU_FROM_HANDLE(tu_device, device, _device);
297 struct tu_render_pass *pass;
298 size_t size;
299 size_t attachments_offset;
300
301 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
302
303 size = sizeof(*pass);
304 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
305 attachments_offset = size;
306 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
307
308 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
309 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
310 if (pass == NULL)
311 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
312
313 memset(pass, 0, size);
314 pass->attachment_count = pCreateInfo->attachmentCount;
315 pass->subpass_count = pCreateInfo->subpassCount;
316 pass->attachments = (void *) pass + attachments_offset;
317
318 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
319 struct tu_render_pass_attachment *att = &pass->attachments[i];
320
321 att->format = pCreateInfo->pAttachments[i].format;
322 att->samples = pCreateInfo->pAttachments[i].samples;
323 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
324 att->gmem_offset = -1;
325
326 attachment_set_ops(att,
327 pCreateInfo->pAttachments[i].loadOp,
328 pCreateInfo->pAttachments[i].stencilLoadOp,
329 pCreateInfo->pAttachments[i].storeOp,
330 pCreateInfo->pAttachments[i].stencilStoreOp);
331 }
332 uint32_t subpass_attachment_count = 0;
333 struct tu_subpass_attachment *p;
334 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
335 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
336
337 subpass_attachment_count +=
338 desc->inputAttachmentCount + desc->colorAttachmentCount +
339 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
340 }
341
342 if (subpass_attachment_count) {
343 pass->subpass_attachments = vk_alloc2(
344 &device->alloc, pAllocator,
345 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
346 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
347 if (pass->subpass_attachments == NULL) {
348 vk_free2(&device->alloc, pAllocator, pass);
349 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
350 }
351 } else
352 pass->subpass_attachments = NULL;
353
354 p = pass->subpass_attachments;
355 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
356 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
357 struct tu_subpass *subpass = &pass->subpasses[i];
358
359 subpass->input_count = desc->inputAttachmentCount;
360 subpass->color_count = desc->colorAttachmentCount;
361 subpass->samples = 0;
362
363 if (desc->inputAttachmentCount > 0) {
364 subpass->input_attachments = p;
365 p += desc->inputAttachmentCount;
366
367 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
368 uint32_t a = desc->pInputAttachments[j].attachment;
369 subpass->input_attachments[j].attachment = a;
370 if (a != VK_ATTACHMENT_UNUSED)
371 pass->attachments[a].gmem_offset = 0;
372 }
373 }
374
375 if (desc->colorAttachmentCount > 0) {
376 subpass->color_attachments = p;
377 p += desc->colorAttachmentCount;
378
379 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
380 uint32_t a = desc->pColorAttachments[j].attachment;
381 subpass->color_attachments[j].attachment = a;
382
383 if (a != VK_ATTACHMENT_UNUSED) {
384 pass->attachments[a].gmem_offset = 0;
385 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
386 }
387 }
388 }
389
390 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
391 if (desc->pResolveAttachments) {
392 p += desc->colorAttachmentCount;
393 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
394 subpass->resolve_attachments[j].attachment =
395 desc->pResolveAttachments[j].attachment;
396 }
397 }
398
399
400 uint32_t a = desc->pDepthStencilAttachment ?
401 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
402 subpass->depth_stencil_attachment.attachment = a;
403 if (a != VK_ATTACHMENT_UNUSED) {
404 pass->attachments[a].gmem_offset = 0;
405 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
406 }
407
408 subpass->samples = subpass->samples ?: 1;
409 }
410
411 *pRenderPass = tu_render_pass_to_handle(pass);
412
413 create_render_pass_common(pass, device->physical_device);
414
415 return VK_SUCCESS;
416 }
417
418 void
419 tu_DestroyRenderPass(VkDevice _device,
420 VkRenderPass _pass,
421 const VkAllocationCallbacks *pAllocator)
422 {
423 TU_FROM_HANDLE(tu_device, device, _device);
424 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
425
426 if (!_pass)
427 return;
428
429 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
430 vk_free2(&device->alloc, pAllocator, pass);
431 }
432
433 void
434 tu_GetRenderAreaGranularity(VkDevice _device,
435 VkRenderPass renderPass,
436 VkExtent2D *pGranularity)
437 {
438 pGranularity->width = GMEM_ALIGN_W;
439 pGranularity->height = GMEM_ALIGN_H;
440 }