turnip: minor warning fixes
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 static void update_samples(struct tu_subpass *subpass,
33 VkSampleCountFlagBits samples)
34 {
35 assert(subpass->samples == 0 || subpass->samples == samples);
36 subpass->samples = samples;
37 }
38
39 #define GMEM_ALIGN 0x4000
40
41 static void
42 compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
43 {
44 /* calculate total bytes per pixel */
45 uint32_t cpp_total = 0;
46 for (uint32_t i = 0; i < pass->attachment_count; i++) {
47 struct tu_render_pass_attachment *att = &pass->attachments[i];
48 if (att->gmem_offset >= 0)
49 cpp_total += att->cpp;
50 }
51
52 /* no gmem attachments */
53 if (cpp_total == 0) {
54 /* any value non-zero value so tiling config works with no attachments */
55 pass->gmem_pixels = 1024*1024;
56 return;
57 }
58
59 /* TODO: this algorithm isn't optimal
60 * for example, two attachments with cpp = {1, 4}
61 * result: nblocks = {12, 52}, pixels = 196608
62 * optimal: nblocks = {13, 51}, pixels = 208896
63 */
64 uint32_t gmem_blocks = gmem_size / GMEM_ALIGN;
65 uint32_t offset = 0, pixels = ~0u;
66 for (uint32_t i = 0; i < pass->attachment_count; i++) {
67 struct tu_render_pass_attachment *att = &pass->attachments[i];
68 if (att->gmem_offset < 0)
69 continue;
70
71 att->gmem_offset = offset;
72
73 /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
74 uint32_t align = MAX2(1, att->cpp / 16);
75 uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
76
77 gmem_blocks -= nblocks;
78 cpp_total -= att->cpp;
79 offset += nblocks * GMEM_ALIGN;
80 pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
81 }
82
83 pass->gmem_pixels = pixels;
84 assert(pixels);
85 }
86
87 VkResult
88 tu_CreateRenderPass(VkDevice _device,
89 const VkRenderPassCreateInfo *pCreateInfo,
90 const VkAllocationCallbacks *pAllocator,
91 VkRenderPass *pRenderPass)
92 {
93 TU_FROM_HANDLE(tu_device, device, _device);
94 struct tu_render_pass *pass;
95 size_t size;
96 size_t attachments_offset;
97
98 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
99
100 size = sizeof(*pass);
101 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
102 attachments_offset = size;
103 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
104
105 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
106 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
107 if (pass == NULL)
108 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
109
110 memset(pass, 0, size);
111 pass->attachment_count = pCreateInfo->attachmentCount;
112 pass->subpass_count = pCreateInfo->subpassCount;
113 pass->attachments = (void *) pass + attachments_offset;
114
115 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
116 struct tu_render_pass_attachment *att = &pass->attachments[i];
117
118 att->format = pCreateInfo->pAttachments[i].format;
119 att->cpp = vk_format_get_blocksize(att->format) *
120 pCreateInfo->pAttachments[i].samples;
121 att->load_op = pCreateInfo->pAttachments[i].loadOp;
122 att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
123 att->store_op = pCreateInfo->pAttachments[i].storeOp;
124 if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE &&
125 vk_format_has_stencil(att->format))
126 att->store_op = VK_ATTACHMENT_STORE_OP_STORE;
127 att->gmem_offset = -1;
128 }
129
130 uint32_t subpass_attachment_count = 0;
131 struct tu_subpass_attachment *p;
132 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
133 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
134
135 subpass_attachment_count +=
136 desc->inputAttachmentCount + desc->colorAttachmentCount +
137 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
138 }
139
140 if (subpass_attachment_count) {
141 pass->subpass_attachments = vk_alloc2(
142 &device->alloc, pAllocator,
143 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
144 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
145 if (pass->subpass_attachments == NULL) {
146 vk_free2(&device->alloc, pAllocator, pass);
147 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
148 }
149 } else
150 pass->subpass_attachments = NULL;
151
152 p = pass->subpass_attachments;
153 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
154 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
155 struct tu_subpass *subpass = &pass->subpasses[i];
156
157 subpass->input_count = desc->inputAttachmentCount;
158 subpass->color_count = desc->colorAttachmentCount;
159 subpass->samples = 0;
160
161 if (desc->inputAttachmentCount > 0) {
162 subpass->input_attachments = p;
163 p += desc->inputAttachmentCount;
164
165 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
166 uint32_t a = desc->pInputAttachments[j].attachment;
167 subpass->input_attachments[j].attachment = a;
168 if (a != VK_ATTACHMENT_UNUSED)
169 pass->attachments[a].gmem_offset = 0;
170 }
171 }
172
173 if (desc->colorAttachmentCount > 0) {
174 subpass->color_attachments = p;
175 p += desc->colorAttachmentCount;
176
177 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
178 uint32_t a = desc->pColorAttachments[j].attachment;
179 subpass->color_attachments[j].attachment = a;
180
181 if (a != VK_ATTACHMENT_UNUSED) {
182 pass->attachments[a].gmem_offset = 0;
183 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
184 }
185 }
186 }
187
188 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
189 if (desc->pResolveAttachments) {
190 p += desc->colorAttachmentCount;
191 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
192 subpass->resolve_attachments[j].attachment =
193 desc->pResolveAttachments[j].attachment;
194 }
195 }
196
197 uint32_t a = desc->pDepthStencilAttachment ?
198 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
199 subpass->depth_stencil_attachment.attachment = a;
200 if (a != VK_ATTACHMENT_UNUSED) {
201 pass->attachments[a].gmem_offset = 0;
202 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
203 }
204
205 subpass->samples = subpass->samples ?: 1;
206 }
207
208 *pRenderPass = tu_render_pass_to_handle(pass);
209
210 compute_gmem_offsets(pass, device->physical_device->gmem_size);
211
212 return VK_SUCCESS;
213 }
214
215 VkResult
216 tu_CreateRenderPass2KHR(VkDevice _device,
217 const VkRenderPassCreateInfo2KHR *pCreateInfo,
218 const VkAllocationCallbacks *pAllocator,
219 VkRenderPass *pRenderPass)
220 {
221 TU_FROM_HANDLE(tu_device, device, _device);
222 struct tu_render_pass *pass;
223 size_t size;
224 size_t attachments_offset;
225
226 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
227
228 size = sizeof(*pass);
229 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
230 attachments_offset = size;
231 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
232
233 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
234 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
235 if (pass == NULL)
236 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
237
238 memset(pass, 0, size);
239 pass->attachment_count = pCreateInfo->attachmentCount;
240 pass->subpass_count = pCreateInfo->subpassCount;
241 pass->attachments = (void *) pass + attachments_offset;
242
243 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
244 struct tu_render_pass_attachment *att = &pass->attachments[i];
245
246 att->format = pCreateInfo->pAttachments[i].format;
247 att->cpp = vk_format_get_blocksize(att->format) *
248 pCreateInfo->pAttachments[i].samples;
249 att->load_op = pCreateInfo->pAttachments[i].loadOp;
250 att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
251 att->store_op = pCreateInfo->pAttachments[i].storeOp;
252 att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
253 if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE &&
254 vk_format_has_stencil(att->format))
255 att->store_op = VK_ATTACHMENT_STORE_OP_STORE;
256 att->gmem_offset = -1;
257 }
258 uint32_t subpass_attachment_count = 0;
259 struct tu_subpass_attachment *p;
260 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
261 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
262
263 subpass_attachment_count +=
264 desc->inputAttachmentCount + desc->colorAttachmentCount +
265 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
266 }
267
268 if (subpass_attachment_count) {
269 pass->subpass_attachments = vk_alloc2(
270 &device->alloc, pAllocator,
271 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
272 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
273 if (pass->subpass_attachments == NULL) {
274 vk_free2(&device->alloc, pAllocator, pass);
275 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
276 }
277 } else
278 pass->subpass_attachments = NULL;
279
280 p = pass->subpass_attachments;
281 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
282 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
283 struct tu_subpass *subpass = &pass->subpasses[i];
284
285 subpass->input_count = desc->inputAttachmentCount;
286 subpass->color_count = desc->colorAttachmentCount;
287 subpass->samples = 0;
288
289 if (desc->inputAttachmentCount > 0) {
290 subpass->input_attachments = p;
291 p += desc->inputAttachmentCount;
292
293 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
294 uint32_t a = desc->pInputAttachments[j].attachment;
295 subpass->input_attachments[j].attachment = a;
296 if (a != VK_ATTACHMENT_UNUSED)
297 pass->attachments[a].gmem_offset = 0;
298 }
299 }
300
301 if (desc->colorAttachmentCount > 0) {
302 subpass->color_attachments = p;
303 p += desc->colorAttachmentCount;
304
305 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
306 uint32_t a = desc->pColorAttachments[j].attachment;
307 subpass->color_attachments[j].attachment = a;
308
309 if (a != VK_ATTACHMENT_UNUSED) {
310 pass->attachments[a].gmem_offset = 0;
311 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
312 }
313 }
314 }
315
316 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
317 if (desc->pResolveAttachments) {
318 p += desc->colorAttachmentCount;
319 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
320 subpass->resolve_attachments[j].attachment =
321 desc->pResolveAttachments[j].attachment;
322 }
323 }
324
325
326 uint32_t a = desc->pDepthStencilAttachment ?
327 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
328 subpass->depth_stencil_attachment.attachment = a;
329 if (a != VK_ATTACHMENT_UNUSED) {
330 pass->attachments[a].gmem_offset = 0;
331 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
332 }
333
334 subpass->samples = subpass->samples ?: 1;
335 }
336
337 *pRenderPass = tu_render_pass_to_handle(pass);
338
339 compute_gmem_offsets(pass, device->physical_device->gmem_size);
340
341 return VK_SUCCESS;
342 }
343
344 void
345 tu_DestroyRenderPass(VkDevice _device,
346 VkRenderPass _pass,
347 const VkAllocationCallbacks *pAllocator)
348 {
349 TU_FROM_HANDLE(tu_device, device, _device);
350 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
351
352 if (!_pass)
353 return;
354 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
355 vk_free2(&device->alloc, pAllocator, pass);
356 }
357
358 void
359 tu_GetRenderAreaGranularity(VkDevice _device,
360 VkRenderPass renderPass,
361 VkExtent2D *pGranularity)
362 {
363 TU_FROM_HANDLE(tu_device, device, _device);
364
365 pGranularity->width = device->physical_device->tile_align_w;
366 pGranularity->height = device->physical_device->tile_align_h;
367 }