turnip: Enable geometryShader device feature
[mesa.git] / src / freedreno / vulkan / tu_pass.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27 #include "tu_private.h"
28
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 static void update_samples(struct tu_subpass *subpass,
33 VkSampleCountFlagBits samples)
34 {
35 assert(subpass->samples == 0 || subpass->samples == samples);
36 subpass->samples = samples;
37 }
38
39 #define GMEM_ALIGN 0x4000
40
41 static void
42 compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
43 {
44 /* calculate total bytes per pixel */
45 uint32_t cpp_total = 0;
46 for (uint32_t i = 0; i < pass->attachment_count; i++) {
47 struct tu_render_pass_attachment *att = &pass->attachments[i];
48 if (att->gmem_offset >= 0)
49 cpp_total += att->cpp;
50 }
51
52 /* no gmem attachments */
53 if (cpp_total == 0) {
54 /* any value non-zero value so tiling config works with no attachments */
55 pass->gmem_pixels = 1024*1024;
56 return;
57 }
58
59 /* TODO: this algorithm isn't optimal
60 * for example, two attachments with cpp = {1, 4}
61 * result: nblocks = {12, 52}, pixels = 196608
62 * optimal: nblocks = {13, 51}, pixels = 208896
63 */
64 uint32_t gmem_blocks = gmem_size / GMEM_ALIGN;
65 uint32_t offset = 0, pixels = ~0u;
66 for (uint32_t i = 0; i < pass->attachment_count; i++) {
67 struct tu_render_pass_attachment *att = &pass->attachments[i];
68 if (att->gmem_offset < 0)
69 continue;
70
71 att->gmem_offset = offset;
72
73 /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
74 uint32_t align = MAX2(1, att->cpp / 16);
75 uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
76
77 gmem_blocks -= nblocks;
78 cpp_total -= att->cpp;
79 offset += nblocks * GMEM_ALIGN;
80 pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
81 }
82
83 pass->gmem_pixels = pixels;
84 }
85
86 VkResult
87 tu_CreateRenderPass(VkDevice _device,
88 const VkRenderPassCreateInfo *pCreateInfo,
89 const VkAllocationCallbacks *pAllocator,
90 VkRenderPass *pRenderPass)
91 {
92 TU_FROM_HANDLE(tu_device, device, _device);
93 struct tu_render_pass *pass;
94 size_t size;
95 size_t attachments_offset;
96
97 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
98
99 size = sizeof(*pass);
100 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
101 attachments_offset = size;
102 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
103
104 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
105 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
106 if (pass == NULL)
107 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
108
109 memset(pass, 0, size);
110 pass->attachment_count = pCreateInfo->attachmentCount;
111 pass->subpass_count = pCreateInfo->subpassCount;
112 pass->attachments = (void *) pass + attachments_offset;
113
114 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
115 struct tu_render_pass_attachment *att = &pass->attachments[i];
116
117 att->format = pCreateInfo->pAttachments[i].format;
118 att->cpp = vk_format_get_blocksize(att->format) *
119 pCreateInfo->pAttachments[i].samples;
120 att->load_op = pCreateInfo->pAttachments[i].loadOp;
121 att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
122 att->store_op = pCreateInfo->pAttachments[i].storeOp;
123 if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE &&
124 vk_format_has_stencil(att->format))
125 att->store_op = VK_ATTACHMENT_STORE_OP_STORE;
126 att->gmem_offset = -1;
127 }
128
129 uint32_t subpass_attachment_count = 0;
130 struct tu_subpass_attachment *p;
131 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
132 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
133
134 subpass_attachment_count +=
135 desc->inputAttachmentCount + desc->colorAttachmentCount +
136 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
137 }
138
139 if (subpass_attachment_count) {
140 pass->subpass_attachments = vk_alloc2(
141 &device->alloc, pAllocator,
142 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
143 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
144 if (pass->subpass_attachments == NULL) {
145 vk_free2(&device->alloc, pAllocator, pass);
146 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
147 }
148 } else
149 pass->subpass_attachments = NULL;
150
151 p = pass->subpass_attachments;
152 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
153 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
154 struct tu_subpass *subpass = &pass->subpasses[i];
155
156 subpass->input_count = desc->inputAttachmentCount;
157 subpass->color_count = desc->colorAttachmentCount;
158 subpass->samples = 0;
159
160 if (desc->inputAttachmentCount > 0) {
161 subpass->input_attachments = p;
162 p += desc->inputAttachmentCount;
163
164 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
165 uint32_t a = desc->pInputAttachments[j].attachment;
166 subpass->input_attachments[j].attachment = a;
167 if (a != VK_ATTACHMENT_UNUSED)
168 pass->attachments[a].gmem_offset = 0;
169 }
170 }
171
172 if (desc->colorAttachmentCount > 0) {
173 subpass->color_attachments = p;
174 p += desc->colorAttachmentCount;
175
176 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
177 uint32_t a = desc->pColorAttachments[j].attachment;
178 subpass->color_attachments[j].attachment = a;
179
180 if (a != VK_ATTACHMENT_UNUSED) {
181 pass->attachments[a].gmem_offset = 0;
182 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
183 }
184 }
185 }
186
187 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
188 if (desc->pResolveAttachments) {
189 p += desc->colorAttachmentCount;
190 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
191 subpass->resolve_attachments[j].attachment =
192 desc->pResolveAttachments[j].attachment;
193 }
194 }
195
196 uint32_t a = desc->pDepthStencilAttachment ?
197 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
198 subpass->depth_stencil_attachment.attachment = a;
199 if (a != VK_ATTACHMENT_UNUSED) {
200 pass->attachments[a].gmem_offset = 0;
201 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
202 }
203
204 subpass->samples = subpass->samples ?: 1;
205 }
206
207 *pRenderPass = tu_render_pass_to_handle(pass);
208
209 compute_gmem_offsets(pass, device->physical_device->gmem_size);
210
211 return VK_SUCCESS;
212 }
213
214 VkResult
215 tu_CreateRenderPass2(VkDevice _device,
216 const VkRenderPassCreateInfo2KHR *pCreateInfo,
217 const VkAllocationCallbacks *pAllocator,
218 VkRenderPass *pRenderPass)
219 {
220 TU_FROM_HANDLE(tu_device, device, _device);
221 struct tu_render_pass *pass;
222 size_t size;
223 size_t attachments_offset;
224
225 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
226
227 size = sizeof(*pass);
228 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
229 attachments_offset = size;
230 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
231
232 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
233 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
234 if (pass == NULL)
235 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
236
237 memset(pass, 0, size);
238 pass->attachment_count = pCreateInfo->attachmentCount;
239 pass->subpass_count = pCreateInfo->subpassCount;
240 pass->attachments = (void *) pass + attachments_offset;
241
242 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
243 struct tu_render_pass_attachment *att = &pass->attachments[i];
244
245 att->format = pCreateInfo->pAttachments[i].format;
246 att->cpp = vk_format_get_blocksize(att->format) *
247 pCreateInfo->pAttachments[i].samples;
248 att->load_op = pCreateInfo->pAttachments[i].loadOp;
249 att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
250 att->store_op = pCreateInfo->pAttachments[i].storeOp;
251 att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
252 if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE &&
253 vk_format_has_stencil(att->format))
254 att->store_op = VK_ATTACHMENT_STORE_OP_STORE;
255 att->gmem_offset = -1;
256 }
257 uint32_t subpass_attachment_count = 0;
258 struct tu_subpass_attachment *p;
259 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
260 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
261
262 subpass_attachment_count +=
263 desc->inputAttachmentCount + desc->colorAttachmentCount +
264 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
265 }
266
267 if (subpass_attachment_count) {
268 pass->subpass_attachments = vk_alloc2(
269 &device->alloc, pAllocator,
270 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
271 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
272 if (pass->subpass_attachments == NULL) {
273 vk_free2(&device->alloc, pAllocator, pass);
274 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
275 }
276 } else
277 pass->subpass_attachments = NULL;
278
279 p = pass->subpass_attachments;
280 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
281 const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
282 struct tu_subpass *subpass = &pass->subpasses[i];
283
284 subpass->input_count = desc->inputAttachmentCount;
285 subpass->color_count = desc->colorAttachmentCount;
286 subpass->samples = 0;
287
288 if (desc->inputAttachmentCount > 0) {
289 subpass->input_attachments = p;
290 p += desc->inputAttachmentCount;
291
292 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
293 uint32_t a = desc->pInputAttachments[j].attachment;
294 subpass->input_attachments[j].attachment = a;
295 if (a != VK_ATTACHMENT_UNUSED)
296 pass->attachments[a].gmem_offset = 0;
297 }
298 }
299
300 if (desc->colorAttachmentCount > 0) {
301 subpass->color_attachments = p;
302 p += desc->colorAttachmentCount;
303
304 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
305 uint32_t a = desc->pColorAttachments[j].attachment;
306 subpass->color_attachments[j].attachment = a;
307
308 if (a != VK_ATTACHMENT_UNUSED) {
309 pass->attachments[a].gmem_offset = 0;
310 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
311 }
312 }
313 }
314
315 subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
316 if (desc->pResolveAttachments) {
317 p += desc->colorAttachmentCount;
318 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
319 subpass->resolve_attachments[j].attachment =
320 desc->pResolveAttachments[j].attachment;
321 }
322 }
323
324
325 uint32_t a = desc->pDepthStencilAttachment ?
326 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
327 subpass->depth_stencil_attachment.attachment = a;
328 if (a != VK_ATTACHMENT_UNUSED) {
329 pass->attachments[a].gmem_offset = 0;
330 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
331 }
332
333 subpass->samples = subpass->samples ?: 1;
334 }
335
336 *pRenderPass = tu_render_pass_to_handle(pass);
337
338 compute_gmem_offsets(pass, device->physical_device->gmem_size);
339
340 return VK_SUCCESS;
341 }
342
343 void
344 tu_DestroyRenderPass(VkDevice _device,
345 VkRenderPass _pass,
346 const VkAllocationCallbacks *pAllocator)
347 {
348 TU_FROM_HANDLE(tu_device, device, _device);
349 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
350
351 if (!_pass)
352 return;
353
354 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
355 vk_free2(&device->alloc, pAllocator, pass);
356 }
357
358 void
359 tu_GetRenderAreaGranularity(VkDevice _device,
360 VkRenderPass renderPass,
361 VkExtent2D *pGranularity)
362 {
363 TU_FROM_HANDLE(tu_device, device, _device);
364
365 pGranularity->width = device->physical_device->tile_align_w;
366 pGranularity->height = device->physical_device->tile_align_h;
367 }