/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
27 #include "tu_private.h"
30 #include "vk_format.h"
32 static void update_samples(struct tu_subpass
*subpass
,
33 VkSampleCountFlagBits samples
)
35 assert(subpass
->samples
== 0 || subpass
->samples
== samples
);
36 subpass
->samples
= samples
;
39 #define GMEM_ALIGN 0x4000
42 compute_gmem_offsets(struct tu_render_pass
*pass
, uint32_t gmem_size
)
44 /* calculate total bytes per pixel */
45 uint32_t cpp_total
= 0;
46 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
47 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
48 if (att
->gmem_offset
>= 0)
49 cpp_total
+= att
->cpp
;
52 /* no gmem attachments */
54 /* any value non-zero value so tiling config works with no attachments */
55 pass
->gmem_pixels
= 1024*1024;
59 /* TODO: this algorithm isn't optimal
60 * for example, two attachments with cpp = {1, 4}
61 * result: nblocks = {12, 52}, pixels = 196608
62 * optimal: nblocks = {13, 51}, pixels = 208896
64 uint32_t gmem_blocks
= gmem_size
/ GMEM_ALIGN
;
65 uint32_t offset
= 0, pixels
= ~0u;
66 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
67 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
68 if (att
->gmem_offset
< 0)
71 att
->gmem_offset
= offset
;
73 /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
74 uint32_t align
= MAX2(1, att
->cpp
/ 16);
75 uint32_t nblocks
= MAX2((gmem_blocks
* att
->cpp
/ cpp_total
) & ~(align
- 1), align
);
77 gmem_blocks
-= nblocks
;
78 cpp_total
-= att
->cpp
;
79 offset
+= nblocks
* GMEM_ALIGN
;
80 pixels
= MIN2(pixels
, nblocks
* GMEM_ALIGN
/ att
->cpp
);
83 pass
->gmem_pixels
= pixels
;
87 tu_CreateRenderPass(VkDevice _device
,
88 const VkRenderPassCreateInfo
*pCreateInfo
,
89 const VkAllocationCallbacks
*pAllocator
,
90 VkRenderPass
*pRenderPass
)
92 TU_FROM_HANDLE(tu_device
, device
, _device
);
93 struct tu_render_pass
*pass
;
95 size_t attachments_offset
;
97 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO
);
100 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
101 attachments_offset
= size
;
102 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
104 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
105 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
107 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
109 memset(pass
, 0, size
);
110 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
111 pass
->subpass_count
= pCreateInfo
->subpassCount
;
112 pass
->attachments
= (void *) pass
+ attachments_offset
;
114 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
115 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
117 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
118 att
->cpp
= vk_format_get_blocksize(att
->format
) *
119 pCreateInfo
->pAttachments
[i
].samples
;
120 att
->load_op
= pCreateInfo
->pAttachments
[i
].loadOp
;
121 att
->stencil_load_op
= pCreateInfo
->pAttachments
[i
].stencilLoadOp
;
122 att
->store_op
= pCreateInfo
->pAttachments
[i
].storeOp
;
123 if (pCreateInfo
->pAttachments
[i
].stencilStoreOp
== VK_ATTACHMENT_STORE_OP_STORE
&&
124 vk_format_has_stencil(att
->format
))
125 att
->store_op
= VK_ATTACHMENT_STORE_OP_STORE
;
126 att
->gmem_offset
= -1;
129 uint32_t subpass_attachment_count
= 0;
130 struct tu_subpass_attachment
*p
;
131 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
132 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
134 subpass_attachment_count
+=
135 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
136 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
139 if (subpass_attachment_count
) {
140 pass
->subpass_attachments
= vk_alloc2(
141 &device
->alloc
, pAllocator
,
142 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
143 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
144 if (pass
->subpass_attachments
== NULL
) {
145 vk_free2(&device
->alloc
, pAllocator
, pass
);
146 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
149 pass
->subpass_attachments
= NULL
;
151 p
= pass
->subpass_attachments
;
152 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
153 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
154 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
156 subpass
->input_count
= desc
->inputAttachmentCount
;
157 subpass
->color_count
= desc
->colorAttachmentCount
;
158 subpass
->samples
= 0;
160 if (desc
->inputAttachmentCount
> 0) {
161 subpass
->input_attachments
= p
;
162 p
+= desc
->inputAttachmentCount
;
164 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
165 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
166 subpass
->input_attachments
[j
].attachment
= a
;
167 if (a
!= VK_ATTACHMENT_UNUSED
)
168 pass
->attachments
[a
].gmem_offset
= 0;
172 if (desc
->colorAttachmentCount
> 0) {
173 subpass
->color_attachments
= p
;
174 p
+= desc
->colorAttachmentCount
;
176 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
177 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
178 subpass
->color_attachments
[j
].attachment
= a
;
180 if (a
!= VK_ATTACHMENT_UNUSED
) {
181 pass
->attachments
[a
].gmem_offset
= 0;
182 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
187 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
188 if (desc
->pResolveAttachments
) {
189 p
+= desc
->colorAttachmentCount
;
190 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
191 subpass
->resolve_attachments
[j
].attachment
=
192 desc
->pResolveAttachments
[j
].attachment
;
196 uint32_t a
= desc
->pDepthStencilAttachment
?
197 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
198 subpass
->depth_stencil_attachment
.attachment
= a
;
199 if (a
!= VK_ATTACHMENT_UNUSED
) {
200 pass
->attachments
[a
].gmem_offset
= 0;
201 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
204 subpass
->samples
= subpass
->samples
?: 1;
207 *pRenderPass
= tu_render_pass_to_handle(pass
);
209 compute_gmem_offsets(pass
, device
->physical_device
->gmem_size
);
215 tu_CreateRenderPass2(VkDevice _device
,
216 const VkRenderPassCreateInfo2KHR
*pCreateInfo
,
217 const VkAllocationCallbacks
*pAllocator
,
218 VkRenderPass
*pRenderPass
)
220 TU_FROM_HANDLE(tu_device
, device
, _device
);
221 struct tu_render_pass
*pass
;
223 size_t attachments_offset
;
225 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR
);
227 size
= sizeof(*pass
);
228 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
229 attachments_offset
= size
;
230 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
232 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
233 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
235 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
237 memset(pass
, 0, size
);
238 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
239 pass
->subpass_count
= pCreateInfo
->subpassCount
;
240 pass
->attachments
= (void *) pass
+ attachments_offset
;
242 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
243 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
245 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
246 att
->cpp
= vk_format_get_blocksize(att
->format
) *
247 pCreateInfo
->pAttachments
[i
].samples
;
248 att
->load_op
= pCreateInfo
->pAttachments
[i
].loadOp
;
249 att
->stencil_load_op
= pCreateInfo
->pAttachments
[i
].stencilLoadOp
;
250 att
->store_op
= pCreateInfo
->pAttachments
[i
].storeOp
;
251 att
->stencil_store_op
= pCreateInfo
->pAttachments
[i
].stencilStoreOp
;
252 if (pCreateInfo
->pAttachments
[i
].stencilStoreOp
== VK_ATTACHMENT_STORE_OP_STORE
&&
253 vk_format_has_stencil(att
->format
))
254 att
->store_op
= VK_ATTACHMENT_STORE_OP_STORE
;
255 att
->gmem_offset
= -1;
257 uint32_t subpass_attachment_count
= 0;
258 struct tu_subpass_attachment
*p
;
259 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
260 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
262 subpass_attachment_count
+=
263 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
264 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
267 if (subpass_attachment_count
) {
268 pass
->subpass_attachments
= vk_alloc2(
269 &device
->alloc
, pAllocator
,
270 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
271 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
272 if (pass
->subpass_attachments
== NULL
) {
273 vk_free2(&device
->alloc
, pAllocator
, pass
);
274 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
277 pass
->subpass_attachments
= NULL
;
279 p
= pass
->subpass_attachments
;
280 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
281 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
282 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
284 subpass
->input_count
= desc
->inputAttachmentCount
;
285 subpass
->color_count
= desc
->colorAttachmentCount
;
286 subpass
->samples
= 0;
288 if (desc
->inputAttachmentCount
> 0) {
289 subpass
->input_attachments
= p
;
290 p
+= desc
->inputAttachmentCount
;
292 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
293 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
294 subpass
->input_attachments
[j
].attachment
= a
;
295 if (a
!= VK_ATTACHMENT_UNUSED
)
296 pass
->attachments
[a
].gmem_offset
= 0;
300 if (desc
->colorAttachmentCount
> 0) {
301 subpass
->color_attachments
= p
;
302 p
+= desc
->colorAttachmentCount
;
304 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
305 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
306 subpass
->color_attachments
[j
].attachment
= a
;
308 if (a
!= VK_ATTACHMENT_UNUSED
) {
309 pass
->attachments
[a
].gmem_offset
= 0;
310 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
315 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
316 if (desc
->pResolveAttachments
) {
317 p
+= desc
->colorAttachmentCount
;
318 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
319 subpass
->resolve_attachments
[j
].attachment
=
320 desc
->pResolveAttachments
[j
].attachment
;
325 uint32_t a
= desc
->pDepthStencilAttachment
?
326 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
327 subpass
->depth_stencil_attachment
.attachment
= a
;
328 if (a
!= VK_ATTACHMENT_UNUSED
) {
329 pass
->attachments
[a
].gmem_offset
= 0;
330 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
333 subpass
->samples
= subpass
->samples
?: 1;
336 *pRenderPass
= tu_render_pass_to_handle(pass
);
338 compute_gmem_offsets(pass
, device
->physical_device
->gmem_size
);
344 tu_DestroyRenderPass(VkDevice _device
,
346 const VkAllocationCallbacks
*pAllocator
)
348 TU_FROM_HANDLE(tu_device
, device
, _device
);
349 TU_FROM_HANDLE(tu_render_pass
, pass
, _pass
);
354 vk_free2(&device
->alloc
, pAllocator
, pass
->subpass_attachments
);
355 vk_free2(&device
->alloc
, pAllocator
, pass
);
359 tu_GetRenderAreaGranularity(VkDevice _device
,
360 VkRenderPass renderPass
,
361 VkExtent2D
*pGranularity
)
363 TU_FROM_HANDLE(tu_device
, device
, _device
);
365 pGranularity
->width
= device
->physical_device
->tile_align_w
;
366 pGranularity
->height
= device
->physical_device
->tile_align_h
;