/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "tu_private.h"

#include "vk_format.h"
32 static void update_samples(struct tu_subpass
*subpass
,
33 VkSampleCountFlagBits samples
)
35 assert(subpass
->samples
== 0 || subpass
->samples
== samples
);
36 subpass
->samples
= samples
;
39 #define GMEM_ALIGN 0x4000
42 compute_gmem_offsets(struct tu_render_pass
*pass
, uint32_t gmem_size
)
44 /* calculate total bytes per pixel */
45 uint32_t cpp_total
= 0;
46 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
47 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
48 if (att
->gmem_offset
>= 0)
49 cpp_total
+= att
->cpp
;
52 /* no gmem attachments */
54 /* any value non-zero value so tiling config works with no attachments */
55 pass
->gmem_pixels
= 1024*1024;
59 /* TODO: this algorithm isn't optimal
60 * for example, two attachments with cpp = {1, 4}
61 * result: nblocks = {12, 52}, pixels = 196608
62 * optimal: nblocks = {13, 51}, pixels = 208896
64 uint32_t gmem_blocks
= gmem_size
/ GMEM_ALIGN
;
65 uint32_t offset
= 0, pixels
= ~0u;
66 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
67 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
68 if (att
->gmem_offset
< 0)
71 att
->gmem_offset
= offset
;
73 /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
74 uint32_t align
= MAX2(1, att
->cpp
/ 16);
75 uint32_t nblocks
= MAX2((gmem_blocks
* att
->cpp
/ cpp_total
) & ~(align
- 1), align
);
77 gmem_blocks
-= nblocks
;
78 cpp_total
-= att
->cpp
;
79 offset
+= nblocks
* GMEM_ALIGN
;
80 pixels
= MIN2(pixels
, nblocks
* GMEM_ALIGN
/ att
->cpp
);
83 pass
->gmem_pixels
= pixels
;
88 tu_CreateRenderPass(VkDevice _device
,
89 const VkRenderPassCreateInfo
*pCreateInfo
,
90 const VkAllocationCallbacks
*pAllocator
,
91 VkRenderPass
*pRenderPass
)
93 TU_FROM_HANDLE(tu_device
, device
, _device
);
94 struct tu_render_pass
*pass
;
96 size_t attachments_offset
;
98 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO
);
100 size
= sizeof(*pass
);
101 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
102 attachments_offset
= size
;
103 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
105 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
106 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
108 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
110 memset(pass
, 0, size
);
111 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
112 pass
->subpass_count
= pCreateInfo
->subpassCount
;
113 pass
->attachments
= (void *) pass
+ attachments_offset
;
115 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
116 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
118 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
119 att
->cpp
= vk_format_get_blocksize(att
->format
) *
120 pCreateInfo
->pAttachments
[i
].samples
;
121 att
->load_op
= pCreateInfo
->pAttachments
[i
].loadOp
;
122 att
->stencil_load_op
= pCreateInfo
->pAttachments
[i
].stencilLoadOp
;
123 att
->store_op
= pCreateInfo
->pAttachments
[i
].storeOp
;
124 if (pCreateInfo
->pAttachments
[i
].stencilStoreOp
== VK_ATTACHMENT_STORE_OP_STORE
&&
125 vk_format_has_stencil(att
->format
))
126 att
->store_op
= VK_ATTACHMENT_STORE_OP_STORE
;
127 att
->gmem_offset
= -1;
130 uint32_t subpass_attachment_count
= 0;
131 struct tu_subpass_attachment
*p
;
132 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
133 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
135 subpass_attachment_count
+=
136 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
137 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
140 if (subpass_attachment_count
) {
141 pass
->subpass_attachments
= vk_alloc2(
142 &device
->alloc
, pAllocator
,
143 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
144 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
145 if (pass
->subpass_attachments
== NULL
) {
146 vk_free2(&device
->alloc
, pAllocator
, pass
);
147 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
150 pass
->subpass_attachments
= NULL
;
152 p
= pass
->subpass_attachments
;
153 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
154 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
155 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
157 subpass
->input_count
= desc
->inputAttachmentCount
;
158 subpass
->color_count
= desc
->colorAttachmentCount
;
159 subpass
->samples
= 0;
161 if (desc
->inputAttachmentCount
> 0) {
162 subpass
->input_attachments
= p
;
163 p
+= desc
->inputAttachmentCount
;
165 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
166 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
167 subpass
->input_attachments
[j
].attachment
= a
;
168 if (a
!= VK_ATTACHMENT_UNUSED
)
169 pass
->attachments
[a
].gmem_offset
= 0;
173 if (desc
->colorAttachmentCount
> 0) {
174 subpass
->color_attachments
= p
;
175 p
+= desc
->colorAttachmentCount
;
177 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
178 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
179 subpass
->color_attachments
[j
].attachment
= a
;
181 if (a
!= VK_ATTACHMENT_UNUSED
) {
182 pass
->attachments
[a
].gmem_offset
= 0;
183 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
188 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
189 if (desc
->pResolveAttachments
) {
190 p
+= desc
->colorAttachmentCount
;
191 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
192 subpass
->resolve_attachments
[j
].attachment
=
193 desc
->pResolveAttachments
[j
].attachment
;
197 uint32_t a
= desc
->pDepthStencilAttachment
?
198 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
199 subpass
->depth_stencil_attachment
.attachment
= a
;
200 if (a
!= VK_ATTACHMENT_UNUSED
) {
201 pass
->attachments
[a
].gmem_offset
= 0;
202 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
205 subpass
->samples
= subpass
->samples
?: 1;
208 *pRenderPass
= tu_render_pass_to_handle(pass
);
210 compute_gmem_offsets(pass
, device
->physical_device
->gmem_size
);
216 tu_CreateRenderPass2(VkDevice _device
,
217 const VkRenderPassCreateInfo2KHR
*pCreateInfo
,
218 const VkAllocationCallbacks
*pAllocator
,
219 VkRenderPass
*pRenderPass
)
221 TU_FROM_HANDLE(tu_device
, device
, _device
);
222 struct tu_render_pass
*pass
;
224 size_t attachments_offset
;
226 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR
);
228 size
= sizeof(*pass
);
229 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
230 attachments_offset
= size
;
231 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
233 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
234 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
236 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
238 memset(pass
, 0, size
);
239 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
240 pass
->subpass_count
= pCreateInfo
->subpassCount
;
241 pass
->attachments
= (void *) pass
+ attachments_offset
;
243 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
244 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
246 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
247 att
->cpp
= vk_format_get_blocksize(att
->format
) *
248 pCreateInfo
->pAttachments
[i
].samples
;
249 att
->load_op
= pCreateInfo
->pAttachments
[i
].loadOp
;
250 att
->stencil_load_op
= pCreateInfo
->pAttachments
[i
].stencilLoadOp
;
251 att
->store_op
= pCreateInfo
->pAttachments
[i
].storeOp
;
252 att
->stencil_store_op
= pCreateInfo
->pAttachments
[i
].stencilStoreOp
;
253 if (pCreateInfo
->pAttachments
[i
].stencilStoreOp
== VK_ATTACHMENT_STORE_OP_STORE
&&
254 vk_format_has_stencil(att
->format
))
255 att
->store_op
= VK_ATTACHMENT_STORE_OP_STORE
;
256 att
->gmem_offset
= -1;
258 uint32_t subpass_attachment_count
= 0;
259 struct tu_subpass_attachment
*p
;
260 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
261 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
263 subpass_attachment_count
+=
264 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
265 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
268 if (subpass_attachment_count
) {
269 pass
->subpass_attachments
= vk_alloc2(
270 &device
->alloc
, pAllocator
,
271 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
272 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
273 if (pass
->subpass_attachments
== NULL
) {
274 vk_free2(&device
->alloc
, pAllocator
, pass
);
275 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
278 pass
->subpass_attachments
= NULL
;
280 p
= pass
->subpass_attachments
;
281 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
282 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
283 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
285 subpass
->input_count
= desc
->inputAttachmentCount
;
286 subpass
->color_count
= desc
->colorAttachmentCount
;
287 subpass
->samples
= 0;
289 if (desc
->inputAttachmentCount
> 0) {
290 subpass
->input_attachments
= p
;
291 p
+= desc
->inputAttachmentCount
;
293 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
294 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
295 subpass
->input_attachments
[j
].attachment
= a
;
296 if (a
!= VK_ATTACHMENT_UNUSED
)
297 pass
->attachments
[a
].gmem_offset
= 0;
301 if (desc
->colorAttachmentCount
> 0) {
302 subpass
->color_attachments
= p
;
303 p
+= desc
->colorAttachmentCount
;
305 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
306 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
307 subpass
->color_attachments
[j
].attachment
= a
;
309 if (a
!= VK_ATTACHMENT_UNUSED
) {
310 pass
->attachments
[a
].gmem_offset
= 0;
311 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
316 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
317 if (desc
->pResolveAttachments
) {
318 p
+= desc
->colorAttachmentCount
;
319 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
320 subpass
->resolve_attachments
[j
].attachment
=
321 desc
->pResolveAttachments
[j
].attachment
;
326 uint32_t a
= desc
->pDepthStencilAttachment
?
327 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
328 subpass
->depth_stencil_attachment
.attachment
= a
;
329 if (a
!= VK_ATTACHMENT_UNUSED
) {
330 pass
->attachments
[a
].gmem_offset
= 0;
331 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
334 subpass
->samples
= subpass
->samples
?: 1;
337 *pRenderPass
= tu_render_pass_to_handle(pass
);
339 compute_gmem_offsets(pass
, device
->physical_device
->gmem_size
);
345 tu_DestroyRenderPass(VkDevice _device
,
347 const VkAllocationCallbacks
*pAllocator
)
349 TU_FROM_HANDLE(tu_device
, device
, _device
);
350 TU_FROM_HANDLE(tu_render_pass
, pass
, _pass
);
355 vk_free2(&device
->alloc
, pAllocator
, pass
->subpass_attachments
);
356 vk_free2(&device
->alloc
, pAllocator
, pass
);
360 tu_GetRenderAreaGranularity(VkDevice _device
,
361 VkRenderPass renderPass
,
362 VkExtent2D
*pGranularity
)
364 TU_FROM_HANDLE(tu_device
, device
, _device
);
366 pGranularity
->width
= device
->physical_device
->tile_align_w
;
367 pGranularity
->height
= device
->physical_device
->tile_align_h
;