/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
27 #include "tu_private.h"
30 #include "vk_format.h"
32 static void update_samples(struct tu_subpass
*subpass
,
33 VkSampleCountFlagBits samples
)
35 assert(subpass
->samples
== 0 || subpass
->samples
== samples
);
36 subpass
->samples
= samples
;
40 create_render_pass_common(struct tu_render_pass
*pass
,
41 const struct tu_physical_device
*phys_dev
)
43 uint32_t block_align_shift
= 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
44 uint32_t tile_align_w
= phys_dev
->tile_align_w
;
45 uint32_t gmem_align
= (1 << block_align_shift
) * tile_align_w
* TILE_ALIGN_H
;
47 /* calculate total bytes per pixel */
48 uint32_t cpp_total
= 0;
49 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
50 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
51 if (att
->gmem_offset
>= 0) {
52 cpp_total
+= att
->cpp
;
53 /* texture pitch must be aligned to 64, use a tile_align_w that is
54 * a multiple of 64 for cpp==1 attachment to work as input attachment
56 if (att
->cpp
== 1 && tile_align_w
% 64 != 0) {
58 block_align_shift
-= 1;
63 pass
->tile_align_w
= tile_align_w
;
65 /* no gmem attachments */
67 /* any value non-zero value so tiling config works with no attachments */
68 pass
->gmem_pixels
= 1024*1024;
72 /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
73 * doesn't break things. maybe there is a better solution?
74 * TODO: this algorithm isn't optimal
75 * for example, two attachments with cpp = {1, 4}
76 * result: nblocks = {12, 52}, pixels = 196608
77 * optimal: nblocks = {13, 51}, pixels = 208896
79 uint32_t gmem_blocks
= phys_dev
->ccu_offset_gmem
/ gmem_align
;
80 uint32_t offset
= 0, pixels
= ~0u;
81 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
82 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
83 if (att
->gmem_offset
< 0)
86 att
->gmem_offset
= offset
;
88 uint32_t align
= MAX2(1, att
->cpp
>> block_align_shift
);
89 uint32_t nblocks
= MAX2((gmem_blocks
* att
->cpp
/ cpp_total
) & ~(align
- 1), align
);
91 gmem_blocks
-= nblocks
;
92 cpp_total
-= att
->cpp
;
93 offset
+= nblocks
* gmem_align
;
94 pixels
= MIN2(pixels
, nblocks
* gmem_align
/ att
->cpp
);
97 pass
->gmem_pixels
= pixels
;
99 for (uint32_t i
= 0; i
< pass
->subpass_count
; i
++) {
100 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
102 subpass
->srgb_cntl
= 0;
104 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
105 uint32_t a
= subpass
->color_attachments
[i
].attachment
;
106 if (a
== VK_ATTACHMENT_UNUSED
)
109 if (vk_format_is_srgb(pass
->attachments
[a
].format
))
110 subpass
->srgb_cntl
|= 1 << i
;
114 /* disable unused attachments */
115 for (uint32_t i
= 0; i
< pass
->attachment_count
; i
++) {
116 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
117 if (att
->gmem_offset
< 0) {
125 attachment_set_ops(struct tu_render_pass_attachment
*att
,
126 VkAttachmentLoadOp load_op
,
127 VkAttachmentLoadOp stencil_load_op
,
128 VkAttachmentStoreOp store_op
,
129 VkAttachmentStoreOp stencil_store_op
)
133 (load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
) ? VK_IMAGE_ASPECT_COLOR_BIT
: 0;
134 att
->load
= (load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
);
135 att
->store
= (store_op
== VK_ATTACHMENT_STORE_OP_STORE
);
137 bool stencil_clear
= (stencil_load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
);
138 bool stencil_load
= (stencil_load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
);
139 bool stencil_store
= (stencil_store_op
== VK_ATTACHMENT_STORE_OP_STORE
);
141 switch (att
->format
) {
142 case VK_FORMAT_D24_UNORM_S8_UINT
: /* || stencil load/store */
144 att
->clear_mask
= VK_IMAGE_ASPECT_DEPTH_BIT
;
146 att
->clear_mask
|= VK_IMAGE_ASPECT_STENCIL_BIT
;
152 case VK_FORMAT_S8_UINT
: /* replace load/store with stencil load/store */
153 att
->clear_mask
= stencil_clear
? VK_IMAGE_ASPECT_COLOR_BIT
: 0;
154 att
->load
= stencil_load
;
155 att
->store
= stencil_store
;
163 tu_CreateRenderPass(VkDevice _device
,
164 const VkRenderPassCreateInfo
*pCreateInfo
,
165 const VkAllocationCallbacks
*pAllocator
,
166 VkRenderPass
*pRenderPass
)
168 TU_FROM_HANDLE(tu_device
, device
, _device
);
169 struct tu_render_pass
*pass
;
171 size_t attachments_offset
;
173 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO
);
175 size
= sizeof(*pass
);
176 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
177 attachments_offset
= size
;
178 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
180 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
181 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
183 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
185 memset(pass
, 0, size
);
186 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
187 pass
->subpass_count
= pCreateInfo
->subpassCount
;
188 pass
->attachments
= (void *) pass
+ attachments_offset
;
190 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
191 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
193 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
194 att
->samples
= pCreateInfo
->pAttachments
[i
].samples
;
195 att
->cpp
= vk_format_get_blocksize(att
->format
) * att
->samples
;
196 att
->gmem_offset
= -1;
198 attachment_set_ops(att
,
199 pCreateInfo
->pAttachments
[i
].loadOp
,
200 pCreateInfo
->pAttachments
[i
].stencilLoadOp
,
201 pCreateInfo
->pAttachments
[i
].storeOp
,
202 pCreateInfo
->pAttachments
[i
].stencilStoreOp
);
205 uint32_t subpass_attachment_count
= 0;
206 struct tu_subpass_attachment
*p
;
207 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
208 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
210 subpass_attachment_count
+=
211 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
212 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
215 if (subpass_attachment_count
) {
216 pass
->subpass_attachments
= vk_alloc2(
217 &device
->alloc
, pAllocator
,
218 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
219 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
220 if (pass
->subpass_attachments
== NULL
) {
221 vk_free2(&device
->alloc
, pAllocator
, pass
);
222 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
225 pass
->subpass_attachments
= NULL
;
227 p
= pass
->subpass_attachments
;
228 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
229 const VkSubpassDescription
*desc
= &pCreateInfo
->pSubpasses
[i
];
230 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
232 subpass
->input_count
= desc
->inputAttachmentCount
;
233 subpass
->color_count
= desc
->colorAttachmentCount
;
234 subpass
->samples
= 0;
236 if (desc
->inputAttachmentCount
> 0) {
237 subpass
->input_attachments
= p
;
238 p
+= desc
->inputAttachmentCount
;
240 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
241 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
242 subpass
->input_attachments
[j
].attachment
= a
;
243 if (a
!= VK_ATTACHMENT_UNUSED
)
244 pass
->attachments
[a
].gmem_offset
= 0;
248 if (desc
->colorAttachmentCount
> 0) {
249 subpass
->color_attachments
= p
;
250 p
+= desc
->colorAttachmentCount
;
252 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
253 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
254 subpass
->color_attachments
[j
].attachment
= a
;
256 if (a
!= VK_ATTACHMENT_UNUSED
) {
257 pass
->attachments
[a
].gmem_offset
= 0;
258 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
263 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
264 if (desc
->pResolveAttachments
) {
265 p
+= desc
->colorAttachmentCount
;
266 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
267 subpass
->resolve_attachments
[j
].attachment
=
268 desc
->pResolveAttachments
[j
].attachment
;
272 uint32_t a
= desc
->pDepthStencilAttachment
?
273 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
274 subpass
->depth_stencil_attachment
.attachment
= a
;
275 if (a
!= VK_ATTACHMENT_UNUSED
) {
276 pass
->attachments
[a
].gmem_offset
= 0;
277 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
280 subpass
->samples
= subpass
->samples
?: 1;
283 *pRenderPass
= tu_render_pass_to_handle(pass
);
285 create_render_pass_common(pass
, device
->physical_device
);
291 tu_CreateRenderPass2(VkDevice _device
,
292 const VkRenderPassCreateInfo2KHR
*pCreateInfo
,
293 const VkAllocationCallbacks
*pAllocator
,
294 VkRenderPass
*pRenderPass
)
296 TU_FROM_HANDLE(tu_device
, device
, _device
);
297 struct tu_render_pass
*pass
;
299 size_t attachments_offset
;
301 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR
);
303 size
= sizeof(*pass
);
304 size
+= pCreateInfo
->subpassCount
* sizeof(pass
->subpasses
[0]);
305 attachments_offset
= size
;
306 size
+= pCreateInfo
->attachmentCount
* sizeof(pass
->attachments
[0]);
308 pass
= vk_alloc2(&device
->alloc
, pAllocator
, size
, 8,
309 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
311 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
313 memset(pass
, 0, size
);
314 pass
->attachment_count
= pCreateInfo
->attachmentCount
;
315 pass
->subpass_count
= pCreateInfo
->subpassCount
;
316 pass
->attachments
= (void *) pass
+ attachments_offset
;
318 for (uint32_t i
= 0; i
< pCreateInfo
->attachmentCount
; i
++) {
319 struct tu_render_pass_attachment
*att
= &pass
->attachments
[i
];
321 att
->format
= pCreateInfo
->pAttachments
[i
].format
;
322 att
->samples
= pCreateInfo
->pAttachments
[i
].samples
;
323 att
->cpp
= vk_format_get_blocksize(att
->format
) * att
->samples
;
324 att
->gmem_offset
= -1;
326 attachment_set_ops(att
,
327 pCreateInfo
->pAttachments
[i
].loadOp
,
328 pCreateInfo
->pAttachments
[i
].stencilLoadOp
,
329 pCreateInfo
->pAttachments
[i
].storeOp
,
330 pCreateInfo
->pAttachments
[i
].stencilStoreOp
);
332 uint32_t subpass_attachment_count
= 0;
333 struct tu_subpass_attachment
*p
;
334 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
335 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
337 subpass_attachment_count
+=
338 desc
->inputAttachmentCount
+ desc
->colorAttachmentCount
+
339 (desc
->pResolveAttachments
? desc
->colorAttachmentCount
: 0);
342 if (subpass_attachment_count
) {
343 pass
->subpass_attachments
= vk_alloc2(
344 &device
->alloc
, pAllocator
,
345 subpass_attachment_count
* sizeof(struct tu_subpass_attachment
), 8,
346 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
347 if (pass
->subpass_attachments
== NULL
) {
348 vk_free2(&device
->alloc
, pAllocator
, pass
);
349 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
352 pass
->subpass_attachments
= NULL
;
354 p
= pass
->subpass_attachments
;
355 for (uint32_t i
= 0; i
< pCreateInfo
->subpassCount
; i
++) {
356 const VkSubpassDescription2KHR
*desc
= &pCreateInfo
->pSubpasses
[i
];
357 struct tu_subpass
*subpass
= &pass
->subpasses
[i
];
359 subpass
->input_count
= desc
->inputAttachmentCount
;
360 subpass
->color_count
= desc
->colorAttachmentCount
;
361 subpass
->samples
= 0;
363 if (desc
->inputAttachmentCount
> 0) {
364 subpass
->input_attachments
= p
;
365 p
+= desc
->inputAttachmentCount
;
367 for (uint32_t j
= 0; j
< desc
->inputAttachmentCount
; j
++) {
368 uint32_t a
= desc
->pInputAttachments
[j
].attachment
;
369 subpass
->input_attachments
[j
].attachment
= a
;
370 if (a
!= VK_ATTACHMENT_UNUSED
)
371 pass
->attachments
[a
].gmem_offset
= 0;
375 if (desc
->colorAttachmentCount
> 0) {
376 subpass
->color_attachments
= p
;
377 p
+= desc
->colorAttachmentCount
;
379 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
380 uint32_t a
= desc
->pColorAttachments
[j
].attachment
;
381 subpass
->color_attachments
[j
].attachment
= a
;
383 if (a
!= VK_ATTACHMENT_UNUSED
) {
384 pass
->attachments
[a
].gmem_offset
= 0;
385 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
390 subpass
->resolve_attachments
= desc
->pResolveAttachments
? p
: NULL
;
391 if (desc
->pResolveAttachments
) {
392 p
+= desc
->colorAttachmentCount
;
393 for (uint32_t j
= 0; j
< desc
->colorAttachmentCount
; j
++) {
394 subpass
->resolve_attachments
[j
].attachment
=
395 desc
->pResolveAttachments
[j
].attachment
;
400 uint32_t a
= desc
->pDepthStencilAttachment
?
401 desc
->pDepthStencilAttachment
->attachment
: VK_ATTACHMENT_UNUSED
;
402 subpass
->depth_stencil_attachment
.attachment
= a
;
403 if (a
!= VK_ATTACHMENT_UNUSED
) {
404 pass
->attachments
[a
].gmem_offset
= 0;
405 update_samples(subpass
, pCreateInfo
->pAttachments
[a
].samples
);
408 subpass
->samples
= subpass
->samples
?: 1;
411 *pRenderPass
= tu_render_pass_to_handle(pass
);
413 create_render_pass_common(pass
, device
->physical_device
);
419 tu_DestroyRenderPass(VkDevice _device
,
421 const VkAllocationCallbacks
*pAllocator
)
423 TU_FROM_HANDLE(tu_device
, device
, _device
);
424 TU_FROM_HANDLE(tu_render_pass
, pass
, _pass
);
429 vk_free2(&device
->alloc
, pAllocator
, pass
->subpass_attachments
);
430 vk_free2(&device
->alloc
, pAllocator
, pass
);
434 tu_GetRenderAreaGranularity(VkDevice _device
,
435 VkRenderPass renderPass
,
436 VkExtent2D
*pGranularity
)
438 pGranularity
->width
= GMEM_ALIGN_W
;
439 pGranularity
->height
= GMEM_ALIGN_H
;