/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "anv_private.h"
27 lookup_blorp_shader(struct blorp_context
*blorp
,
28 const void *key
, uint32_t key_size
,
29 uint32_t *kernel_out
, void *prog_data_out
)
31 struct anv_device
*device
= blorp
->driver_ctx
;
33 /* The blorp cache must be a real cache */
34 assert(device
->blorp_shader_cache
.cache
);
36 struct anv_shader_bin
*bin
=
37 anv_pipeline_cache_search(&device
->blorp_shader_cache
, key
, key_size
);
41 /* The cache already has a reference and it's not going anywhere so there
42 * is no need to hold a second reference.
44 anv_shader_bin_unref(device
, bin
);
46 *kernel_out
= bin
->kernel
.offset
;
47 *(const struct brw_stage_prog_data
**)prog_data_out
= bin
->prog_data
;
53 upload_blorp_shader(struct blorp_context
*blorp
,
54 const void *key
, uint32_t key_size
,
55 const void *kernel
, uint32_t kernel_size
,
56 const struct brw_stage_prog_data
*prog_data
,
57 uint32_t prog_data_size
,
58 uint32_t *kernel_out
, void *prog_data_out
)
60 struct anv_device
*device
= blorp
->driver_ctx
;
62 /* The blorp cache must be a real cache */
63 assert(device
->blorp_shader_cache
.cache
);
65 struct anv_pipeline_bind_map bind_map
= {
70 struct anv_shader_bin
*bin
=
71 anv_pipeline_cache_upload_kernel(&device
->blorp_shader_cache
,
72 key
, key_size
, kernel
, kernel_size
,
73 prog_data
, prog_data_size
, &bind_map
);
78 /* The cache already has a reference and it's not going anywhere so there
79 * is no need to hold a second reference.
81 anv_shader_bin_unref(device
, bin
);
83 *kernel_out
= bin
->kernel
.offset
;
84 *(const struct brw_stage_prog_data
**)prog_data_out
= bin
->prog_data
;
90 anv_device_init_blorp(struct anv_device
*device
)
92 anv_pipeline_cache_init(&device
->blorp_shader_cache
, device
, true);
93 blorp_init(&device
->blorp
, device
, &device
->isl_dev
);
94 device
->blorp
.compiler
= device
->instance
->physicalDevice
.compiler
;
95 device
->blorp
.mocs
.tex
= device
->default_mocs
;
96 device
->blorp
.mocs
.rb
= device
->default_mocs
;
97 device
->blorp
.mocs
.vb
= device
->default_mocs
;
98 device
->blorp
.lookup_shader
= lookup_blorp_shader
;
99 device
->blorp
.upload_shader
= upload_blorp_shader
;
100 switch (device
->info
.gen
) {
102 if (device
->info
.is_haswell
) {
103 device
->blorp
.exec
= gen75_blorp_exec
;
105 device
->blorp
.exec
= gen7_blorp_exec
;
109 device
->blorp
.exec
= gen8_blorp_exec
;
112 device
->blorp
.exec
= gen9_blorp_exec
;
115 unreachable("Unknown hardware generation");
120 anv_device_finish_blorp(struct anv_device
*device
)
122 blorp_finish(&device
->blorp
);
123 anv_pipeline_cache_finish(&device
->blorp_shader_cache
);
127 get_blorp_surf_for_anv_buffer(struct anv_device
*device
,
128 struct anv_buffer
*buffer
, uint64_t offset
,
129 uint32_t width
, uint32_t height
,
130 uint32_t row_pitch
, enum isl_format format
,
131 struct blorp_surf
*blorp_surf
,
132 struct isl_surf
*isl_surf
)
134 const struct isl_format_layout
*fmtl
=
135 isl_format_get_layout(format
);
138 /* ASTC is the only format which doesn't support linear layouts.
139 * Create an equivalently sized surface with ISL to get around this.
141 if (fmtl
->txc
== ISL_TXC_ASTC
) {
142 /* Use an equivalently sized format */
143 format
= ISL_FORMAT_R32G32B32A32_UINT
;
144 assert(fmtl
->bpb
== isl_format_get_layout(format
)->bpb
);
146 /* Shrink the dimensions for the new format */
147 width
= DIV_ROUND_UP(width
, fmtl
->bw
);
148 height
= DIV_ROUND_UP(height
, fmtl
->bh
);
151 *blorp_surf
= (struct blorp_surf
) {
154 .buffer
= buffer
->bo
,
155 .offset
= buffer
->offset
+ offset
,
159 ok
= isl_surf_init(&device
->isl_dev
, isl_surf
,
160 .dim
= ISL_SURF_DIM_2D
,
168 .row_pitch
= row_pitch
,
169 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
|
170 ISL_SURF_USAGE_RENDER_TARGET_BIT
,
171 .tiling_flags
= ISL_TILING_LINEAR_BIT
);
176 get_blorp_surf_for_anv_image(const struct anv_image
*image
,
177 VkImageAspectFlags aspect
,
178 enum isl_aux_usage aux_usage
,
179 struct blorp_surf
*blorp_surf
)
181 if (aspect
== VK_IMAGE_ASPECT_STENCIL_BIT
||
182 aux_usage
== ISL_AUX_USAGE_HIZ
)
183 aux_usage
= ISL_AUX_USAGE_NONE
;
185 const struct anv_surface
*surface
=
186 anv_image_get_surface_for_aspect_mask(image
, aspect
);
188 *blorp_surf
= (struct blorp_surf
) {
189 .surf
= &surface
->isl
,
192 .offset
= image
->offset
+ surface
->offset
,
196 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
197 blorp_surf
->aux_surf
= &image
->aux_surface
.isl
,
198 blorp_surf
->aux_addr
= (struct blorp_address
) {
200 .offset
= image
->offset
+ image
->aux_surface
.offset
,
202 blorp_surf
->aux_usage
= aux_usage
;
206 void anv_CmdCopyImage(
207 VkCommandBuffer commandBuffer
,
209 VkImageLayout srcImageLayout
,
211 VkImageLayout dstImageLayout
,
212 uint32_t regionCount
,
213 const VkImageCopy
* pRegions
)
215 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
216 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
217 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
219 struct blorp_batch batch
;
220 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
222 for (unsigned r
= 0; r
< regionCount
; r
++) {
223 VkOffset3D srcOffset
=
224 anv_sanitize_image_offset(src_image
->type
, pRegions
[r
].srcOffset
);
225 VkOffset3D dstOffset
=
226 anv_sanitize_image_offset(dst_image
->type
, pRegions
[r
].dstOffset
);
228 anv_sanitize_image_extent(src_image
->type
, pRegions
[r
].extent
);
230 unsigned dst_base_layer
, layer_count
;
231 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
232 dst_base_layer
= pRegions
[r
].dstOffset
.z
;
233 layer_count
= pRegions
[r
].extent
.depth
;
235 dst_base_layer
= pRegions
[r
].dstSubresource
.baseArrayLayer
;
237 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
);
240 unsigned src_base_layer
;
241 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
242 src_base_layer
= pRegions
[r
].srcOffset
.z
;
244 src_base_layer
= pRegions
[r
].srcSubresource
.baseArrayLayer
;
245 assert(layer_count
==
246 anv_get_layerCount(src_image
, &pRegions
[r
].srcSubresource
));
249 assert(pRegions
[r
].srcSubresource
.aspectMask
==
250 pRegions
[r
].dstSubresource
.aspectMask
);
253 for_each_bit(a
, pRegions
[r
].dstSubresource
.aspectMask
) {
254 VkImageAspectFlagBits aspect
= (1 << a
);
256 struct blorp_surf src_surf
, dst_surf
;
257 get_blorp_surf_for_anv_image(src_image
, aspect
, src_image
->aux_usage
,
259 get_blorp_surf_for_anv_image(dst_image
, aspect
, dst_image
->aux_usage
,
262 for (unsigned i
= 0; i
< layer_count
; i
++) {
263 blorp_copy(&batch
, &src_surf
, pRegions
[r
].srcSubresource
.mipLevel
,
265 &dst_surf
, pRegions
[r
].dstSubresource
.mipLevel
,
267 srcOffset
.x
, srcOffset
.y
,
268 dstOffset
.x
, dstOffset
.y
,
269 extent
.width
, extent
.height
);
274 blorp_batch_finish(&batch
);
278 copy_buffer_to_image(struct anv_cmd_buffer
*cmd_buffer
,
279 struct anv_buffer
*anv_buffer
,
280 struct anv_image
*anv_image
,
281 uint32_t regionCount
,
282 const VkBufferImageCopy
* pRegions
,
283 bool buffer_to_image
)
285 struct blorp_batch batch
;
286 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
289 struct blorp_surf surf
;
292 } image
, buffer
, *src
, *dst
;
295 buffer
.offset
= (VkOffset3D
) { 0, 0, 0 };
297 if (buffer_to_image
) {
305 for (unsigned r
= 0; r
< regionCount
; r
++) {
306 const VkImageAspectFlags aspect
= pRegions
[r
].imageSubresource
.aspectMask
;
308 get_blorp_surf_for_anv_image(anv_image
, aspect
, anv_image
->aux_usage
,
311 anv_sanitize_image_offset(anv_image
->type
, pRegions
[r
].imageOffset
);
312 image
.level
= pRegions
[r
].imageSubresource
.mipLevel
;
315 anv_sanitize_image_extent(anv_image
->type
, pRegions
[r
].imageExtent
);
316 if (anv_image
->type
!= VK_IMAGE_TYPE_3D
) {
317 image
.offset
.z
= pRegions
[r
].imageSubresource
.baseArrayLayer
;
319 anv_get_layerCount(anv_image
, &pRegions
[r
].imageSubresource
);
322 const enum isl_format buffer_format
=
323 anv_get_isl_format(&cmd_buffer
->device
->info
, anv_image
->vk_format
,
324 aspect
, VK_IMAGE_TILING_LINEAR
);
326 const VkExtent3D bufferImageExtent
= {
327 .width
= pRegions
[r
].bufferRowLength
?
328 pRegions
[r
].bufferRowLength
: extent
.width
,
329 .height
= pRegions
[r
].bufferImageHeight
?
330 pRegions
[r
].bufferImageHeight
: extent
.height
,
333 const struct isl_format_layout
*buffer_fmtl
=
334 isl_format_get_layout(buffer_format
);
336 const uint32_t buffer_row_pitch
=
337 DIV_ROUND_UP(bufferImageExtent
.width
, buffer_fmtl
->bw
) *
338 (buffer_fmtl
->bpb
/ 8);
340 const uint32_t buffer_layer_stride
=
341 DIV_ROUND_UP(bufferImageExtent
.height
, buffer_fmtl
->bh
) *
344 struct isl_surf buffer_isl_surf
;
345 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
346 anv_buffer
, pRegions
[r
].bufferOffset
,
347 extent
.width
, extent
.height
,
348 buffer_row_pitch
, buffer_format
,
349 &buffer
.surf
, &buffer_isl_surf
);
351 for (unsigned z
= 0; z
< extent
.depth
; z
++) {
352 blorp_copy(&batch
, &src
->surf
, src
->level
, src
->offset
.z
,
353 &dst
->surf
, dst
->level
, dst
->offset
.z
,
354 src
->offset
.x
, src
->offset
.y
, dst
->offset
.x
, dst
->offset
.y
,
355 extent
.width
, extent
.height
);
358 buffer
.surf
.addr
.offset
+= buffer_layer_stride
;
362 blorp_batch_finish(&batch
);
365 void anv_CmdCopyBufferToImage(
366 VkCommandBuffer commandBuffer
,
369 VkImageLayout dstImageLayout
,
370 uint32_t regionCount
,
371 const VkBufferImageCopy
* pRegions
)
373 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
374 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
375 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
377 copy_buffer_to_image(cmd_buffer
, src_buffer
, dst_image
,
378 regionCount
, pRegions
, true);
381 void anv_CmdCopyImageToBuffer(
382 VkCommandBuffer commandBuffer
,
384 VkImageLayout srcImageLayout
,
386 uint32_t regionCount
,
387 const VkBufferImageCopy
* pRegions
)
389 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
390 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
391 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
393 copy_buffer_to_image(cmd_buffer
, dst_buffer
, src_image
,
394 regionCount
, pRegions
, false);
/* Normalize two coordinate ranges so each is ascending, swapping the
 * endpoints in place when needed.  Returns true if exactly one of the
 * two ranges was reversed (i.e. the blit must mirror along this axis);
 * flipping both cancels out.
 */
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}
418 void anv_CmdBlitImage(
419 VkCommandBuffer commandBuffer
,
421 VkImageLayout srcImageLayout
,
423 VkImageLayout dstImageLayout
,
424 uint32_t regionCount
,
425 const VkImageBlit
* pRegions
,
429 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
430 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
431 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
433 struct blorp_surf src
, dst
;
437 case VK_FILTER_NEAREST
:
438 gl_filter
= 0x2600; /* GL_NEAREST */
440 case VK_FILTER_LINEAR
:
441 gl_filter
= 0x2601; /* GL_LINEAR */
444 unreachable("Invalid filter");
447 struct blorp_batch batch
;
448 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
450 for (unsigned r
= 0; r
< regionCount
; r
++) {
451 const VkImageSubresourceLayers
*src_res
= &pRegions
[r
].srcSubresource
;
452 const VkImageSubresourceLayers
*dst_res
= &pRegions
[r
].dstSubresource
;
454 get_blorp_surf_for_anv_image(src_image
, src_res
->aspectMask
,
455 src_image
->aux_usage
, &src
);
456 get_blorp_surf_for_anv_image(dst_image
, dst_res
->aspectMask
,
457 dst_image
->aux_usage
, &dst
);
459 struct anv_format src_format
=
460 anv_get_format(&cmd_buffer
->device
->info
, src_image
->vk_format
,
461 src_res
->aspectMask
, src_image
->tiling
);
462 struct anv_format dst_format
=
463 anv_get_format(&cmd_buffer
->device
->info
, dst_image
->vk_format
,
464 dst_res
->aspectMask
, dst_image
->tiling
);
466 unsigned dst_start
, dst_end
;
467 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
468 assert(dst_res
->baseArrayLayer
== 0);
469 dst_start
= pRegions
[r
].dstOffsets
[0].z
;
470 dst_end
= pRegions
[r
].dstOffsets
[1].z
;
472 dst_start
= dst_res
->baseArrayLayer
;
473 dst_end
= dst_start
+ anv_get_layerCount(dst_image
, dst_res
);
476 unsigned src_start
, src_end
;
477 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
478 assert(src_res
->baseArrayLayer
== 0);
479 src_start
= pRegions
[r
].srcOffsets
[0].z
;
480 src_end
= pRegions
[r
].srcOffsets
[1].z
;
482 src_start
= src_res
->baseArrayLayer
;
483 src_end
= src_start
+ anv_get_layerCount(src_image
, src_res
);
486 bool flip_z
= flip_coords(&src_start
, &src_end
, &dst_start
, &dst_end
);
487 float src_z_step
= (float)(src_end
+ 1 - src_start
) /
488 (float)(dst_end
+ 1 - dst_start
);
495 unsigned src_x0
= pRegions
[r
].srcOffsets
[0].x
;
496 unsigned src_x1
= pRegions
[r
].srcOffsets
[1].x
;
497 unsigned dst_x0
= pRegions
[r
].dstOffsets
[0].x
;
498 unsigned dst_x1
= pRegions
[r
].dstOffsets
[1].x
;
499 bool flip_x
= flip_coords(&src_x0
, &src_x1
, &dst_x0
, &dst_x1
);
501 unsigned src_y0
= pRegions
[r
].srcOffsets
[0].y
;
502 unsigned src_y1
= pRegions
[r
].srcOffsets
[1].y
;
503 unsigned dst_y0
= pRegions
[r
].dstOffsets
[0].y
;
504 unsigned dst_y1
= pRegions
[r
].dstOffsets
[1].y
;
505 bool flip_y
= flip_coords(&src_y0
, &src_y1
, &dst_y0
, &dst_y1
);
507 const unsigned num_layers
= dst_end
- dst_start
;
508 for (unsigned i
= 0; i
< num_layers
; i
++) {
509 unsigned dst_z
= dst_start
+ i
;
510 unsigned src_z
= src_start
+ i
* src_z_step
;
512 blorp_blit(&batch
, &src
, src_res
->mipLevel
, src_z
,
513 src_format
.isl_format
, src_format
.swizzle
,
514 &dst
, dst_res
->mipLevel
, dst_z
,
515 dst_format
.isl_format
,
516 anv_swizzle_for_render(dst_format
.swizzle
),
517 src_x0
, src_y0
, src_x1
, src_y1
,
518 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
519 gl_filter
, flip_x
, flip_y
);
524 blorp_batch_finish(&batch
);
527 static enum isl_format
528 isl_format_for_size(unsigned size_B
)
531 case 1: return ISL_FORMAT_R8_UINT
;
532 case 2: return ISL_FORMAT_R8G8_UINT
;
533 case 4: return ISL_FORMAT_R8G8B8A8_UINT
;
534 case 8: return ISL_FORMAT_R16G16B16A16_UINT
;
535 case 16: return ISL_FORMAT_R32G32B32A32_UINT
;
537 unreachable("Not a power-of-two format size");
542 do_buffer_copy(struct blorp_batch
*batch
,
543 struct anv_bo
*src
, uint64_t src_offset
,
544 struct anv_bo
*dst
, uint64_t dst_offset
,
545 int width
, int height
, int block_size
)
547 struct anv_device
*device
= batch
->blorp
->driver_ctx
;
549 /* The actual format we pick doesn't matter as blorp will throw it away.
550 * The only thing that actually matters is the size.
552 enum isl_format format
= isl_format_for_size(block_size
);
554 struct isl_surf surf
;
555 isl_surf_init(&device
->isl_dev
, &surf
,
556 .dim
= ISL_SURF_DIM_2D
,
564 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
|
565 ISL_SURF_USAGE_RENDER_TARGET_BIT
,
566 .tiling_flags
= ISL_TILING_LINEAR_BIT
);
567 assert(surf
.row_pitch
== width
* block_size
);
569 struct blorp_surf src_blorp_surf
= {
573 .offset
= src_offset
,
577 struct blorp_surf dst_blorp_surf
= {
581 .offset
= dst_offset
,
585 blorp_copy(batch
, &src_blorp_surf
, 0, 0, &dst_blorp_surf
, 0, 0,
586 0, 0, 0, 0, width
, height
);
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   /* Shift a 64-bit one: a plain `1 <<` is undefined behavior once the
    * shift count reaches 31+ (int overflow / over-wide shift).
    */
   return 1ull << MIN2(a_log2, b_log2);
}
607 /* This is maximum possible width/height our HW can handle */
608 #define MAX_SURFACE_DIM (1ull << 14)
610 void anv_CmdCopyBuffer(
611 VkCommandBuffer commandBuffer
,
614 uint32_t regionCount
,
615 const VkBufferCopy
* pRegions
)
617 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
618 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
619 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
621 struct blorp_batch batch
;
622 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
624 for (unsigned r
= 0; r
< regionCount
; r
++) {
625 uint64_t src_offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
;
626 uint64_t dst_offset
= dst_buffer
->offset
+ pRegions
[r
].dstOffset
;
627 uint64_t copy_size
= pRegions
[r
].size
;
629 /* First, we compute the biggest format that can be used with the
630 * given offsets and size.
633 bs
= gcd_pow2_u64(bs
, src_offset
);
634 bs
= gcd_pow2_u64(bs
, dst_offset
);
635 bs
= gcd_pow2_u64(bs
, pRegions
[r
].size
);
637 /* First, we make a bunch of max-sized copies */
638 uint64_t max_copy_size
= MAX_SURFACE_DIM
* MAX_SURFACE_DIM
* bs
;
639 while (copy_size
>= max_copy_size
) {
640 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
641 dst_buffer
->bo
, dst_offset
,
642 MAX_SURFACE_DIM
, MAX_SURFACE_DIM
, bs
);
643 copy_size
-= max_copy_size
;
644 src_offset
+= max_copy_size
;
645 dst_offset
+= max_copy_size
;
648 /* Now make a max-width copy */
649 uint64_t height
= copy_size
/ (MAX_SURFACE_DIM
* bs
);
650 assert(height
< MAX_SURFACE_DIM
);
652 uint64_t rect_copy_size
= height
* MAX_SURFACE_DIM
* bs
;
653 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
654 dst_buffer
->bo
, dst_offset
,
655 MAX_SURFACE_DIM
, height
, bs
);
656 copy_size
-= rect_copy_size
;
657 src_offset
+= rect_copy_size
;
658 dst_offset
+= rect_copy_size
;
661 /* Finally, make a small copy to finish it off */
662 if (copy_size
!= 0) {
663 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
664 dst_buffer
->bo
, dst_offset
,
665 copy_size
/ bs
, 1, bs
);
669 blorp_batch_finish(&batch
);
672 void anv_CmdUpdateBuffer(
673 VkCommandBuffer commandBuffer
,
675 VkDeviceSize dstOffset
,
676 VkDeviceSize dataSize
,
679 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
680 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
682 struct blorp_batch batch
;
683 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
685 /* We can't quite grab a full block because the state stream needs a
686 * little data at the top to build its linked list.
688 const uint32_t max_update_size
=
689 cmd_buffer
->device
->dynamic_state_pool
.block_size
- 64;
691 assert(max_update_size
< MAX_SURFACE_DIM
* 4);
693 /* We're about to read data that was written from the CPU. Flush the
694 * texture cache so we don't get anything stale.
696 cmd_buffer
->state
.pending_pipe_bits
|= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT
;
699 const uint32_t copy_size
= MIN2(dataSize
, max_update_size
);
701 struct anv_state tmp_data
=
702 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
, copy_size
, 64);
704 memcpy(tmp_data
.map
, pData
, copy_size
);
706 anv_state_flush(cmd_buffer
->device
, tmp_data
);
709 bs
= gcd_pow2_u64(bs
, dstOffset
);
710 bs
= gcd_pow2_u64(bs
, copy_size
);
712 do_buffer_copy(&batch
,
713 &cmd_buffer
->device
->dynamic_state_pool
.block_pool
.bo
,
715 dst_buffer
->bo
, dst_buffer
->offset
+ dstOffset
,
716 copy_size
/ bs
, 1, bs
);
718 dataSize
-= copy_size
;
719 dstOffset
+= copy_size
;
720 pData
= (void *)pData
+ copy_size
;
723 blorp_batch_finish(&batch
);
726 void anv_CmdFillBuffer(
727 VkCommandBuffer commandBuffer
,
729 VkDeviceSize dstOffset
,
730 VkDeviceSize fillSize
,
733 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
734 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
735 struct blorp_surf surf
;
736 struct isl_surf isl_surf
;
738 struct blorp_batch batch
;
739 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
741 fillSize
= anv_buffer_get_range(dst_buffer
, dstOffset
, fillSize
);
743 /* From the Vulkan spec:
745 * "size is the number of bytes to fill, and must be either a multiple
746 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
747 * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
748 * buffer is not a multiple of 4, then the nearest smaller multiple is
753 /* First, we compute the biggest format that can be used with the
754 * given offsets and size.
757 bs
= gcd_pow2_u64(bs
, dstOffset
);
758 bs
= gcd_pow2_u64(bs
, fillSize
);
759 enum isl_format isl_format
= isl_format_for_size(bs
);
761 union isl_color_value color
= {
762 .u32
= { data
, data
, data
, data
},
765 const uint64_t max_fill_size
= MAX_SURFACE_DIM
* MAX_SURFACE_DIM
* bs
;
766 while (fillSize
>= max_fill_size
) {
767 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
768 dst_buffer
, dstOffset
,
769 MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
770 MAX_SURFACE_DIM
* bs
, isl_format
,
773 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
774 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
776 fillSize
-= max_fill_size
;
777 dstOffset
+= max_fill_size
;
780 uint64_t height
= fillSize
/ (MAX_SURFACE_DIM
* bs
);
781 assert(height
< MAX_SURFACE_DIM
);
783 const uint64_t rect_fill_size
= height
* MAX_SURFACE_DIM
* bs
;
784 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
785 dst_buffer
, dstOffset
,
786 MAX_SURFACE_DIM
, height
,
787 MAX_SURFACE_DIM
* bs
, isl_format
,
790 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
791 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, height
,
793 fillSize
-= rect_fill_size
;
794 dstOffset
+= rect_fill_size
;
798 const uint32_t width
= fillSize
/ bs
;
799 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
800 dst_buffer
, dstOffset
,
802 width
* bs
, isl_format
,
805 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
806 0, 0, 1, 0, 0, width
, 1,
810 blorp_batch_finish(&batch
);
813 void anv_CmdClearColorImage(
814 VkCommandBuffer commandBuffer
,
816 VkImageLayout imageLayout
,
817 const VkClearColorValue
* pColor
,
819 const VkImageSubresourceRange
* pRanges
)
821 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
822 ANV_FROM_HANDLE(anv_image
, image
, _image
);
824 static const bool color_write_disable
[4] = { false, false, false, false };
826 struct blorp_batch batch
;
827 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
829 struct blorp_surf surf
;
830 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_COLOR_BIT
,
831 image
->aux_usage
, &surf
);
833 for (unsigned r
= 0; r
< rangeCount
; r
++) {
834 if (pRanges
[r
].aspectMask
== 0)
837 assert(pRanges
[r
].aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
839 struct anv_format src_format
=
840 anv_get_format(&cmd_buffer
->device
->info
, image
->vk_format
,
841 VK_IMAGE_ASPECT_COLOR_BIT
, image
->tiling
);
843 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
844 unsigned layer_count
= anv_get_layerCount(image
, &pRanges
[r
]);
846 for (unsigned i
= 0; i
< anv_get_levelCount(image
, &pRanges
[r
]); i
++) {
847 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
848 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
849 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
851 if (image
->type
== VK_IMAGE_TYPE_3D
) {
853 layer_count
= anv_minify(image
->extent
.depth
, level
);
856 blorp_clear(&batch
, &surf
,
857 src_format
.isl_format
, src_format
.swizzle
,
858 level
, base_layer
, layer_count
,
859 0, 0, level_width
, level_height
,
860 vk_to_isl_color(*pColor
), color_write_disable
);
864 blorp_batch_finish(&batch
);
867 void anv_CmdClearDepthStencilImage(
868 VkCommandBuffer commandBuffer
,
870 VkImageLayout imageLayout
,
871 const VkClearDepthStencilValue
* pDepthStencil
,
873 const VkImageSubresourceRange
* pRanges
)
875 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
876 ANV_FROM_HANDLE(anv_image
, image
, image_h
);
878 struct blorp_batch batch
;
879 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
881 struct blorp_surf depth
, stencil
;
882 if (image
->aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
883 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
884 ISL_AUX_USAGE_NONE
, &depth
);
886 memset(&depth
, 0, sizeof(depth
));
889 if (image
->aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
890 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_STENCIL_BIT
,
891 ISL_AUX_USAGE_NONE
, &stencil
);
893 memset(&stencil
, 0, sizeof(stencil
));
896 for (unsigned r
= 0; r
< rangeCount
; r
++) {
897 if (pRanges
[r
].aspectMask
== 0)
900 bool clear_depth
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
901 bool clear_stencil
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
903 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
904 unsigned layer_count
= anv_get_layerCount(image
, &pRanges
[r
]);
906 for (unsigned i
= 0; i
< anv_get_levelCount(image
, &pRanges
[r
]); i
++) {
907 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
908 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
909 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
911 if (image
->type
== VK_IMAGE_TYPE_3D
)
912 layer_count
= anv_minify(image
->extent
.depth
, level
);
914 blorp_clear_depth_stencil(&batch
, &depth
, &stencil
,
915 level
, base_layer
, layer_count
,
916 0, 0, level_width
, level_height
,
917 clear_depth
, pDepthStencil
->depth
,
918 clear_stencil
? 0xff : 0,
919 pDepthStencil
->stencil
);
923 blorp_batch_finish(&batch
);
927 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer
*cmd_buffer
,
928 uint32_t num_entries
,
929 uint32_t *state_offset
,
930 struct anv_state
*bt_state
)
932 *bt_state
= anv_cmd_buffer_alloc_binding_table(cmd_buffer
, num_entries
,
934 if (bt_state
->map
== NULL
) {
935 /* We ran out of space. Grab a new binding table block. */
936 VkResult result
= anv_cmd_buffer_new_binding_table_block(cmd_buffer
);
937 if (result
!= VK_SUCCESS
)
940 /* Re-emit state base addresses so we get the new surface state base
941 * address before we start emitting binding tables etc.
943 anv_cmd_buffer_emit_state_base_address(cmd_buffer
);
945 *bt_state
= anv_cmd_buffer_alloc_binding_table(cmd_buffer
, num_entries
,
947 assert(bt_state
->map
!= NULL
);
954 binding_table_for_surface_state(struct anv_cmd_buffer
*cmd_buffer
,
955 struct anv_state surface_state
,
958 uint32_t state_offset
;
959 struct anv_state bt_state
;
962 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer
, 1, &state_offset
,
964 if (result
!= VK_SUCCESS
)
967 uint32_t *bt_map
= bt_state
.map
;
968 bt_map
[0] = surface_state
.offset
+ state_offset
;
970 *bt_offset
= bt_state
.offset
;
975 clear_color_attachment(struct anv_cmd_buffer
*cmd_buffer
,
976 struct blorp_batch
*batch
,
977 const VkClearAttachment
*attachment
,
978 uint32_t rectCount
, const VkClearRect
*pRects
)
980 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
981 const uint32_t color_att
= attachment
->colorAttachment
;
982 const uint32_t att_idx
= subpass
->color_attachments
[color_att
].attachment
;
984 if (att_idx
== VK_ATTACHMENT_UNUSED
)
987 struct anv_render_pass_attachment
*pass_att
=
988 &cmd_buffer
->state
.pass
->attachments
[att_idx
];
989 struct anv_attachment_state
*att_state
=
990 &cmd_buffer
->state
.attachments
[att_idx
];
992 uint32_t binding_table
;
994 binding_table_for_surface_state(cmd_buffer
, att_state
->color_rt_state
,
996 if (result
!= VK_SUCCESS
)
999 union isl_color_value clear_color
=
1000 vk_to_isl_color(attachment
->clearValue
.color
);
1002 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1003 if (subpass
->view_mask
) {
1005 for_each_bit(view_idx
, subpass
->view_mask
) {
1006 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1007 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1008 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1009 blorp_clear_attachments(batch
, binding_table
,
1010 ISL_FORMAT_UNSUPPORTED
, pass_att
->samples
,
1013 offset
.x
+ extent
.width
,
1014 offset
.y
+ extent
.height
,
1015 true, clear_color
, false, 0.0f
, 0, 0);
1021 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1022 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1023 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1024 blorp_clear_attachments(batch
, binding_table
,
1025 ISL_FORMAT_UNSUPPORTED
, pass_att
->samples
,
1026 pRects
[r
].baseArrayLayer
,
1027 pRects
[r
].layerCount
,
1029 offset
.x
+ extent
.width
, offset
.y
+ extent
.height
,
1030 true, clear_color
, false, 0.0f
, 0, 0);
1035 clear_depth_stencil_attachment(struct anv_cmd_buffer
*cmd_buffer
,
1036 struct blorp_batch
*batch
,
1037 const VkClearAttachment
*attachment
,
1038 uint32_t rectCount
, const VkClearRect
*pRects
)
1040 static const union isl_color_value color_value
= { .u32
= { 0, } };
1041 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
1042 const uint32_t att_idx
= subpass
->depth_stencil_attachment
.attachment
;
1044 if (att_idx
== VK_ATTACHMENT_UNUSED
)
1047 struct anv_render_pass_attachment
*pass_att
=
1048 &cmd_buffer
->state
.pass
->attachments
[att_idx
];
1050 bool clear_depth
= attachment
->aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
1051 bool clear_stencil
= attachment
->aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
1053 enum isl_format depth_format
= ISL_FORMAT_UNSUPPORTED
;
1055 depth_format
= anv_get_isl_format(&cmd_buffer
->device
->info
,
1057 VK_IMAGE_ASPECT_DEPTH_BIT
,
1058 VK_IMAGE_TILING_OPTIMAL
);
1061 uint32_t binding_table
;
1063 binding_table_for_surface_state(cmd_buffer
,
1064 cmd_buffer
->state
.null_surface_state
,
1066 if (result
!= VK_SUCCESS
)
1069 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1070 if (subpass
->view_mask
) {
1072 for_each_bit(view_idx
, subpass
->view_mask
) {
1073 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1074 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1075 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1076 VkClearDepthStencilValue value
= attachment
->clearValue
.depthStencil
;
1077 blorp_clear_attachments(batch
, binding_table
,
1078 depth_format
, pass_att
->samples
,
1081 offset
.x
+ extent
.width
,
1082 offset
.y
+ extent
.height
,
1084 clear_depth
, value
.depth
,
1085 clear_stencil
? 0xff : 0, value
.stencil
);
1091 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1092 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1093 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1094 VkClearDepthStencilValue value
= attachment
->clearValue
.depthStencil
;
1095 blorp_clear_attachments(batch
, binding_table
,
1096 depth_format
, pass_att
->samples
,
1097 pRects
[r
].baseArrayLayer
,
1098 pRects
[r
].layerCount
,
1100 offset
.x
+ extent
.width
, offset
.y
+ extent
.height
,
1102 clear_depth
, value
.depth
,
1103 clear_stencil
? 0xff : 0, value
.stencil
);
1107 void anv_CmdClearAttachments(
1108 VkCommandBuffer commandBuffer
,
1109 uint32_t attachmentCount
,
1110 const VkClearAttachment
* pAttachments
,
1112 const VkClearRect
* pRects
)
1114 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
1116 /* Because this gets called within a render pass, we tell blorp not to
1117 * trash our depth and stencil buffers.
1119 struct blorp_batch batch
;
1120 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1121 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
1123 for (uint32_t a
= 0; a
< attachmentCount
; ++a
) {
1124 if (pAttachments
[a
].aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
) {
1125 clear_color_attachment(cmd_buffer
, &batch
,
1129 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
1135 blorp_batch_finish(&batch
);
/* Points within a render pass at which an attachment can be touched.
 *
 * NOTE(review): only SUBPASS_STAGE_RESOLVE survived the mangled paste; the
 * LOAD and DRAW enumerators are reconstructed — confirm against upstream.
 */
enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};
1145 subpass_needs_clear(const struct anv_cmd_buffer
*cmd_buffer
)
1147 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
1148 uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
.attachment
;
1150 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
1151 uint32_t a
= cmd_state
->subpass
->color_attachments
[i
].attachment
;
1152 if (a
== VK_ATTACHMENT_UNUSED
)
1155 assert(a
< cmd_state
->pass
->attachment_count
);
1156 if (cmd_state
->attachments
[a
].pending_clear_aspects
) {
1161 if (ds
!= VK_ATTACHMENT_UNUSED
) {
1162 assert(ds
< cmd_state
->pass
->attachment_count
);
1163 if (cmd_state
->attachments
[ds
].pending_clear_aspects
)
1171 anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1173 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
1174 const VkRect2D render_area
= cmd_buffer
->state
.render_area
;
1177 if (!subpass_needs_clear(cmd_buffer
))
1180 /* Because this gets called within a render pass, we tell blorp not to
1181 * trash our depth and stencil buffers.
1183 struct blorp_batch batch
;
1184 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1185 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
1187 VkClearRect clear_rect
= {
1188 .rect
= cmd_buffer
->state
.render_area
,
1189 .baseArrayLayer
= 0,
1190 .layerCount
= cmd_buffer
->state
.framebuffer
->layers
,
1193 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1194 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
1195 const uint32_t a
= cmd_state
->subpass
->color_attachments
[i
].attachment
;
1196 if (a
== VK_ATTACHMENT_UNUSED
)
1199 assert(a
< cmd_state
->pass
->attachment_count
);
1200 struct anv_attachment_state
*att_state
= &cmd_state
->attachments
[a
];
1202 if (!att_state
->pending_clear_aspects
)
1205 assert(att_state
->pending_clear_aspects
== VK_IMAGE_ASPECT_COLOR_BIT
);
1207 struct anv_image_view
*iview
= fb
->attachments
[a
];
1208 const struct anv_image
*image
= iview
->image
;
1209 struct blorp_surf surf
;
1210 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_COLOR_BIT
,
1211 att_state
->aux_usage
, &surf
);
1213 if (att_state
->fast_clear
) {
1214 surf
.clear_color
= vk_to_isl_color(att_state
->clear_value
.color
);
1216 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1218 * "After Render target fast clear, pipe-control with color cache
1219 * write-flush must be issued before sending any DRAW commands on
1220 * that render target."
1222 * This comment is a bit cryptic and doesn't really tell you what's
1223 * going or what's really needed. It appears that fast clear ops are
1224 * not properly synchronized with other drawing. This means that we
1225 * cannot have a fast clear operation in the pipe at the same time as
1226 * other regular drawing operations. We need to use a PIPE_CONTROL
1227 * to ensure that the contents of the previous draw hit the render
1228 * target before we resolve and then use a second PIPE_CONTROL after
1229 * the resolve to ensure that it is completed before any additional
1232 cmd_buffer
->state
.pending_pipe_bits
|=
1233 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1235 blorp_fast_clear(&batch
, &surf
, iview
->isl
.format
,
1236 iview
->isl
.base_level
,
1237 iview
->isl
.base_array_layer
, fb
->layers
,
1238 render_area
.offset
.x
, render_area
.offset
.y
,
1239 render_area
.offset
.x
+ render_area
.extent
.width
,
1240 render_area
.offset
.y
+ render_area
.extent
.height
);
1242 cmd_buffer
->state
.pending_pipe_bits
|=
1243 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1245 blorp_clear(&batch
, &surf
, iview
->isl
.format
,
1246 anv_swizzle_for_render(iview
->isl
.swizzle
),
1247 iview
->isl
.base_level
,
1248 iview
->isl
.base_array_layer
, fb
->layers
,
1249 render_area
.offset
.x
, render_area
.offset
.y
,
1250 render_area
.offset
.x
+ render_area
.extent
.width
,
1251 render_area
.offset
.y
+ render_area
.extent
.height
,
1252 vk_to_isl_color(att_state
->clear_value
.color
), NULL
);
1255 att_state
->pending_clear_aspects
= 0;
1258 const uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
.attachment
;
1259 assert(ds
== VK_ATTACHMENT_UNUSED
|| ds
< cmd_state
->pass
->attachment_count
);
1261 if (ds
!= VK_ATTACHMENT_UNUSED
&&
1262 cmd_state
->attachments
[ds
].pending_clear_aspects
) {
1264 VkClearAttachment clear_att
= {
1265 .aspectMask
= cmd_state
->attachments
[ds
].pending_clear_aspects
,
1266 .clearValue
= cmd_state
->attachments
[ds
].clear_value
,
1270 const uint8_t gen
= cmd_buffer
->device
->info
.gen
;
1271 bool clear_with_hiz
= gen
>= 8 && cmd_state
->attachments
[ds
].aux_usage
==
1273 const struct anv_image_view
*iview
= fb
->attachments
[ds
];
1275 if (clear_with_hiz
) {
1276 const bool clear_depth
= clear_att
.aspectMask
&
1277 VK_IMAGE_ASPECT_DEPTH_BIT
;
1278 const bool clear_stencil
= clear_att
.aspectMask
&
1279 VK_IMAGE_ASPECT_STENCIL_BIT
;
1281 /* Check against restrictions for depth buffer clearing. A great GPU
1282 * performance benefit isn't expected when using the HZ sequence for
1283 * stencil-only clears. Therefore, we don't emit a HZ op sequence for
1284 * a stencil clear in addition to using the BLORP-fallback for depth.
1287 if (!blorp_can_hiz_clear_depth(gen
, iview
->isl
.format
,
1288 iview
->image
->samples
,
1289 render_area
.offset
.x
,
1290 render_area
.offset
.y
,
1291 render_area
.offset
.x
+
1292 render_area
.extent
.width
,
1293 render_area
.offset
.y
+
1294 render_area
.extent
.height
)) {
1295 clear_with_hiz
= false;
1296 } else if (clear_att
.clearValue
.depthStencil
.depth
!=
1298 /* Don't enable fast depth clears for any color not equal to
1301 clear_with_hiz
= false;
1302 } else if (gen
== 8 &&
1303 anv_can_sample_with_hiz(&cmd_buffer
->device
->info
,
1305 iview
->image
->samples
)) {
1306 /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
1307 * fast-cleared portion of a HiZ buffer. Testing has revealed
1308 * that Gen8 only supports returning 0.0f. Gens prior to gen8 do
1309 * not support this feature at all.
1311 clear_with_hiz
= false;
1315 if (clear_with_hiz
) {
1316 blorp_gen8_hiz_clear_attachments(&batch
, iview
->image
->samples
,
1317 render_area
.offset
.x
,
1318 render_area
.offset
.y
,
1319 render_area
.offset
.x
+
1320 render_area
.extent
.width
,
1321 render_area
.offset
.y
+
1322 render_area
.extent
.height
,
1323 clear_depth
, clear_stencil
,
1324 clear_att
.clearValue
.
1325 depthStencil
.stencil
);
1329 if (!clear_with_hiz
) {
1330 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
1331 &clear_att
, 1, &clear_rect
);
1334 cmd_state
->attachments
[ds
].pending_clear_aspects
= 0;
1337 blorp_batch_finish(&batch
);
1341 resolve_image(struct blorp_batch
*batch
,
1342 const struct anv_image
*src_image
,
1343 uint32_t src_level
, uint32_t src_layer
,
1344 const struct anv_image
*dst_image
,
1345 uint32_t dst_level
, uint32_t dst_layer
,
1346 VkImageAspectFlags aspect_mask
,
1347 uint32_t src_x
, uint32_t src_y
, uint32_t dst_x
, uint32_t dst_y
,
1348 uint32_t width
, uint32_t height
)
1350 assert(src_image
->type
== VK_IMAGE_TYPE_2D
);
1351 assert(src_image
->samples
> 1);
1352 assert(dst_image
->type
== VK_IMAGE_TYPE_2D
);
1353 assert(dst_image
->samples
== 1);
1356 for_each_bit(a
, aspect_mask
) {
1357 VkImageAspectFlagBits aspect
= 1 << a
;
1359 struct blorp_surf src_surf
, dst_surf
;
1360 get_blorp_surf_for_anv_image(src_image
, aspect
,
1361 src_image
->aux_usage
, &src_surf
);
1362 get_blorp_surf_for_anv_image(dst_image
, aspect
,
1363 dst_image
->aux_usage
, &dst_surf
);
1366 &src_surf
, src_level
, src_layer
,
1367 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1368 &dst_surf
, dst_level
, dst_layer
,
1369 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1370 src_x
, src_y
, src_x
+ width
, src_y
+ height
,
1371 dst_x
, dst_y
, dst_x
+ width
, dst_y
+ height
,
1372 0x2600 /* GL_NEAREST */, false, false);
1376 void anv_CmdResolveImage(
1377 VkCommandBuffer commandBuffer
,
1379 VkImageLayout srcImageLayout
,
1381 VkImageLayout dstImageLayout
,
1382 uint32_t regionCount
,
1383 const VkImageResolve
* pRegions
)
1385 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
1386 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
1387 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
1389 struct blorp_batch batch
;
1390 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1392 for (uint32_t r
= 0; r
< regionCount
; r
++) {
1393 assert(pRegions
[r
].srcSubresource
.aspectMask
==
1394 pRegions
[r
].dstSubresource
.aspectMask
);
1395 assert(anv_get_layerCount(src_image
, &pRegions
[r
].srcSubresource
) ==
1396 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
));
1398 const uint32_t layer_count
=
1399 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
);
1401 for (uint32_t layer
= 0; layer
< layer_count
; layer
++) {
1402 resolve_image(&batch
,
1403 src_image
, pRegions
[r
].srcSubresource
.mipLevel
,
1404 pRegions
[r
].srcSubresource
.baseArrayLayer
+ layer
,
1405 dst_image
, pRegions
[r
].dstSubresource
.mipLevel
,
1406 pRegions
[r
].dstSubresource
.baseArrayLayer
+ layer
,
1407 pRegions
[r
].dstSubresource
.aspectMask
,
1408 pRegions
[r
].srcOffset
.x
, pRegions
[r
].srcOffset
.y
,
1409 pRegions
[r
].dstOffset
.x
, pRegions
[r
].dstOffset
.y
,
1410 pRegions
[r
].extent
.width
, pRegions
[r
].extent
.height
);
1414 blorp_batch_finish(&batch
);
1418 ccs_resolve_attachment(struct anv_cmd_buffer
*cmd_buffer
,
1419 struct blorp_batch
*batch
,
1422 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1423 struct anv_attachment_state
*att_state
=
1424 &cmd_buffer
->state
.attachments
[att
];
1426 if (att_state
->aux_usage
== ISL_AUX_USAGE_NONE
||
1427 att_state
->aux_usage
== ISL_AUX_USAGE_MCS
)
1428 return; /* Nothing to resolve */
1430 assert(att_state
->aux_usage
== ISL_AUX_USAGE_CCS_E
||
1431 att_state
->aux_usage
== ISL_AUX_USAGE_CCS_D
);
1433 struct anv_render_pass
*pass
= cmd_buffer
->state
.pass
;
1434 const uint32_t subpass_idx
= anv_get_subpass_id(&cmd_buffer
->state
);
1436 /* Scan forward to see what all ways this attachment will be used.
1437 * Ideally, we would like to resolve in the same subpass as the last write
1438 * of a particular attachment. That way we only resolve once but it's
1439 * still hot in the cache.
1441 bool found_draw
= false;
1442 enum anv_subpass_usage usage
= 0;
1443 for (uint32_t s
= subpass_idx
+ 1; s
< pass
->subpass_count
; s
++) {
1444 usage
|= pass
->attachments
[att
].subpass_usage
[s
];
1446 if (usage
& (ANV_SUBPASS_USAGE_DRAW
| ANV_SUBPASS_USAGE_RESOLVE_DST
)) {
1447 /* We found another subpass that draws to this attachment. We'll
1448 * wait to resolve until then.
1455 struct anv_image_view
*iview
= fb
->attachments
[att
];
1456 const struct anv_image
*image
= iview
->image
;
1457 assert(image
->aspects
== VK_IMAGE_ASPECT_COLOR_BIT
);
1459 enum blorp_fast_clear_op resolve_op
= BLORP_FAST_CLEAR_OP_NONE
;
1461 /* This is the last subpass that writes to this attachment so we need to
1462 * resolve here. Ideally, we would like to only resolve if the storeOp
1463 * is set to VK_ATTACHMENT_STORE_OP_STORE. However, we need to ensure
1464 * that the CCS bits are set to "resolved" because there may be copy or
1465 * blit operations (which may ignore CCS) between now and the next time
1466 * we render and we need to ensure that anything they write will be
1467 * respected in the next render. Unfortunately, the hardware does not
1468 * provide us with any sort of "invalidate" pass that sets the CCS to
1469 * "resolved" without writing to the render target.
1471 if (iview
->image
->aux_usage
!= ISL_AUX_USAGE_CCS_E
) {
1472 /* The image destination surface doesn't support compression outside
1473 * the render pass. We need a full resolve.
1475 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_FULL
;
1476 } else if (att_state
->fast_clear
) {
1477 /* We don't know what to do with clear colors outside the render
1478 * pass. We need a partial resolve. Only transparent black is
1479 * built into the surface state object and thus no resolve is
1480 * required for this case.
1482 if (att_state
->clear_value
.color
.uint32
[0] ||
1483 att_state
->clear_value
.color
.uint32
[1] ||
1484 att_state
->clear_value
.color
.uint32
[2] ||
1485 att_state
->clear_value
.color
.uint32
[3])
1486 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL
;
1488 /* The image "natively" supports all the compression we care about
1489 * and we don't need to resolve at all. If this is the case, we also
1490 * don't need to resolve for any of the input attachment cases below.
1493 } else if (usage
& ANV_SUBPASS_USAGE_INPUT
) {
1494 /* Input attachments are clear-color aware so, at least on Sky Lake, we
1495 * can frequently sample from them with no resolves at all.
1497 if (att_state
->aux_usage
!= att_state
->input_aux_usage
) {
1498 assert(att_state
->input_aux_usage
== ISL_AUX_USAGE_NONE
);
1499 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_FULL
;
1500 } else if (!att_state
->clear_color_is_zero_one
) {
1501 /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
1503 * "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
1504 * is fast cleared with non-0/1 clear value, this RT must be
1505 * partially resolved (refer to Partial Resolve operation) before
1506 * binding this surface to Sampler."
1508 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL
;
1512 if (resolve_op
== BLORP_FAST_CLEAR_OP_NONE
)
1515 struct blorp_surf surf
;
1516 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_COLOR_BIT
,
1517 att_state
->aux_usage
, &surf
);
1518 if (att_state
->fast_clear
)
1519 surf
.clear_color
= vk_to_isl_color(att_state
->clear_value
.color
);
1521 /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
1523 * "When performing a render target resolve, PIPE_CONTROL with end of
1524 * pipe sync must be delivered."
1526 * This comment is a bit cryptic and doesn't really tell you what's going
1527 * or what's really needed. It appears that fast clear ops are not
1528 * properly synchronized with other drawing. We need to use a PIPE_CONTROL
1529 * to ensure that the contents of the previous draw hit the render target
1530 * before we resolve and then use a second PIPE_CONTROL after the resolve
1531 * to ensure that it is completed before any additional drawing occurs.
1533 cmd_buffer
->state
.pending_pipe_bits
|=
1534 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1536 for (uint32_t layer
= 0; layer
< fb
->layers
; layer
++) {
1537 blorp_ccs_resolve(batch
, &surf
,
1538 iview
->isl
.base_level
,
1539 iview
->isl
.base_array_layer
+ layer
,
1540 iview
->isl
.format
, resolve_op
);
1543 cmd_buffer
->state
.pending_pipe_bits
|=
1544 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1546 /* Once we've done any sort of resolve, we're no longer fast-cleared */
1547 att_state
->fast_clear
= false;
1548 if (att_state
->aux_usage
== ISL_AUX_USAGE_CCS_D
)
1549 att_state
->aux_usage
= ISL_AUX_USAGE_NONE
;
1553 anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1555 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1556 struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
1559 struct blorp_batch batch
;
1560 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1562 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
1563 const uint32_t att
= subpass
->color_attachments
[i
].attachment
;
1564 if (att
== VK_ATTACHMENT_UNUSED
)
1567 assert(att
< cmd_buffer
->state
.pass
->attachment_count
);
1568 ccs_resolve_attachment(cmd_buffer
, &batch
, att
);
1571 if (subpass
->has_resolve
) {
1572 /* We are about to do some MSAA resolves. We need to flush so that the
1573 * result of writes to the MSAA color attachments show up in the sampler
1574 * when we blit to the single-sampled resolve target.
1576 cmd_buffer
->state
.pending_pipe_bits
|=
1577 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT
|
1578 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
;
1580 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
1581 uint32_t src_att
= subpass
->color_attachments
[i
].attachment
;
1582 uint32_t dst_att
= subpass
->resolve_attachments
[i
].attachment
;
1584 if (dst_att
== VK_ATTACHMENT_UNUSED
)
1587 assert(src_att
< cmd_buffer
->state
.pass
->attachment_count
);
1588 assert(dst_att
< cmd_buffer
->state
.pass
->attachment_count
);
1590 if (cmd_buffer
->state
.attachments
[dst_att
].pending_clear_aspects
) {
1591 /* From the Vulkan 1.0 spec:
1593 * If the first use of an attachment in a render pass is as a
1594 * resolve attachment, then the loadOp is effectively ignored
1595 * as the resolve is guaranteed to overwrite all pixels in the
1598 cmd_buffer
->state
.attachments
[dst_att
].pending_clear_aspects
= 0;
1601 struct anv_image_view
*src_iview
= fb
->attachments
[src_att
];
1602 struct anv_image_view
*dst_iview
= fb
->attachments
[dst_att
];
1604 const VkRect2D render_area
= cmd_buffer
->state
.render_area
;
1606 assert(src_iview
->aspect_mask
== dst_iview
->aspect_mask
);
1607 resolve_image(&batch
, src_iview
->image
,
1608 src_iview
->isl
.base_level
,
1609 src_iview
->isl
.base_array_layer
,
1611 dst_iview
->isl
.base_level
,
1612 dst_iview
->isl
.base_array_layer
,
1613 src_iview
->aspect_mask
,
1614 render_area
.offset
.x
, render_area
.offset
.y
,
1615 render_area
.offset
.x
, render_area
.offset
.y
,
1616 render_area
.extent
.width
, render_area
.extent
.height
);
1618 ccs_resolve_attachment(cmd_buffer
, &batch
, dst_att
);
1622 blorp_batch_finish(&batch
);
1626 anv_gen8_hiz_op_resolve(struct anv_cmd_buffer
*cmd_buffer
,
1627 const struct anv_image
*image
,
1628 enum blorp_hiz_op op
)
1632 /* Don't resolve depth buffers without an auxiliary HiZ buffer and
1633 * don't perform such a resolve on gens that don't support it.
1635 if (cmd_buffer
->device
->info
.gen
< 8 ||
1636 image
->aux_usage
!= ISL_AUX_USAGE_HIZ
)
1639 assert(op
== BLORP_HIZ_OP_HIZ_RESOLVE
||
1640 op
== BLORP_HIZ_OP_DEPTH_RESOLVE
);
1642 struct blorp_batch batch
;
1643 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1645 struct blorp_surf surf
;
1646 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
1647 ISL_AUX_USAGE_NONE
, &surf
);
1649 /* Manually add the aux HiZ surf */
1650 surf
.aux_surf
= &image
->aux_surface
.isl
,
1651 surf
.aux_addr
= (struct blorp_address
) {
1652 .buffer
= image
->bo
,
1653 .offset
= image
->offset
+ image
->aux_surface
.offset
,
1655 surf
.aux_usage
= ISL_AUX_USAGE_HIZ
;
1657 surf
.clear_color
.u32
[0] = (uint32_t) ANV_HZ_FC_VAL
;
1659 blorp_gen6_hiz_op(&batch
, &surf
, 0, 0, op
);
1660 blorp_batch_finish(&batch
);