2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "anv_private.h"
27 lookup_blorp_shader(struct blorp_context
*blorp
,
28 const void *key
, uint32_t key_size
,
29 uint32_t *kernel_out
, void *prog_data_out
)
31 struct anv_device
*device
= blorp
->driver_ctx
;
33 /* The blorp cache must be a real cache */
34 assert(device
->blorp_shader_cache
.cache
);
36 struct anv_shader_bin
*bin
=
37 anv_pipeline_cache_search(&device
->blorp_shader_cache
, key
, key_size
);
41 /* The cache already has a reference and it's not going anywhere so there
42 * is no need to hold a second reference.
44 anv_shader_bin_unref(device
, bin
);
46 *kernel_out
= bin
->kernel
.offset
;
47 *(const struct brw_stage_prog_data
**)prog_data_out
= bin
->prog_data
;
53 upload_blorp_shader(struct blorp_context
*blorp
,
54 const void *key
, uint32_t key_size
,
55 const void *kernel
, uint32_t kernel_size
,
56 const struct brw_stage_prog_data
*prog_data
,
57 uint32_t prog_data_size
,
58 uint32_t *kernel_out
, void *prog_data_out
)
60 struct anv_device
*device
= blorp
->driver_ctx
;
62 /* The blorp cache must be a real cache */
63 assert(device
->blorp_shader_cache
.cache
);
65 struct anv_pipeline_bind_map bind_map
= {
70 struct anv_shader_bin
*bin
=
71 anv_pipeline_cache_upload_kernel(&device
->blorp_shader_cache
,
72 key
, key_size
, kernel
, kernel_size
,
73 prog_data
, prog_data_size
, &bind_map
);
78 /* The cache already has a reference and it's not going anywhere so there
79 * is no need to hold a second reference.
81 anv_shader_bin_unref(device
, bin
);
83 *kernel_out
= bin
->kernel
.offset
;
84 *(const struct brw_stage_prog_data
**)prog_data_out
= bin
->prog_data
;
90 anv_device_init_blorp(struct anv_device
*device
)
92 anv_pipeline_cache_init(&device
->blorp_shader_cache
, device
, true);
93 blorp_init(&device
->blorp
, device
, &device
->isl_dev
);
94 device
->blorp
.compiler
= device
->instance
->physicalDevice
.compiler
;
95 device
->blorp
.lookup_shader
= lookup_blorp_shader
;
96 device
->blorp
.upload_shader
= upload_blorp_shader
;
97 switch (device
->info
.gen
) {
99 if (device
->info
.is_haswell
) {
100 device
->blorp
.exec
= gen75_blorp_exec
;
102 device
->blorp
.exec
= gen7_blorp_exec
;
106 device
->blorp
.exec
= gen8_blorp_exec
;
109 device
->blorp
.exec
= gen9_blorp_exec
;
112 device
->blorp
.exec
= gen10_blorp_exec
;
115 unreachable("Unknown hardware generation");
120 anv_device_finish_blorp(struct anv_device
*device
)
122 blorp_finish(&device
->blorp
);
123 anv_pipeline_cache_finish(&device
->blorp_shader_cache
);
127 get_blorp_surf_for_anv_buffer(struct anv_device
*device
,
128 struct anv_buffer
*buffer
, uint64_t offset
,
129 uint32_t width
, uint32_t height
,
130 uint32_t row_pitch
, enum isl_format format
,
131 struct blorp_surf
*blorp_surf
,
132 struct isl_surf
*isl_surf
)
134 const struct isl_format_layout
*fmtl
=
135 isl_format_get_layout(format
);
138 /* ASTC is the only format which doesn't support linear layouts.
139 * Create an equivalently sized surface with ISL to get around this.
141 if (fmtl
->txc
== ISL_TXC_ASTC
) {
142 /* Use an equivalently sized format */
143 format
= ISL_FORMAT_R32G32B32A32_UINT
;
144 assert(fmtl
->bpb
== isl_format_get_layout(format
)->bpb
);
146 /* Shrink the dimensions for the new format */
147 width
= DIV_ROUND_UP(width
, fmtl
->bw
);
148 height
= DIV_ROUND_UP(height
, fmtl
->bh
);
151 *blorp_surf
= (struct blorp_surf
) {
154 .buffer
= buffer
->bo
,
155 .offset
= buffer
->offset
+ offset
,
156 .mocs
= device
->default_mocs
,
160 ok
= isl_surf_init(&device
->isl_dev
, isl_surf
,
161 .dim
= ISL_SURF_DIM_2D
,
169 .row_pitch
= row_pitch
,
170 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
|
171 ISL_SURF_USAGE_RENDER_TARGET_BIT
,
172 .tiling_flags
= ISL_TILING_LINEAR_BIT
);
176 #define ANV_AUX_USAGE_DEFAULT ((enum isl_aux_usage)0xff)
178 static struct blorp_address
179 anv_to_blorp_address(struct anv_address addr
)
181 return (struct blorp_address
) {
183 .offset
= addr
.offset
,
188 get_blorp_surf_for_anv_image(const struct anv_device
*device
,
189 const struct anv_image
*image
,
190 VkImageAspectFlags aspect
,
191 enum isl_aux_usage aux_usage
,
192 struct blorp_surf
*blorp_surf
)
194 uint32_t plane
= anv_image_aspect_to_plane(image
->aspects
, aspect
);
196 if (aux_usage
== ANV_AUX_USAGE_DEFAULT
)
197 aux_usage
= image
->planes
[plane
].aux_usage
;
199 if (aspect
== VK_IMAGE_ASPECT_STENCIL_BIT
||
200 aux_usage
== ISL_AUX_USAGE_HIZ
)
201 aux_usage
= ISL_AUX_USAGE_NONE
;
203 const struct anv_surface
*surface
= &image
->planes
[plane
].surface
;
204 *blorp_surf
= (struct blorp_surf
) {
205 .surf
= &surface
->isl
,
207 .buffer
= image
->planes
[plane
].bo
,
208 .offset
= image
->planes
[plane
].bo_offset
+ surface
->offset
,
209 .mocs
= device
->default_mocs
,
213 if (aux_usage
!= ISL_AUX_USAGE_NONE
) {
214 const struct anv_surface
*aux_surface
= &image
->planes
[plane
].aux_surface
;
215 blorp_surf
->aux_surf
= &aux_surface
->isl
,
216 blorp_surf
->aux_addr
= (struct blorp_address
) {
217 .buffer
= image
->planes
[plane
].bo
,
218 .offset
= image
->planes
[plane
].bo_offset
+ aux_surface
->offset
,
219 .mocs
= device
->default_mocs
,
221 blorp_surf
->aux_usage
= aux_usage
;
225 void anv_CmdCopyImage(
226 VkCommandBuffer commandBuffer
,
228 VkImageLayout srcImageLayout
,
230 VkImageLayout dstImageLayout
,
231 uint32_t regionCount
,
232 const VkImageCopy
* pRegions
)
234 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
235 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
236 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
238 struct blorp_batch batch
;
239 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
241 for (unsigned r
= 0; r
< regionCount
; r
++) {
242 VkOffset3D srcOffset
=
243 anv_sanitize_image_offset(src_image
->type
, pRegions
[r
].srcOffset
);
244 VkOffset3D dstOffset
=
245 anv_sanitize_image_offset(dst_image
->type
, pRegions
[r
].dstOffset
);
247 anv_sanitize_image_extent(src_image
->type
, pRegions
[r
].extent
);
249 unsigned dst_base_layer
, layer_count
;
250 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
251 dst_base_layer
= pRegions
[r
].dstOffset
.z
;
252 layer_count
= pRegions
[r
].extent
.depth
;
254 dst_base_layer
= pRegions
[r
].dstSubresource
.baseArrayLayer
;
256 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
);
259 unsigned src_base_layer
;
260 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
261 src_base_layer
= pRegions
[r
].srcOffset
.z
;
263 src_base_layer
= pRegions
[r
].srcSubresource
.baseArrayLayer
;
264 assert(layer_count
==
265 anv_get_layerCount(src_image
, &pRegions
[r
].srcSubresource
));
268 VkImageAspectFlags src_mask
= pRegions
[r
].srcSubresource
.aspectMask
,
269 dst_mask
= pRegions
[r
].dstSubresource
.aspectMask
;
271 assert(anv_image_aspects_compatible(src_mask
, dst_mask
));
273 if (_mesa_bitcount(src_mask
) > 1) {
275 anv_foreach_image_aspect_bit(aspect_bit
, src_image
, src_mask
) {
276 struct blorp_surf src_surf
, dst_surf
;
277 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
278 src_image
, 1UL << aspect_bit
,
279 ANV_AUX_USAGE_DEFAULT
, &src_surf
);
280 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
281 dst_image
, 1UL << aspect_bit
,
282 ANV_AUX_USAGE_DEFAULT
, &dst_surf
);
284 for (unsigned i
= 0; i
< layer_count
; i
++) {
285 blorp_copy(&batch
, &src_surf
, pRegions
[r
].srcSubresource
.mipLevel
,
287 &dst_surf
, pRegions
[r
].dstSubresource
.mipLevel
,
289 srcOffset
.x
, srcOffset
.y
,
290 dstOffset
.x
, dstOffset
.y
,
291 extent
.width
, extent
.height
);
295 struct blorp_surf src_surf
, dst_surf
;
296 get_blorp_surf_for_anv_image(cmd_buffer
->device
, src_image
, src_mask
,
297 ANV_AUX_USAGE_DEFAULT
, &src_surf
);
298 get_blorp_surf_for_anv_image(cmd_buffer
->device
, dst_image
, dst_mask
,
299 ANV_AUX_USAGE_DEFAULT
, &dst_surf
);
301 for (unsigned i
= 0; i
< layer_count
; i
++) {
302 blorp_copy(&batch
, &src_surf
, pRegions
[r
].srcSubresource
.mipLevel
,
304 &dst_surf
, pRegions
[r
].dstSubresource
.mipLevel
,
306 srcOffset
.x
, srcOffset
.y
,
307 dstOffset
.x
, dstOffset
.y
,
308 extent
.width
, extent
.height
);
313 blorp_batch_finish(&batch
);
317 copy_buffer_to_image(struct anv_cmd_buffer
*cmd_buffer
,
318 struct anv_buffer
*anv_buffer
,
319 struct anv_image
*anv_image
,
320 uint32_t regionCount
,
321 const VkBufferImageCopy
* pRegions
,
322 bool buffer_to_image
)
324 struct blorp_batch batch
;
325 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
328 struct blorp_surf surf
;
331 } image
, buffer
, *src
, *dst
;
334 buffer
.offset
= (VkOffset3D
) { 0, 0, 0 };
336 if (buffer_to_image
) {
344 for (unsigned r
= 0; r
< regionCount
; r
++) {
345 const VkImageAspectFlags aspect
= pRegions
[r
].imageSubresource
.aspectMask
;
347 get_blorp_surf_for_anv_image(cmd_buffer
->device
, anv_image
, aspect
,
348 ANV_AUX_USAGE_DEFAULT
, &image
.surf
);
350 anv_sanitize_image_offset(anv_image
->type
, pRegions
[r
].imageOffset
);
351 image
.level
= pRegions
[r
].imageSubresource
.mipLevel
;
354 anv_sanitize_image_extent(anv_image
->type
, pRegions
[r
].imageExtent
);
355 if (anv_image
->type
!= VK_IMAGE_TYPE_3D
) {
356 image
.offset
.z
= pRegions
[r
].imageSubresource
.baseArrayLayer
;
358 anv_get_layerCount(anv_image
, &pRegions
[r
].imageSubresource
);
361 const enum isl_format buffer_format
=
362 anv_get_isl_format(&cmd_buffer
->device
->info
, anv_image
->vk_format
,
363 aspect
, VK_IMAGE_TILING_LINEAR
);
365 const VkExtent3D bufferImageExtent
= {
366 .width
= pRegions
[r
].bufferRowLength
?
367 pRegions
[r
].bufferRowLength
: extent
.width
,
368 .height
= pRegions
[r
].bufferImageHeight
?
369 pRegions
[r
].bufferImageHeight
: extent
.height
,
372 const struct isl_format_layout
*buffer_fmtl
=
373 isl_format_get_layout(buffer_format
);
375 const uint32_t buffer_row_pitch
=
376 DIV_ROUND_UP(bufferImageExtent
.width
, buffer_fmtl
->bw
) *
377 (buffer_fmtl
->bpb
/ 8);
379 const uint32_t buffer_layer_stride
=
380 DIV_ROUND_UP(bufferImageExtent
.height
, buffer_fmtl
->bh
) *
383 struct isl_surf buffer_isl_surf
;
384 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
385 anv_buffer
, pRegions
[r
].bufferOffset
,
386 extent
.width
, extent
.height
,
387 buffer_row_pitch
, buffer_format
,
388 &buffer
.surf
, &buffer_isl_surf
);
390 for (unsigned z
= 0; z
< extent
.depth
; z
++) {
391 blorp_copy(&batch
, &src
->surf
, src
->level
, src
->offset
.z
,
392 &dst
->surf
, dst
->level
, dst
->offset
.z
,
393 src
->offset
.x
, src
->offset
.y
, dst
->offset
.x
, dst
->offset
.y
,
394 extent
.width
, extent
.height
);
397 buffer
.surf
.addr
.offset
+= buffer_layer_stride
;
401 blorp_batch_finish(&batch
);
404 void anv_CmdCopyBufferToImage(
405 VkCommandBuffer commandBuffer
,
408 VkImageLayout dstImageLayout
,
409 uint32_t regionCount
,
410 const VkBufferImageCopy
* pRegions
)
412 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
413 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
414 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
416 copy_buffer_to_image(cmd_buffer
, src_buffer
, dst_image
,
417 regionCount
, pRegions
, true);
420 void anv_CmdCopyImageToBuffer(
421 VkCommandBuffer commandBuffer
,
423 VkImageLayout srcImageLayout
,
425 uint32_t regionCount
,
426 const VkBufferImageCopy
* pRegions
)
428 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
429 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
430 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
432 copy_buffer_to_image(cmd_buffer
, dst_buffer
, src_image
,
433 regionCount
, pRegions
, false);
437 flip_coords(unsigned *src0
, unsigned *src1
, unsigned *dst0
, unsigned *dst1
)
441 unsigned tmp
= *src0
;
448 unsigned tmp
= *dst0
;
457 void anv_CmdBlitImage(
458 VkCommandBuffer commandBuffer
,
460 VkImageLayout srcImageLayout
,
462 VkImageLayout dstImageLayout
,
463 uint32_t regionCount
,
464 const VkImageBlit
* pRegions
,
468 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
469 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
470 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
472 struct blorp_surf src
, dst
;
476 case VK_FILTER_NEAREST
:
477 gl_filter
= 0x2600; /* GL_NEAREST */
479 case VK_FILTER_LINEAR
:
480 gl_filter
= 0x2601; /* GL_LINEAR */
483 unreachable("Invalid filter");
486 struct blorp_batch batch
;
487 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
489 for (unsigned r
= 0; r
< regionCount
; r
++) {
490 const VkImageSubresourceLayers
*src_res
= &pRegions
[r
].srcSubresource
;
491 const VkImageSubresourceLayers
*dst_res
= &pRegions
[r
].dstSubresource
;
493 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
494 src_image
, src_res
->aspectMask
,
495 ANV_AUX_USAGE_DEFAULT
, &src
);
496 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
497 dst_image
, dst_res
->aspectMask
,
498 ANV_AUX_USAGE_DEFAULT
, &dst
);
500 struct anv_format_plane src_format
=
501 anv_get_format_plane(&cmd_buffer
->device
->info
, src_image
->vk_format
,
502 src_res
->aspectMask
, src_image
->tiling
);
503 struct anv_format_plane dst_format
=
504 anv_get_format_plane(&cmd_buffer
->device
->info
, dst_image
->vk_format
,
505 dst_res
->aspectMask
, dst_image
->tiling
);
507 unsigned dst_start
, dst_end
;
508 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
509 assert(dst_res
->baseArrayLayer
== 0);
510 dst_start
= pRegions
[r
].dstOffsets
[0].z
;
511 dst_end
= pRegions
[r
].dstOffsets
[1].z
;
513 dst_start
= dst_res
->baseArrayLayer
;
514 dst_end
= dst_start
+ anv_get_layerCount(dst_image
, dst_res
);
517 unsigned src_start
, src_end
;
518 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
519 assert(src_res
->baseArrayLayer
== 0);
520 src_start
= pRegions
[r
].srcOffsets
[0].z
;
521 src_end
= pRegions
[r
].srcOffsets
[1].z
;
523 src_start
= src_res
->baseArrayLayer
;
524 src_end
= src_start
+ anv_get_layerCount(src_image
, src_res
);
527 bool flip_z
= flip_coords(&src_start
, &src_end
, &dst_start
, &dst_end
);
528 float src_z_step
= (float)(src_end
+ 1 - src_start
) /
529 (float)(dst_end
+ 1 - dst_start
);
536 unsigned src_x0
= pRegions
[r
].srcOffsets
[0].x
;
537 unsigned src_x1
= pRegions
[r
].srcOffsets
[1].x
;
538 unsigned dst_x0
= pRegions
[r
].dstOffsets
[0].x
;
539 unsigned dst_x1
= pRegions
[r
].dstOffsets
[1].x
;
540 bool flip_x
= flip_coords(&src_x0
, &src_x1
, &dst_x0
, &dst_x1
);
542 unsigned src_y0
= pRegions
[r
].srcOffsets
[0].y
;
543 unsigned src_y1
= pRegions
[r
].srcOffsets
[1].y
;
544 unsigned dst_y0
= pRegions
[r
].dstOffsets
[0].y
;
545 unsigned dst_y1
= pRegions
[r
].dstOffsets
[1].y
;
546 bool flip_y
= flip_coords(&src_y0
, &src_y1
, &dst_y0
, &dst_y1
);
548 const unsigned num_layers
= dst_end
- dst_start
;
549 for (unsigned i
= 0; i
< num_layers
; i
++) {
550 unsigned dst_z
= dst_start
+ i
;
551 unsigned src_z
= src_start
+ i
* src_z_step
;
553 blorp_blit(&batch
, &src
, src_res
->mipLevel
, src_z
,
554 src_format
.isl_format
, src_format
.swizzle
,
555 &dst
, dst_res
->mipLevel
, dst_z
,
556 dst_format
.isl_format
,
557 anv_swizzle_for_render(dst_format
.swizzle
),
558 src_x0
, src_y0
, src_x1
, src_y1
,
559 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
560 gl_filter
, flip_x
, flip_y
);
565 blorp_batch_finish(&batch
);
568 static enum isl_format
569 isl_format_for_size(unsigned size_B
)
572 case 4: return ISL_FORMAT_R32_UINT
;
573 case 8: return ISL_FORMAT_R32G32_UINT
;
574 case 16: return ISL_FORMAT_R32G32B32A32_UINT
;
576 unreachable("Not a power-of-two format size");
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero.  A zero argument places no
 * constraint on the result, so gcd_pow2_u64(0, b) is the lowest set bit
 * of b (and likewise for gcd_pow2_u64(a, 0)).
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The largest power of two dividing a value is its lowest set bit, and
    * the lowest set bit of (a | b) is the smaller of the two operands'
    * lowest set bits.  If either a or b is 0 it contributes no bits, so the
    * other one wins.  If both are 0 then we will hit the assert above.
    *
    * The whole computation stays in uint64_t so that common alignments of
    * 2^31 and above are returned correctly; building the result by shifting
    * the int constant 1 would be undefined for such shift counts.
    */
   const uint64_t bits = a | b;

   return bits & (~bits + 1);
}
598 /* This is maximum possible width/height our HW can handle */
599 #define MAX_SURFACE_DIM (1ull << 14)
601 void anv_CmdCopyBuffer(
602 VkCommandBuffer commandBuffer
,
605 uint32_t regionCount
,
606 const VkBufferCopy
* pRegions
)
608 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
609 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
610 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
612 struct blorp_batch batch
;
613 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
615 for (unsigned r
= 0; r
< regionCount
; r
++) {
616 struct blorp_address src
= {
617 .buffer
= src_buffer
->bo
,
618 .offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
,
619 .mocs
= cmd_buffer
->device
->default_mocs
,
621 struct blorp_address dst
= {
622 .buffer
= dst_buffer
->bo
,
623 .offset
= dst_buffer
->offset
+ pRegions
[r
].dstOffset
,
624 .mocs
= cmd_buffer
->device
->default_mocs
,
627 blorp_buffer_copy(&batch
, src
, dst
, pRegions
[r
].size
);
630 blorp_batch_finish(&batch
);
633 void anv_CmdUpdateBuffer(
634 VkCommandBuffer commandBuffer
,
636 VkDeviceSize dstOffset
,
637 VkDeviceSize dataSize
,
640 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
641 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
643 struct blorp_batch batch
;
644 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
646 /* We can't quite grab a full block because the state stream needs a
647 * little data at the top to build its linked list.
649 const uint32_t max_update_size
=
650 cmd_buffer
->device
->dynamic_state_pool
.block_size
- 64;
652 assert(max_update_size
< MAX_SURFACE_DIM
* 4);
654 /* We're about to read data that was written from the CPU. Flush the
655 * texture cache so we don't get anything stale.
657 cmd_buffer
->state
.pending_pipe_bits
|= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT
;
660 const uint32_t copy_size
= MIN2(dataSize
, max_update_size
);
662 struct anv_state tmp_data
=
663 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
, copy_size
, 64);
665 memcpy(tmp_data
.map
, pData
, copy_size
);
667 anv_state_flush(cmd_buffer
->device
, tmp_data
);
669 struct blorp_address src
= {
670 .buffer
= &cmd_buffer
->device
->dynamic_state_pool
.block_pool
.bo
,
671 .offset
= tmp_data
.offset
,
672 .mocs
= cmd_buffer
->device
->default_mocs
,
674 struct blorp_address dst
= {
675 .buffer
= dst_buffer
->bo
,
676 .offset
= dst_buffer
->offset
+ dstOffset
,
677 .mocs
= cmd_buffer
->device
->default_mocs
,
680 blorp_buffer_copy(&batch
, src
, dst
, copy_size
);
682 dataSize
-= copy_size
;
683 dstOffset
+= copy_size
;
684 pData
= (void *)pData
+ copy_size
;
687 blorp_batch_finish(&batch
);
690 void anv_CmdFillBuffer(
691 VkCommandBuffer commandBuffer
,
693 VkDeviceSize dstOffset
,
694 VkDeviceSize fillSize
,
697 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
698 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
699 struct blorp_surf surf
;
700 struct isl_surf isl_surf
;
702 struct blorp_batch batch
;
703 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
705 fillSize
= anv_buffer_get_range(dst_buffer
, dstOffset
, fillSize
);
707 /* From the Vulkan spec:
709 * "size is the number of bytes to fill, and must be either a multiple
710 * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
711 * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
712 * buffer is not a multiple of 4, then the nearest smaller multiple is
717 /* First, we compute the biggest format that can be used with the
718 * given offsets and size.
721 bs
= gcd_pow2_u64(bs
, dstOffset
);
722 bs
= gcd_pow2_u64(bs
, fillSize
);
723 enum isl_format isl_format
= isl_format_for_size(bs
);
725 union isl_color_value color
= {
726 .u32
= { data
, data
, data
, data
},
729 const uint64_t max_fill_size
= MAX_SURFACE_DIM
* MAX_SURFACE_DIM
* bs
;
730 while (fillSize
>= max_fill_size
) {
731 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
732 dst_buffer
, dstOffset
,
733 MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
734 MAX_SURFACE_DIM
* bs
, isl_format
,
737 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
738 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
740 fillSize
-= max_fill_size
;
741 dstOffset
+= max_fill_size
;
744 uint64_t height
= fillSize
/ (MAX_SURFACE_DIM
* bs
);
745 assert(height
< MAX_SURFACE_DIM
);
747 const uint64_t rect_fill_size
= height
* MAX_SURFACE_DIM
* bs
;
748 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
749 dst_buffer
, dstOffset
,
750 MAX_SURFACE_DIM
, height
,
751 MAX_SURFACE_DIM
* bs
, isl_format
,
754 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
755 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, height
,
757 fillSize
-= rect_fill_size
;
758 dstOffset
+= rect_fill_size
;
762 const uint32_t width
= fillSize
/ bs
;
763 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
764 dst_buffer
, dstOffset
,
766 width
* bs
, isl_format
,
769 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
770 0, 0, 1, 0, 0, width
, 1,
774 blorp_batch_finish(&batch
);
777 void anv_CmdClearColorImage(
778 VkCommandBuffer commandBuffer
,
780 VkImageLayout imageLayout
,
781 const VkClearColorValue
* pColor
,
783 const VkImageSubresourceRange
* pRanges
)
785 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
786 ANV_FROM_HANDLE(anv_image
, image
, _image
);
788 static const bool color_write_disable
[4] = { false, false, false, false };
790 struct blorp_batch batch
;
791 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
794 for (unsigned r
= 0; r
< rangeCount
; r
++) {
795 if (pRanges
[r
].aspectMask
== 0)
798 assert(pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV
);
800 struct blorp_surf surf
;
801 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
802 image
, pRanges
[r
].aspectMask
,
803 ANV_AUX_USAGE_DEFAULT
, &surf
);
805 struct anv_format_plane src_format
=
806 anv_get_format_plane(&cmd_buffer
->device
->info
, image
->vk_format
,
807 VK_IMAGE_ASPECT_COLOR_BIT
, image
->tiling
);
809 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
810 unsigned layer_count
= anv_get_layerCount(image
, &pRanges
[r
]);
812 for (unsigned i
= 0; i
< anv_get_levelCount(image
, &pRanges
[r
]); i
++) {
813 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
814 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
815 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
817 if (image
->type
== VK_IMAGE_TYPE_3D
) {
819 layer_count
= anv_minify(image
->extent
.depth
, level
);
822 blorp_clear(&batch
, &surf
,
823 src_format
.isl_format
, src_format
.swizzle
,
824 level
, base_layer
, layer_count
,
825 0, 0, level_width
, level_height
,
826 vk_to_isl_color(*pColor
), color_write_disable
);
830 blorp_batch_finish(&batch
);
833 void anv_CmdClearDepthStencilImage(
834 VkCommandBuffer commandBuffer
,
836 VkImageLayout imageLayout
,
837 const VkClearDepthStencilValue
* pDepthStencil
,
839 const VkImageSubresourceRange
* pRanges
)
841 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
842 ANV_FROM_HANDLE(anv_image
, image
, image_h
);
844 struct blorp_batch batch
;
845 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
847 struct blorp_surf depth
, stencil
;
848 if (image
->aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
849 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
850 image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
851 ISL_AUX_USAGE_NONE
, &depth
);
853 memset(&depth
, 0, sizeof(depth
));
856 if (image
->aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
857 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
858 image
, VK_IMAGE_ASPECT_STENCIL_BIT
,
859 ISL_AUX_USAGE_NONE
, &stencil
);
861 memset(&stencil
, 0, sizeof(stencil
));
864 for (unsigned r
= 0; r
< rangeCount
; r
++) {
865 if (pRanges
[r
].aspectMask
== 0)
868 bool clear_depth
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
869 bool clear_stencil
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
871 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
872 unsigned layer_count
= anv_get_layerCount(image
, &pRanges
[r
]);
874 for (unsigned i
= 0; i
< anv_get_levelCount(image
, &pRanges
[r
]); i
++) {
875 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
876 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
877 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
879 if (image
->type
== VK_IMAGE_TYPE_3D
)
880 layer_count
= anv_minify(image
->extent
.depth
, level
);
882 blorp_clear_depth_stencil(&batch
, &depth
, &stencil
,
883 level
, base_layer
, layer_count
,
884 0, 0, level_width
, level_height
,
885 clear_depth
, pDepthStencil
->depth
,
886 clear_stencil
? 0xff : 0,
887 pDepthStencil
->stencil
);
891 blorp_batch_finish(&batch
);
895 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer
*cmd_buffer
,
896 uint32_t num_entries
,
897 uint32_t *state_offset
,
898 struct anv_state
*bt_state
)
900 *bt_state
= anv_cmd_buffer_alloc_binding_table(cmd_buffer
, num_entries
,
902 if (bt_state
->map
== NULL
) {
903 /* We ran out of space. Grab a new binding table block. */
904 VkResult result
= anv_cmd_buffer_new_binding_table_block(cmd_buffer
);
905 if (result
!= VK_SUCCESS
)
908 /* Re-emit state base addresses so we get the new surface state base
909 * address before we start emitting binding tables etc.
911 anv_cmd_buffer_emit_state_base_address(cmd_buffer
);
913 *bt_state
= anv_cmd_buffer_alloc_binding_table(cmd_buffer
, num_entries
,
915 assert(bt_state
->map
!= NULL
);
922 binding_table_for_surface_state(struct anv_cmd_buffer
*cmd_buffer
,
923 struct anv_state surface_state
,
926 uint32_t state_offset
;
927 struct anv_state bt_state
;
930 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer
, 1, &state_offset
,
932 if (result
!= VK_SUCCESS
)
935 uint32_t *bt_map
= bt_state
.map
;
936 bt_map
[0] = surface_state
.offset
+ state_offset
;
938 *bt_offset
= bt_state
.offset
;
943 clear_color_attachment(struct anv_cmd_buffer
*cmd_buffer
,
944 struct blorp_batch
*batch
,
945 const VkClearAttachment
*attachment
,
946 uint32_t rectCount
, const VkClearRect
*pRects
)
948 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
949 const uint32_t color_att
= attachment
->colorAttachment
;
950 const uint32_t att_idx
= subpass
->color_attachments
[color_att
].attachment
;
952 if (att_idx
== VK_ATTACHMENT_UNUSED
)
955 struct anv_render_pass_attachment
*pass_att
=
956 &cmd_buffer
->state
.pass
->attachments
[att_idx
];
957 struct anv_attachment_state
*att_state
=
958 &cmd_buffer
->state
.attachments
[att_idx
];
960 uint32_t binding_table
;
962 binding_table_for_surface_state(cmd_buffer
, att_state
->color
.state
,
964 if (result
!= VK_SUCCESS
)
967 union isl_color_value clear_color
=
968 vk_to_isl_color(attachment
->clearValue
.color
);
970 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
971 if (subpass
->view_mask
) {
973 for_each_bit(view_idx
, subpass
->view_mask
) {
974 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
975 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
976 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
977 blorp_clear_attachments(batch
, binding_table
,
978 ISL_FORMAT_UNSUPPORTED
, pass_att
->samples
,
981 offset
.x
+ extent
.width
,
982 offset
.y
+ extent
.height
,
983 true, clear_color
, false, 0.0f
, 0, 0);
989 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
990 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
991 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
992 blorp_clear_attachments(batch
, binding_table
,
993 ISL_FORMAT_UNSUPPORTED
, pass_att
->samples
,
994 pRects
[r
].baseArrayLayer
,
995 pRects
[r
].layerCount
,
997 offset
.x
+ extent
.width
, offset
.y
+ extent
.height
,
998 true, clear_color
, false, 0.0f
, 0, 0);
1003 clear_depth_stencil_attachment(struct anv_cmd_buffer
*cmd_buffer
,
1004 struct blorp_batch
*batch
,
1005 const VkClearAttachment
*attachment
,
1006 uint32_t rectCount
, const VkClearRect
*pRects
)
1008 static const union isl_color_value color_value
= { .u32
= { 0, } };
1009 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
1010 const uint32_t att_idx
= subpass
->depth_stencil_attachment
.attachment
;
1012 if (att_idx
== VK_ATTACHMENT_UNUSED
)
1015 struct anv_render_pass_attachment
*pass_att
=
1016 &cmd_buffer
->state
.pass
->attachments
[att_idx
];
1018 bool clear_depth
= attachment
->aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
1019 bool clear_stencil
= attachment
->aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
1021 enum isl_format depth_format
= ISL_FORMAT_UNSUPPORTED
;
1023 depth_format
= anv_get_isl_format(&cmd_buffer
->device
->info
,
1025 VK_IMAGE_ASPECT_DEPTH_BIT
,
1026 VK_IMAGE_TILING_OPTIMAL
);
1029 uint32_t binding_table
;
1031 binding_table_for_surface_state(cmd_buffer
,
1032 cmd_buffer
->state
.null_surface_state
,
1034 if (result
!= VK_SUCCESS
)
1037 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1038 if (subpass
->view_mask
) {
1040 for_each_bit(view_idx
, subpass
->view_mask
) {
1041 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1042 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1043 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1044 VkClearDepthStencilValue value
= attachment
->clearValue
.depthStencil
;
1045 blorp_clear_attachments(batch
, binding_table
,
1046 depth_format
, pass_att
->samples
,
1049 offset
.x
+ extent
.width
,
1050 offset
.y
+ extent
.height
,
1052 clear_depth
, value
.depth
,
1053 clear_stencil
? 0xff : 0, value
.stencil
);
1059 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
1060 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
1061 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
1062 VkClearDepthStencilValue value
= attachment
->clearValue
.depthStencil
;
1063 blorp_clear_attachments(batch
, binding_table
,
1064 depth_format
, pass_att
->samples
,
1065 pRects
[r
].baseArrayLayer
,
1066 pRects
[r
].layerCount
,
1068 offset
.x
+ extent
.width
, offset
.y
+ extent
.height
,
1070 clear_depth
, value
.depth
,
1071 clear_stencil
? 0xff : 0, value
.stencil
);
1075 void anv_CmdClearAttachments(
1076 VkCommandBuffer commandBuffer
,
1077 uint32_t attachmentCount
,
1078 const VkClearAttachment
* pAttachments
,
1080 const VkClearRect
* pRects
)
1082 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
1084 /* Because this gets called within a render pass, we tell blorp not to
1085 * trash our depth and stencil buffers.
1087 struct blorp_batch batch
;
1088 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1089 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
1091 for (uint32_t a
= 0; a
< attachmentCount
; ++a
) {
1092 if (pAttachments
[a
].aspectMask
& VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV
) {
1093 assert(pAttachments
[a
].aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
1094 clear_color_attachment(cmd_buffer
, &batch
,
1098 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
1104 blorp_batch_finish(&batch
);
1107 enum subpass_stage
{
1110 SUBPASS_STAGE_RESOLVE
,
1114 subpass_needs_clear(const struct anv_cmd_buffer
*cmd_buffer
)
1116 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
1117 uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
.attachment
;
1119 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
1120 uint32_t a
= cmd_state
->subpass
->color_attachments
[i
].attachment
;
1121 if (a
== VK_ATTACHMENT_UNUSED
)
1124 assert(a
< cmd_state
->pass
->attachment_count
);
1125 if (cmd_state
->attachments
[a
].pending_clear_aspects
) {
1130 if (ds
!= VK_ATTACHMENT_UNUSED
) {
1131 assert(ds
< cmd_state
->pass
->attachment_count
);
1132 if (cmd_state
->attachments
[ds
].pending_clear_aspects
)
1140 anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1142 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
1143 const VkRect2D render_area
= cmd_buffer
->state
.render_area
;
1146 if (!subpass_needs_clear(cmd_buffer
))
1149 /* Because this gets called within a render pass, we tell blorp not to
1150 * trash our depth and stencil buffers.
1152 struct blorp_batch batch
;
1153 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1154 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
1156 VkClearRect clear_rect
= {
1157 .rect
= cmd_buffer
->state
.render_area
,
1158 .baseArrayLayer
= 0,
1159 .layerCount
= cmd_buffer
->state
.framebuffer
->layers
,
1162 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1163 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
1164 const uint32_t a
= cmd_state
->subpass
->color_attachments
[i
].attachment
;
1165 if (a
== VK_ATTACHMENT_UNUSED
)
1168 assert(a
< cmd_state
->pass
->attachment_count
);
1169 struct anv_attachment_state
*att_state
= &cmd_state
->attachments
[a
];
1171 if (!att_state
->pending_clear_aspects
)
1174 assert(att_state
->pending_clear_aspects
== VK_IMAGE_ASPECT_COLOR_BIT
);
1176 struct anv_image_view
*iview
= fb
->attachments
[a
];
1177 const struct anv_image
*image
= iview
->image
;
1178 struct blorp_surf surf
;
1179 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
1180 image
, VK_IMAGE_ASPECT_COLOR_BIT
,
1181 att_state
->aux_usage
, &surf
);
1183 if (att_state
->fast_clear
) {
1184 surf
.clear_color
= vk_to_isl_color(att_state
->clear_value
.color
);
1186 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1188 * "After Render target fast clear, pipe-control with color cache
1189 * write-flush must be issued before sending any DRAW commands on
1190 * that render target."
1192 * This comment is a bit cryptic and doesn't really tell you what's
1193 * going or what's really needed. It appears that fast clear ops are
1194 * not properly synchronized with other drawing. This means that we
1195 * cannot have a fast clear operation in the pipe at the same time as
1196 * other regular drawing operations. We need to use a PIPE_CONTROL
1197 * to ensure that the contents of the previous draw hit the render
1198 * target before we resolve and then use a second PIPE_CONTROL after
1199 * the resolve to ensure that it is completed before any additional
1202 cmd_buffer
->state
.pending_pipe_bits
|=
1203 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1205 assert(image
->n_planes
== 1);
1206 blorp_fast_clear(&batch
, &surf
, iview
->planes
[0].isl
.format
,
1207 iview
->planes
[0].isl
.base_level
,
1208 iview
->planes
[0].isl
.base_array_layer
, fb
->layers
,
1209 render_area
.offset
.x
, render_area
.offset
.y
,
1210 render_area
.offset
.x
+ render_area
.extent
.width
,
1211 render_area
.offset
.y
+ render_area
.extent
.height
);
1213 cmd_buffer
->state
.pending_pipe_bits
|=
1214 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1216 assert(image
->n_planes
== 1);
1217 blorp_clear(&batch
, &surf
, iview
->planes
[0].isl
.format
,
1218 anv_swizzle_for_render(iview
->planes
[0].isl
.swizzle
),
1219 iview
->planes
[0].isl
.base_level
,
1220 iview
->planes
[0].isl
.base_array_layer
, fb
->layers
,
1221 render_area
.offset
.x
, render_area
.offset
.y
,
1222 render_area
.offset
.x
+ render_area
.extent
.width
,
1223 render_area
.offset
.y
+ render_area
.extent
.height
,
1224 vk_to_isl_color(att_state
->clear_value
.color
), NULL
);
1227 att_state
->pending_clear_aspects
= 0;
1230 const uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
.attachment
;
1231 assert(ds
== VK_ATTACHMENT_UNUSED
|| ds
< cmd_state
->pass
->attachment_count
);
1233 if (ds
!= VK_ATTACHMENT_UNUSED
&&
1234 cmd_state
->attachments
[ds
].pending_clear_aspects
) {
1236 VkClearAttachment clear_att
= {
1237 .aspectMask
= cmd_state
->attachments
[ds
].pending_clear_aspects
,
1238 .clearValue
= cmd_state
->attachments
[ds
].clear_value
,
1242 const uint8_t gen
= cmd_buffer
->device
->info
.gen
;
1243 bool clear_with_hiz
= gen
>= 8 && cmd_state
->attachments
[ds
].aux_usage
==
1245 const struct anv_image_view
*iview
= fb
->attachments
[ds
];
1247 if (clear_with_hiz
) {
1248 const bool clear_depth
= clear_att
.aspectMask
&
1249 VK_IMAGE_ASPECT_DEPTH_BIT
;
1250 const bool clear_stencil
= clear_att
.aspectMask
&
1251 VK_IMAGE_ASPECT_STENCIL_BIT
;
1253 /* Check against restrictions for depth buffer clearing. A great GPU
1254 * performance benefit isn't expected when using the HZ sequence for
1255 * stencil-only clears. Therefore, we don't emit a HZ op sequence for
1256 * a stencil clear in addition to using the BLORP-fallback for depth.
1259 if (!blorp_can_hiz_clear_depth(gen
, iview
->planes
[0].isl
.format
,
1260 iview
->image
->samples
,
1261 render_area
.offset
.x
,
1262 render_area
.offset
.y
,
1263 render_area
.offset
.x
+
1264 render_area
.extent
.width
,
1265 render_area
.offset
.y
+
1266 render_area
.extent
.height
)) {
1267 clear_with_hiz
= false;
1268 } else if (clear_att
.clearValue
.depthStencil
.depth
!=
1270 /* Don't enable fast depth clears for any color not equal to
1273 clear_with_hiz
= false;
1274 } else if (gen
== 8 &&
1275 anv_can_sample_with_hiz(&cmd_buffer
->device
->info
,
1277 /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
1278 * fast-cleared portion of a HiZ buffer. Testing has revealed
1279 * that Gen8 only supports returning 0.0f. Gens prior to gen8 do
1280 * not support this feature at all.
1282 clear_with_hiz
= false;
1286 if (clear_with_hiz
) {
1287 blorp_gen8_hiz_clear_attachments(&batch
, iview
->image
->samples
,
1288 render_area
.offset
.x
,
1289 render_area
.offset
.y
,
1290 render_area
.offset
.x
+
1291 render_area
.extent
.width
,
1292 render_area
.offset
.y
+
1293 render_area
.extent
.height
,
1294 clear_depth
, clear_stencil
,
1295 clear_att
.clearValue
.
1296 depthStencil
.stencil
);
1298 /* From the SKL PRM, Depth Buffer Clear:
1300 * Depth Buffer Clear Workaround
1301 * Depth buffer clear pass using any of the methods (WM_STATE,
1302 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
1303 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
1304 * “set” before starting to render. DepthStall and DepthFlush are
1305 * not needed between consecutive depth clear passes nor is it
1306 * required if the depth-clear pass was done with “full_surf_clear”
1307 * bit set in the 3DSTATE_WM_HZ_OP.
1310 cmd_buffer
->state
.pending_pipe_bits
|=
1311 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT
| ANV_PIPE_DEPTH_STALL_BIT
;
1316 if (!clear_with_hiz
) {
1317 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
1318 &clear_att
, 1, &clear_rect
);
1321 cmd_state
->attachments
[ds
].pending_clear_aspects
= 0;
1324 blorp_batch_finish(&batch
);
1328 resolve_surface(struct blorp_batch
*batch
,
1329 struct blorp_surf
*src_surf
,
1330 uint32_t src_level
, uint32_t src_layer
,
1331 struct blorp_surf
*dst_surf
,
1332 uint32_t dst_level
, uint32_t dst_layer
,
1333 uint32_t src_x
, uint32_t src_y
, uint32_t dst_x
, uint32_t dst_y
,
1334 uint32_t width
, uint32_t height
)
1337 src_surf
, src_level
, src_layer
,
1338 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1339 dst_surf
, dst_level
, dst_layer
,
1340 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1341 src_x
, src_y
, src_x
+ width
, src_y
+ height
,
1342 dst_x
, dst_y
, dst_x
+ width
, dst_y
+ height
,
1343 0x2600 /* GL_NEAREST */, false, false);
1347 resolve_image(struct anv_device
*device
,
1348 struct blorp_batch
*batch
,
1349 const struct anv_image
*src_image
,
1350 uint32_t src_level
, uint32_t src_layer
,
1351 const struct anv_image
*dst_image
,
1352 uint32_t dst_level
, uint32_t dst_layer
,
1353 VkImageAspectFlags aspect_mask
,
1354 uint32_t src_x
, uint32_t src_y
, uint32_t dst_x
, uint32_t dst_y
,
1355 uint32_t width
, uint32_t height
)
1357 assert(src_image
->type
== VK_IMAGE_TYPE_2D
);
1358 assert(src_image
->samples
> 1);
1359 assert(dst_image
->type
== VK_IMAGE_TYPE_2D
);
1360 assert(dst_image
->samples
== 1);
1361 assert(src_image
->n_planes
== dst_image
->n_planes
);
1363 uint32_t aspect_bit
;
1365 anv_foreach_image_aspect_bit(aspect_bit
, src_image
, aspect_mask
) {
1366 struct blorp_surf src_surf
, dst_surf
;
1367 get_blorp_surf_for_anv_image(device
, src_image
, 1UL << aspect_bit
,
1368 ANV_AUX_USAGE_DEFAULT
, &src_surf
);
1369 get_blorp_surf_for_anv_image(device
, dst_image
, 1UL << aspect_bit
,
1370 ANV_AUX_USAGE_DEFAULT
, &dst_surf
);
1372 assert(!src_image
->format
->can_ycbcr
);
1373 assert(!dst_image
->format
->can_ycbcr
);
1375 resolve_surface(batch
,
1376 &src_surf
, src_level
, src_layer
,
1377 &dst_surf
, dst_level
, dst_layer
,
1378 src_x
, src_y
, dst_x
, dst_y
, width
, height
);
1382 void anv_CmdResolveImage(
1383 VkCommandBuffer commandBuffer
,
1385 VkImageLayout srcImageLayout
,
1387 VkImageLayout dstImageLayout
,
1388 uint32_t regionCount
,
1389 const VkImageResolve
* pRegions
)
1391 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
1392 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
1393 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
1395 struct blorp_batch batch
;
1396 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1398 for (uint32_t r
= 0; r
< regionCount
; r
++) {
1399 assert(pRegions
[r
].srcSubresource
.aspectMask
==
1400 pRegions
[r
].dstSubresource
.aspectMask
);
1401 assert(anv_get_layerCount(src_image
, &pRegions
[r
].srcSubresource
) ==
1402 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
));
1404 const uint32_t layer_count
=
1405 anv_get_layerCount(dst_image
, &pRegions
[r
].dstSubresource
);
1407 VkImageAspectFlags src_mask
= pRegions
[r
].srcSubresource
.aspectMask
,
1408 dst_mask
= pRegions
[r
].dstSubresource
.aspectMask
;
1410 assert(anv_image_aspects_compatible(src_mask
, dst_mask
));
1412 for (uint32_t layer
= 0; layer
< layer_count
; layer
++) {
1413 resolve_image(cmd_buffer
->device
, &batch
,
1415 pRegions
[r
].srcSubresource
.mipLevel
,
1416 pRegions
[r
].srcSubresource
.baseArrayLayer
+ layer
,
1418 pRegions
[r
].dstSubresource
.mipLevel
,
1419 pRegions
[r
].dstSubresource
.baseArrayLayer
+ layer
,
1420 pRegions
[r
].dstSubresource
.aspectMask
,
1421 pRegions
[r
].srcOffset
.x
, pRegions
[r
].srcOffset
.y
,
1422 pRegions
[r
].dstOffset
.x
, pRegions
[r
].dstOffset
.y
,
1423 pRegions
[r
].extent
.width
, pRegions
[r
].extent
.height
);
1427 blorp_batch_finish(&batch
);
1430 static enum isl_aux_usage
1431 fast_clear_aux_usage(const struct anv_image
*image
,
1432 VkImageAspectFlagBits aspect
)
1434 uint32_t plane
= anv_image_aspect_to_plane(image
->aspects
, aspect
);
1435 if (image
->planes
[plane
].aux_usage
== ISL_AUX_USAGE_NONE
)
1436 return ISL_AUX_USAGE_CCS_D
;
1438 return image
->planes
[plane
].aux_usage
;
1442 anv_image_fast_clear(struct anv_cmd_buffer
*cmd_buffer
,
1443 const struct anv_image
*image
,
1444 VkImageAspectFlagBits aspect
,
1445 const uint32_t base_level
, const uint32_t level_count
,
1446 const uint32_t base_layer
, uint32_t layer_count
)
1448 assert(image
->type
== VK_IMAGE_TYPE_3D
|| image
->extent
.depth
== 1);
1450 if (image
->type
== VK_IMAGE_TYPE_3D
) {
1451 assert(base_layer
== 0);
1452 assert(layer_count
== anv_minify(image
->extent
.depth
, base_level
));
1455 struct blorp_batch batch
;
1456 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1458 struct blorp_surf surf
;
1459 get_blorp_surf_for_anv_image(cmd_buffer
->device
, image
, aspect
,
1460 fast_clear_aux_usage(image
, aspect
),
1463 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1465 * "After Render target fast clear, pipe-control with color cache
1466 * write-flush must be issued before sending any DRAW commands on
1467 * that render target."
1469 * This comment is a bit cryptic and doesn't really tell you what's going
1470 * or what's really needed. It appears that fast clear ops are not
1471 * properly synchronized with other drawing. This means that we cannot
1472 * have a fast clear operation in the pipe at the same time as other
1473 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1474 * that the contents of the previous draw hit the render target before we
1475 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1476 * that it is completed before any additional drawing occurs.
1478 cmd_buffer
->state
.pending_pipe_bits
|=
1479 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1481 uint32_t plane
= anv_image_aspect_to_plane(image
->aspects
, aspect
);
1482 uint32_t width_div
= image
->format
->planes
[plane
].denominator_scales
[0];
1483 uint32_t height_div
= image
->format
->planes
[plane
].denominator_scales
[1];
1485 for (uint32_t l
= 0; l
< level_count
; l
++) {
1486 const uint32_t level
= base_level
+ l
;
1488 const VkExtent3D extent
= {
1489 .width
= anv_minify(image
->extent
.width
, level
),
1490 .height
= anv_minify(image
->extent
.height
, level
),
1491 .depth
= anv_minify(image
->extent
.depth
, level
),
1494 if (image
->type
== VK_IMAGE_TYPE_3D
)
1495 layer_count
= extent
.depth
;
1497 assert(level
< anv_image_aux_levels(image
, aspect
));
1498 assert(base_layer
+ layer_count
<= anv_image_aux_layers(image
, aspect
, level
));
1499 blorp_fast_clear(&batch
, &surf
, surf
.surf
->format
,
1500 level
, base_layer
, layer_count
,
1502 extent
.width
/ width_div
,
1503 extent
.height
/ height_div
);
1506 cmd_buffer
->state
.pending_pipe_bits
|=
1507 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
| ANV_PIPE_CS_STALL_BIT
;
1511 anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1513 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1514 struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
1516 if (subpass
->has_resolve
) {
1517 struct blorp_batch batch
;
1518 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1520 /* We are about to do some MSAA resolves. We need to flush so that the
1521 * result of writes to the MSAA color attachments show up in the sampler
1522 * when we blit to the single-sampled resolve target.
1524 cmd_buffer
->state
.pending_pipe_bits
|=
1525 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT
|
1526 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
;
1528 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
1529 uint32_t src_att
= subpass
->color_attachments
[i
].attachment
;
1530 uint32_t dst_att
= subpass
->resolve_attachments
[i
].attachment
;
1532 if (dst_att
== VK_ATTACHMENT_UNUSED
)
1535 assert(src_att
< cmd_buffer
->state
.pass
->attachment_count
);
1536 assert(dst_att
< cmd_buffer
->state
.pass
->attachment_count
);
1538 if (cmd_buffer
->state
.attachments
[dst_att
].pending_clear_aspects
) {
1539 /* From the Vulkan 1.0 spec:
1541 * If the first use of an attachment in a render pass is as a
1542 * resolve attachment, then the loadOp is effectively ignored
1543 * as the resolve is guaranteed to overwrite all pixels in the
1546 cmd_buffer
->state
.attachments
[dst_att
].pending_clear_aspects
= 0;
1549 struct anv_image_view
*src_iview
= fb
->attachments
[src_att
];
1550 struct anv_image_view
*dst_iview
= fb
->attachments
[dst_att
];
1552 enum isl_aux_usage src_aux_usage
=
1553 cmd_buffer
->state
.attachments
[src_att
].aux_usage
;
1554 enum isl_aux_usage dst_aux_usage
=
1555 cmd_buffer
->state
.attachments
[dst_att
].aux_usage
;
1557 const VkRect2D render_area
= cmd_buffer
->state
.render_area
;
1559 assert(src_iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
&&
1560 dst_iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
);
1562 struct blorp_surf src_surf
, dst_surf
;
1563 get_blorp_surf_for_anv_image(cmd_buffer
->device
, src_iview
->image
,
1564 VK_IMAGE_ASPECT_COLOR_BIT
,
1565 src_aux_usage
, &src_surf
);
1566 get_blorp_surf_for_anv_image(cmd_buffer
->device
, dst_iview
->image
,
1567 VK_IMAGE_ASPECT_COLOR_BIT
,
1568 dst_aux_usage
, &dst_surf
);
1570 assert(!src_iview
->image
->format
->can_ycbcr
);
1571 assert(!dst_iview
->image
->format
->can_ycbcr
);
1573 resolve_surface(&batch
,
1575 src_iview
->planes
[0].isl
.base_level
,
1576 src_iview
->planes
[0].isl
.base_array_layer
,
1578 dst_iview
->planes
[0].isl
.base_level
,
1579 dst_iview
->planes
[0].isl
.base_array_layer
,
1580 render_area
.offset
.x
, render_area
.offset
.y
,
1581 render_area
.offset
.x
, render_area
.offset
.y
,
1582 render_area
.extent
.width
, render_area
.extent
.height
);
1585 blorp_batch_finish(&batch
);
1590 anv_image_copy_to_shadow(struct anv_cmd_buffer
*cmd_buffer
,
1591 const struct anv_image
*image
,
1592 uint32_t base_level
, uint32_t level_count
,
1593 uint32_t base_layer
, uint32_t layer_count
)
1595 struct blorp_batch batch
;
1596 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1598 assert(image
->aspects
== VK_IMAGE_ASPECT_COLOR_BIT
&& image
->n_planes
== 1);
1600 struct blorp_surf surf
;
1601 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
1602 image
, VK_IMAGE_ASPECT_COLOR_BIT
,
1603 ISL_AUX_USAGE_NONE
, &surf
);
1605 struct blorp_surf shadow_surf
= {
1606 .surf
= &image
->planes
[0].shadow_surface
.isl
,
1608 .buffer
= image
->planes
[0].bo
,
1609 .offset
= image
->planes
[0].bo_offset
+
1610 image
->planes
[0].shadow_surface
.offset
,
1611 .mocs
= cmd_buffer
->device
->default_mocs
,
1615 for (uint32_t l
= 0; l
< level_count
; l
++) {
1616 const uint32_t level
= base_level
+ l
;
1618 const VkExtent3D extent
= {
1619 .width
= anv_minify(image
->extent
.width
, level
),
1620 .height
= anv_minify(image
->extent
.height
, level
),
1621 .depth
= anv_minify(image
->extent
.depth
, level
),
1624 if (image
->type
== VK_IMAGE_TYPE_3D
)
1625 layer_count
= extent
.depth
;
1627 for (uint32_t a
= 0; a
< layer_count
; a
++) {
1628 const uint32_t layer
= base_layer
+ a
;
1630 blorp_copy(&batch
, &surf
, level
, layer
,
1631 &shadow_surf
, level
, layer
,
1632 0, 0, 0, 0, extent
.width
, extent
.height
);
1636 blorp_batch_finish(&batch
);
1640 anv_gen8_hiz_op_resolve(struct anv_cmd_buffer
*cmd_buffer
,
1641 const struct anv_image
*image
,
1642 enum blorp_hiz_op op
)
1646 assert(anv_image_aspect_to_plane(image
->aspects
,
1647 VK_IMAGE_ASPECT_DEPTH_BIT
) == 0);
1649 /* Don't resolve depth buffers without an auxiliary HiZ buffer and
1650 * don't perform such a resolve on gens that don't support it.
1652 if (cmd_buffer
->device
->info
.gen
< 8 ||
1653 image
->planes
[0].aux_usage
!= ISL_AUX_USAGE_HIZ
)
1656 assert(op
== BLORP_HIZ_OP_HIZ_RESOLVE
||
1657 op
== BLORP_HIZ_OP_DEPTH_RESOLVE
);
1659 struct blorp_batch batch
;
1660 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1662 struct blorp_surf surf
;
1663 get_blorp_surf_for_anv_image(cmd_buffer
->device
,
1664 image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
1665 ISL_AUX_USAGE_NONE
, &surf
);
1667 /* Manually add the aux HiZ surf */
1668 surf
.aux_surf
= &image
->planes
[0].aux_surface
.isl
,
1669 surf
.aux_addr
= (struct blorp_address
) {
1670 .buffer
= image
->planes
[0].bo
,
1671 .offset
= image
->planes
[0].bo_offset
+
1672 image
->planes
[0].aux_surface
.offset
,
1673 .mocs
= cmd_buffer
->device
->default_mocs
,
1675 surf
.aux_usage
= ISL_AUX_USAGE_HIZ
;
1677 surf
.clear_color
.f32
[0] = ANV_HZ_FC_VAL
;
1679 blorp_hiz_op(&batch
, &surf
, 0, 0, 1, op
);
1680 blorp_batch_finish(&batch
);
1684 anv_ccs_resolve(struct anv_cmd_buffer
* const cmd_buffer
,
1685 const struct anv_image
* const image
,
1686 VkImageAspectFlagBits aspect
,
1687 const uint8_t level
,
1688 const uint32_t start_layer
, const uint32_t layer_count
,
1689 const enum blorp_fast_clear_op op
)
1691 assert(cmd_buffer
&& image
);
1693 uint32_t plane
= anv_image_aspect_to_plane(image
->aspects
, aspect
);
1695 /* The resolved subresource range must have a CCS buffer. */
1696 assert(level
< anv_image_aux_levels(image
, aspect
));
1697 assert(start_layer
+ layer_count
<=
1698 anv_image_aux_layers(image
, aspect
, level
));
1699 assert(image
->aspects
& VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV
&& image
->samples
== 1);
1701 struct blorp_batch batch
;
1702 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1703 BLORP_BATCH_PREDICATE_ENABLE
);
1705 struct blorp_surf surf
;
1706 get_blorp_surf_for_anv_image(cmd_buffer
->device
, image
, aspect
,
1707 fast_clear_aux_usage(image
, aspect
),
1709 surf
.clear_color_addr
= anv_to_blorp_address(
1710 anv_image_get_clear_color_addr(cmd_buffer
->device
, image
, aspect
, level
));
1712 blorp_ccs_resolve(&batch
, &surf
, level
, start_layer
, layer_count
,
1713 image
->planes
[plane
].surface
.isl
.format
, op
);
1715 blorp_batch_finish(&batch
);