2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
24 #include "anv_private.h"
/* blorp callback: look up a previously-compiled blorp shader in the device's
 * blorp pipeline cache and return its kernel offset and prog_data through the
 * out-parameters.
 *
 * NOTE(review): this span is a line-mangled extraction.  The gaps in the
 * embedded original line numbers (e.g. 29 -> 31, 37 -> 41) show that the
 * return type, braces, and the cache-miss (bin == NULL) handling are missing
 * from view -- restore from the upstream file before relying on this.
 */
27 lookup_blorp_shader(struct blorp_context
*blorp
,
28 const void *key
, uint32_t key_size
,
29 uint32_t *kernel_out
, void *prog_data_out
)
31 struct anv_device
*device
= blorp
->driver_ctx
;
33 /* The blorp cache must be a real cache */
34 assert(device
->blorp_shader_cache
.cache
);
36 struct anv_shader_bin
*bin
=
37 anv_pipeline_cache_search(&device
->blorp_shader_cache
, key
, key_size
);
41 /* The cache already has a reference and it's not going anywhere so there
42 * is no need to hold a second reference.
 */
44 anv_shader_bin_unref(device
, bin
);
/* Hand the cached kernel's offset and prog_data back to blorp. */
46 *kernel_out
= bin
->kernel
.offset
;
47 *(const struct brw_stage_prog_data
**)prog_data_out
=
48 anv_shader_bin_get_prog_data(bin
);
/* blorp callback: insert a freshly-compiled blorp kernel into the device's
 * blorp pipeline cache and return its kernel offset and prog_data.
 *
 * NOTE(review): line-mangled extraction; the return type, braces, the body of
 * the bind_map initializer (lines 67-70 of the original) and the upload
 * failure check are missing from view.
 */
54 upload_blorp_shader(struct blorp_context
*blorp
,
55 const void *key
, uint32_t key_size
,
56 const void *kernel
, uint32_t kernel_size
,
57 const struct brw_stage_prog_data
*prog_data
,
58 uint32_t prog_data_size
,
59 uint32_t *kernel_out
, void *prog_data_out
)
61 struct anv_device
*device
= blorp
->driver_ctx
;
63 /* The blorp cache must be a real cache */
64 assert(device
->blorp_shader_cache
.cache
);
/* blorp shaders use no descriptor sets, hence the (partially visible)
 * trivial bind map.
 */
66 struct anv_pipeline_bind_map bind_map
= {
71 struct anv_shader_bin
*bin
=
72 anv_pipeline_cache_upload_kernel(&device
->blorp_shader_cache
,
73 key
, key_size
, kernel
, kernel_size
,
74 prog_data
, prog_data_size
, &bind_map
);
76 /* The cache already has a reference and it's not going anywhere so there
77 * is no need to hold a second reference.
 */
79 anv_shader_bin_unref(device
, bin
);
81 *kernel_out
= bin
->kernel
.offset
;
82 *(const struct brw_stage_prog_data
**)prog_data_out
=
83 anv_shader_bin_get_prog_data(bin
);
/* One-time device setup: create the blorp-private pipeline cache, initialize
 * the blorp context, wire up the shader lookup/upload callbacks above, and
 * select the per-generation exec function.
 *
 * NOTE(review): line-mangled extraction; the switch's case labels (case 7,
 * 8, 9), else branches, and closing braces are missing from view.
 */
87 anv_device_init_blorp(struct anv_device
*device
)
89 anv_pipeline_cache_init(&device
->blorp_shader_cache
, device
, true);
90 blorp_init(&device
->blorp
, device
, &device
->isl_dev
);
91 device
->blorp
.compiler
= device
->instance
->physicalDevice
.compiler
;
/* All three MOCS entries (texture, render target, vertex buffer) use the
 * device default.
 */
92 device
->blorp
.mocs
.tex
= device
->default_mocs
;
93 device
->blorp
.mocs
.rb
= device
->default_mocs
;
94 device
->blorp
.mocs
.vb
= device
->default_mocs
;
95 device
->blorp
.lookup_shader
= lookup_blorp_shader
;
96 device
->blorp
.upload_shader
= upload_blorp_shader
;
/* Dispatch on hardware generation; Haswell (gen 7.5) gets its own exec. */
97 switch (device
->info
.gen
) {
99 if (device
->info
.is_haswell
) {
100 device
->blorp
.exec
= gen75_blorp_exec
;
102 device
->blorp
.exec
= gen7_blorp_exec
;
106 device
->blorp
.exec
= gen8_blorp_exec
;
109 device
->blorp
.exec
= gen9_blorp_exec
;
112 unreachable("Unknown hardware generation");
/* Device teardown counterpart of anv_device_init_blorp(): destroy the blorp
 * context, then the blorp-private pipeline cache it used.
 */
117 anv_device_finish_blorp(struct anv_device
*device
)
119 blorp_finish(&device
->blorp
);
120 anv_pipeline_cache_finish(&device
->blorp_shader_cache
);
/* Wrap a range of an anv_buffer as a width x height linear 2D isl/blorp
 * surface with the given format and exact row pitch, so buffer data can be
 * used as a blorp copy/clear target.  isl_surf is caller-provided backing
 * storage that blorp_surf points into.
 *
 * NOTE(review): line-mangled extraction; the blorp_surf .surf assignment,
 * parts of the isl_surf_init() argument list (width/height/levels/samples,
 * original lines 141-147) and braces are missing from view.
 */
124 get_blorp_surf_for_anv_buffer(struct anv_device
*device
,
125 struct anv_buffer
*buffer
, uint64_t offset
,
126 uint32_t width
, uint32_t height
,
127 uint32_t row_pitch
, enum isl_format format
,
128 struct blorp_surf
*blorp_surf
,
129 struct isl_surf
*isl_surf
)
131 *blorp_surf
= (struct blorp_surf
) {
134 .buffer
= buffer
->bo
,
135 .offset
= buffer
->offset
+ offset
,
139 isl_surf_init(&device
->isl_dev
, isl_surf
,
140 .dim
= ISL_SURF_DIM_2D
,
148 .min_pitch
= row_pitch
,
149 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
|
150 ISL_SURF_USAGE_RENDER_TARGET_BIT
,
151 .tiling_flags
= ISL_TILING_LINEAR_BIT
);
/* The caller computed row_pitch itself, so isl must not have padded it. */
152 assert(isl_surf
->row_pitch
== row_pitch
);
/* Fill a blorp_surf describing the anv_image subsurface that backs the given
 * aspect (color/depth/stencil).
 *
 * NOTE(review): line-mangled extraction; the remaining blorp_surf designated
 * initializers (.addr.buffer/.addr, original lines 165-168) and braces are
 * missing from view.
 */
156 get_blorp_surf_for_anv_image(const struct anv_image
*image
,
157 VkImageAspectFlags aspect
,
158 struct blorp_surf
*blorp_surf
)
160 const struct anv_surface
*surface
=
161 anv_image_get_surface_for_aspect_mask(image
, aspect
);
163 *blorp_surf
= (struct blorp_surf
) {
164 .surf
= &surface
->isl
,
167 .offset
= image
->offset
+ surface
->offset
,
/* vkCmdCopyImage: per region, normalize offsets/extent for the image types,
 * derive the base layer / layer count (3D images use offset.z / extent.depth
 * as layers), then issue one blorp_copy per aspect bit per layer.
 *
 * NOTE(review): line-mangled extraction; the srcImage/dstImage handle
 * parameters, the "const VkExtent3D extent =" declaration (original line
 * 193), else branches, layer offsets in the blorp_copy call and closing
 * braces are missing from view.
 */
172 void anv_CmdCopyImage(
173 VkCommandBuffer commandBuffer
,
175 VkImageLayout srcImageLayout
,
177 VkImageLayout dstImageLayout
,
178 uint32_t regionCount
,
179 const VkImageCopy
* pRegions
)
181 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
182 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
183 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
185 struct blorp_batch batch
;
186 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
188 for (unsigned r
= 0; r
< regionCount
; r
++) {
189 VkOffset3D srcOffset
=
190 anv_sanitize_image_offset(src_image
->type
, pRegions
[r
].srcOffset
);
191 VkOffset3D dstOffset
=
192 anv_sanitize_image_offset(dst_image
->type
, pRegions
[r
].dstOffset
);
194 anv_sanitize_image_extent(src_image
->type
, pRegions
[r
].extent
);
/* For 3D destinations, slices act as layers: base comes from dstOffset.z
 * and the count from extent.depth; otherwise use the subresource fields.
 */
196 unsigned dst_base_layer
, layer_count
;
197 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
198 dst_base_layer
= pRegions
[r
].dstOffset
.z
;
199 layer_count
= pRegions
[r
].extent
.depth
;
201 dst_base_layer
= pRegions
[r
].dstSubresource
.baseArrayLayer
;
202 layer_count
= pRegions
[r
].dstSubresource
.layerCount
;
205 unsigned src_base_layer
;
206 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
207 src_base_layer
= pRegions
[r
].srcOffset
.z
;
209 src_base_layer
= pRegions
[r
].srcSubresource
.baseArrayLayer
;
210 assert(pRegions
[r
].srcSubresource
.layerCount
== layer_count
);
/* Vulkan requires matching aspect masks on both subresources. */
213 assert(pRegions
[r
].srcSubresource
.aspectMask
==
214 pRegions
[r
].dstSubresource
.aspectMask
);
217 for_each_bit(a
, pRegions
[r
].dstSubresource
.aspectMask
) {
218 VkImageAspectFlagBits aspect
= (1 << a
);
220 struct blorp_surf src_surf
, dst_surf
;
221 get_blorp_surf_for_anv_image(src_image
, aspect
, &src_surf
);
222 get_blorp_surf_for_anv_image(dst_image
, aspect
, &dst_surf
);
224 for (unsigned i
= 0; i
< layer_count
; i
++) {
225 blorp_copy(&batch
, &src_surf
, pRegions
[r
].srcSubresource
.mipLevel
,
227 &dst_surf
, pRegions
[r
].dstSubresource
.mipLevel
,
229 srcOffset
.x
, srcOffset
.y
,
230 dstOffset
.x
, dstOffset
.y
,
231 extent
.width
, extent
.height
);
236 blorp_batch_finish(&batch
);
/* Shared implementation of vkCmdCopyBufferToImage / vkCmdCopyImageToBuffer.
 * The buffer side is modeled as a linear 2D surface (one per slice), and
 * src/dst pointers are aimed at the buffer or image description depending on
 * buffer_to_image, so one blorp_copy loop handles both directions.
 *
 * NOTE(review): line-mangled extraction; the local struct definition wrapped
 * by "} image, buffer, *src, *dst;", the src/dst aiming in the
 * buffer_to_image branch, the "VkExtent3D extent =" declarations (original
 * lines 271 and 275), part of buffer_layer_stride (original line 302) and
 * several closing braces are missing from view.
 */
240 copy_buffer_to_image(struct anv_cmd_buffer
*cmd_buffer
,
241 struct anv_buffer
*anv_buffer
,
242 struct anv_image
*anv_image
,
243 uint32_t regionCount
,
244 const VkBufferImageCopy
* pRegions
,
245 bool buffer_to_image
)
247 struct blorp_batch batch
;
248 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
251 struct blorp_surf surf
;
254 } image
, buffer
, *src
, *dst
;
/* The buffer-side surface always starts at its own origin. */
257 buffer
.offset
= (VkOffset3D
) { 0, 0, 0 };
259 if (buffer_to_image
) {
267 for (unsigned r
= 0; r
< regionCount
; r
++) {
268 const VkImageAspectFlags aspect
= pRegions
[r
].imageSubresource
.aspectMask
;
270 get_blorp_surf_for_anv_image(anv_image
, aspect
, &image
.surf
);
272 anv_sanitize_image_offset(anv_image
->type
, pRegions
[r
].imageOffset
);
273 image
.level
= pRegions
[r
].imageSubresource
.mipLevel
;
276 anv_sanitize_image_extent(anv_image
->type
, pRegions
[r
].imageExtent
);
/* Non-3D images copy layerCount array layers starting at
 * baseArrayLayer; 3D images get depth from the (missing) extent setup.
 */
277 if (anv_image
->type
!= VK_IMAGE_TYPE_3D
) {
278 image
.offset
.z
= pRegions
[r
].imageSubresource
.baseArrayLayer
;
279 extent
.depth
= pRegions
[r
].imageSubresource
.layerCount
;
/* Pick the linear format used for the buffer side of the copy. */
282 const enum isl_format buffer_format
=
283 anv_get_isl_format(&cmd_buffer
->device
->info
, anv_image
->vk_format
,
284 aspect
, VK_IMAGE_TILING_LINEAR
);
/* bufferRowLength / bufferImageHeight of 0 mean "tightly packed". */
286 const VkExtent3D bufferImageExtent
= {
287 .width
= pRegions
[r
].bufferRowLength
?
288 pRegions
[r
].bufferRowLength
: extent
.width
,
289 .height
= pRegions
[r
].bufferImageHeight
?
290 pRegions
[r
].bufferImageHeight
: extent
.height
,
293 const struct isl_format_layout
*buffer_fmtl
=
294 isl_format_get_layout(buffer_format
);
/* Pitch in bytes: blocks per row times bytes per block. */
296 const uint32_t buffer_row_pitch
=
297 DIV_ROUND_UP(bufferImageExtent
.width
, buffer_fmtl
->bw
) *
298 (buffer_fmtl
->bpb
/ 8);
300 const uint32_t buffer_layer_stride
=
301 DIV_ROUND_UP(bufferImageExtent
.height
, buffer_fmtl
->bh
) *
304 struct isl_surf buffer_isl_surf
;
305 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
306 anv_buffer
, pRegions
[r
].bufferOffset
,
307 extent
.width
, extent
.height
,
308 buffer_row_pitch
, buffer_format
,
309 &buffer
.surf
, &buffer_isl_surf
);
/* One 2D copy per slice; advance the buffer surface between slices. */
311 for (unsigned z
= 0; z
< extent
.depth
; z
++) {
312 blorp_copy(&batch
, &src
->surf
, src
->level
, src
->offset
.z
,
313 &dst
->surf
, dst
->level
, dst
->offset
.z
,
314 src
->offset
.x
, src
->offset
.y
, dst
->offset
.x
, dst
->offset
.y
,
315 extent
.width
, extent
.height
);
318 buffer
.surf
.addr
.offset
+= buffer_layer_stride
;
322 blorp_batch_finish(&batch
);
/* vkCmdCopyBufferToImage: thin wrapper that unwraps the handles and forwards
 * to copy_buffer_to_image() with buffer_to_image = true.
 *
 * NOTE(review): the srcBuffer/dstImage handle parameters (original lines
 * 327-328) and braces are missing from this extraction.
 */
325 void anv_CmdCopyBufferToImage(
326 VkCommandBuffer commandBuffer
,
329 VkImageLayout dstImageLayout
,
330 uint32_t regionCount
,
331 const VkBufferImageCopy
* pRegions
)
333 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
334 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
335 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
337 copy_buffer_to_image(cmd_buffer
, src_buffer
, dst_image
,
338 regionCount
, pRegions
, true);
/* vkCmdCopyImageToBuffer: thin wrapper that unwraps the handles and forwards
 * to copy_buffer_to_image() with buffer_to_image = false (image is the copy
 * source).
 *
 * NOTE(review): the srcImage/dstBuffer handle parameters (original lines
 * 343 and 345) and braces are missing from this extraction.
 */
341 void anv_CmdCopyImageToBuffer(
342 VkCommandBuffer commandBuffer
,
344 VkImageLayout srcImageLayout
,
346 uint32_t regionCount
,
347 const VkBufferImageCopy
* pRegions
)
349 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
350 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
351 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
353 copy_buffer_to_image(cmd_buffer
, dst_buffer
, src_image
,
354 regionCount
, pRegions
, false);
/* Normalize a possibly-reversed blit interval: the visible fragments swap
 * *src0/*src1 and *dst0/*dst1 via a temporary.
 *
 * NOTE(review): heavily truncated extraction -- the return type, the
 * conditions guarding each swap, and the returned flip flag (used as a bool
 * by all callers, e.g. "bool flip_x = flip_coords(...)") are missing from
 * view; reconstruct from upstream before editing.
 */
358 flip_coords(unsigned *src0
, unsigned *src1
, unsigned *dst0
, unsigned *dst1
)
362 unsigned tmp
= *src0
;
369 unsigned tmp
= *dst0
;
/* vkCmdBlitImage: per region, resolve source/destination formats, compute
 * (and normalize via flip_coords) the x/y/z ranges, then issue one
 * blorp_blit per destination layer with the GL filter matching the VkFilter.
 *
 * NOTE(review): line-mangled extraction; the srcImage/dstImage/filter
 * parameters, the "switch (filter)" header and break statements, the
 * gl_filter declaration, else branches, the 3D src_z scaling setup around
 * original lines 447-452, and closing braces are missing from view.
 */
378 void anv_CmdBlitImage(
379 VkCommandBuffer commandBuffer
,
381 VkImageLayout srcImageLayout
,
383 VkImageLayout dstImageLayout
,
384 uint32_t regionCount
,
385 const VkImageBlit
* pRegions
,
389 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
390 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
391 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
393 struct blorp_surf src
, dst
;
/* Translate the Vulkan filter to the GL enum blorp expects. */
397 case VK_FILTER_NEAREST
:
398 gl_filter
= 0x2600; /* GL_NEAREST */
400 case VK_FILTER_LINEAR
:
401 gl_filter
= 0x2601; /* GL_LINEAR */
404 unreachable("Invalid filter");
407 struct blorp_batch batch
;
408 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
410 for (unsigned r
= 0; r
< regionCount
; r
++) {
411 const VkImageSubresourceLayers
*src_res
= &pRegions
[r
].srcSubresource
;
412 const VkImageSubresourceLayers
*dst_res
= &pRegions
[r
].dstSubresource
;
414 get_blorp_surf_for_anv_image(src_image
, src_res
->aspectMask
, &src
);
415 get_blorp_surf_for_anv_image(dst_image
, dst_res
->aspectMask
, &dst
);
417 struct anv_format src_format
=
418 anv_get_format(&cmd_buffer
->device
->info
, src_image
->vk_format
,
419 src_res
->aspectMask
, src_image
->tiling
);
420 struct anv_format dst_format
=
421 anv_get_format(&cmd_buffer
->device
->info
, dst_image
->vk_format
,
422 dst_res
->aspectMask
, dst_image
->tiling
);
/* z range: 3D images use the dstOffsets z interval, array images use
 * baseArrayLayer..baseArrayLayer+layerCount.
 */
424 unsigned dst_start
, dst_end
;
425 if (dst_image
->type
== VK_IMAGE_TYPE_3D
) {
426 assert(dst_res
->baseArrayLayer
== 0);
427 dst_start
= pRegions
[r
].dstOffsets
[0].z
;
428 dst_end
= pRegions
[r
].dstOffsets
[1].z
;
430 dst_start
= dst_res
->baseArrayLayer
;
431 dst_end
= dst_start
+ dst_res
->layerCount
;
434 unsigned src_start
, src_end
;
435 if (src_image
->type
== VK_IMAGE_TYPE_3D
) {
436 assert(src_res
->baseArrayLayer
== 0);
437 src_start
= pRegions
[r
].srcOffsets
[0].z
;
438 src_end
= pRegions
[r
].srcOffsets
[1].z
;
440 src_start
= src_res
->baseArrayLayer
;
441 src_end
= src_start
+ src_res
->layerCount
;
444 bool flip_z
= flip_coords(&src_start
, &src_end
, &dst_start
, &dst_end
);
/* Step from source slice to source slice as we walk destination
 * slices.
 */
445 float src_z_step
= (float)(src_end
+ 1 - src_start
) /
446 (float)(dst_end
+ 1 - dst_start
);
453 unsigned src_x0
= pRegions
[r
].srcOffsets
[0].x
;
454 unsigned src_x1
= pRegions
[r
].srcOffsets
[1].x
;
455 unsigned dst_x0
= pRegions
[r
].dstOffsets
[0].x
;
456 unsigned dst_x1
= pRegions
[r
].dstOffsets
[1].x
;
457 bool flip_x
= flip_coords(&src_x0
, &src_x1
, &dst_x0
, &dst_x1
);
459 unsigned src_y0
= pRegions
[r
].srcOffsets
[0].y
;
460 unsigned src_y1
= pRegions
[r
].srcOffsets
[1].y
;
461 unsigned dst_y0
= pRegions
[r
].dstOffsets
[0].y
;
462 unsigned dst_y1
= pRegions
[r
].dstOffsets
[1].y
;
463 bool flip_y
= flip_coords(&src_y0
, &src_y1
, &dst_y0
, &dst_y1
);
465 const unsigned num_layers
= dst_end
- dst_start
;
466 for (unsigned i
= 0; i
< num_layers
; i
++) {
467 unsigned dst_z
= dst_start
+ i
;
468 unsigned src_z
= src_start
+ i
* src_z_step
;
470 blorp_blit(&batch
, &src
, src_res
->mipLevel
, src_z
,
471 src_format
.isl_format
, src_format
.swizzle
,
472 &dst
, dst_res
->mipLevel
, dst_z
,
473 dst_format
.isl_format
, dst_format
.swizzle
,
474 src_x0
, src_y0
, src_x1
, src_y1
,
475 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
476 gl_filter
, flip_x
, flip_y
);
481 blorp_batch_finish(&batch
);
/* Map a power-of-two byte size (1..16) to an RGBA UINT isl format of that
 * exact size, used for raw buffer copies/fills where only the block size
 * matters.
 *
 * NOTE(review): the "switch (size_B) {", "default:" and braces are missing
 * from this extraction; only the case arms survive.
 */
484 static enum isl_format
485 isl_format_for_size(unsigned size_B
)
488 case 1: return ISL_FORMAT_R8_UINT
;
489 case 2: return ISL_FORMAT_R8G8_UINT
;
490 case 4: return ISL_FORMAT_R8G8B8A8_UINT
;
491 case 8: return ISL_FORMAT_R16G16B16A16_UINT
;
492 case 16: return ISL_FORMAT_R32G32B32A32_UINT
;
494 unreachable("Not a power-of-two format size");
/* Copy a width x height rectangle of block_size-byte blocks between two BOs
 * by building matching linear 2D surfaces over the raw bytes and handing
 * them to blorp_copy.
 *
 * NOTE(review): line-mangled extraction; parts of the isl_surf_init()
 * argument list (original lines 514-520), the .surf/.addr members of the two
 * blorp_surf initializers, and braces are missing from view.
 */
499 do_buffer_copy(struct blorp_batch
*batch
,
500 struct anv_bo
*src
, uint64_t src_offset
,
501 struct anv_bo
*dst
, uint64_t dst_offset
,
502 int width
, int height
, int block_size
)
504 struct anv_device
*device
= batch
->blorp
->driver_ctx
;
506 /* The actual format we pick doesn't matter as blorp will throw it away.
507 * The only thing that actually matters is the size.
 */
509 enum isl_format format
= isl_format_for_size(block_size
);
511 struct isl_surf surf
;
512 isl_surf_init(&device
->isl_dev
, &surf
,
513 .dim
= ISL_SURF_DIM_2D
,
521 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
|
522 ISL_SURF_USAGE_RENDER_TARGET_BIT
,
523 .tiling_flags
= ISL_TILING_LINEAR_BIT
);
/* The copy math assumes a tightly-packed row pitch. */
524 assert(surf
.row_pitch
== width
* block_size
);
526 struct blorp_surf src_blorp_surf
= {
530 .offset
= src_offset
,
534 struct blorp_surf dst_blorp_surf
= {
538 .offset
= dst_offset
,
542 blorp_copy(batch
, &src_blorp_surf
, 0, 0, &dst_blorp_surf
, 0, 0,
543 0, 0, 0, 0, width
, height
);
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero; the power-of-two GCD of a value
 * with 0 is the value's own lowest set bit (0 poses no constraint).
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The largest power of two dividing a value is its lowest set bit, and
    * the lowest set bit of (a | b) is the minimum of the two values' lowest
    * set bits.  "x & -x" isolates the lowest set bit of x.
    *
    * Doing it this way (instead of the old ffsll() + "1 << MIN2(...)")
    * avoids the non-standard ffsll() and, more importantly, the undefined
    * behavior of shifting the int constant 1 by 31 or more bits even though
    * the function returns a uint64_t.
    */
   uint64_t x = a | b;
   return x & (~x + 1);
}
564 /* This is maximum possible width/height our HW can handle */
565 #define MAX_SURFACE_DIM (1ull << 14)
/* vkCmdCopyBuffer: per region, pick the largest power-of-two block size
 * compatible with both offsets and the size, then express the 1D byte copy
 * as a sequence of 2D blorp copies: full MAX_SURFACE_DIM x MAX_SURFACE_DIM
 * rectangles, then a max-width rectangle, then a final 1-row remainder.
 *
 * NOTE(review): line-mangled extraction; the srcBuffer/dstBuffer handle
 * parameters, the initial "uint64_t bs = 16;" seed for the block-size
 * computation (original lines 588-589), and closing braces are missing from
 * view.
 */
567 void anv_CmdCopyBuffer(
568 VkCommandBuffer commandBuffer
,
571 uint32_t regionCount
,
572 const VkBufferCopy
* pRegions
)
574 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
575 ANV_FROM_HANDLE(anv_buffer
, src_buffer
, srcBuffer
);
576 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
578 struct blorp_batch batch
;
579 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
581 for (unsigned r
= 0; r
< regionCount
; r
++) {
582 uint64_t src_offset
= src_buffer
->offset
+ pRegions
[r
].srcOffset
;
583 uint64_t dst_offset
= dst_buffer
->offset
+ pRegions
[r
].dstOffset
;
584 uint64_t copy_size
= pRegions
[r
].size
;
586 /* First, we compute the biggest format that can be used with the
587 * given offsets and size.
 */
590 bs
= gcd_pow2_u64(bs
, src_offset
);
591 bs
= gcd_pow2_u64(bs
, dst_offset
);
592 bs
= gcd_pow2_u64(bs
, pRegions
[r
].size
);
594 /* First, we make a bunch of max-sized copies */
595 uint64_t max_copy_size
= MAX_SURFACE_DIM
* MAX_SURFACE_DIM
* bs
;
596 while (copy_size
>= max_copy_size
) {
597 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
598 dst_buffer
->bo
, dst_offset
,
599 MAX_SURFACE_DIM
, MAX_SURFACE_DIM
, bs
);
600 copy_size
-= max_copy_size
;
601 src_offset
+= max_copy_size
;
602 dst_offset
+= max_copy_size
;
605 /* Now make a max-width copy */
606 uint64_t height
= copy_size
/ (MAX_SURFACE_DIM
* bs
);
607 assert(height
< MAX_SURFACE_DIM
);
609 uint64_t rect_copy_size
= height
* MAX_SURFACE_DIM
* bs
;
610 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
611 dst_buffer
->bo
, dst_offset
,
612 MAX_SURFACE_DIM
, height
, bs
);
613 copy_size
-= rect_copy_size
;
614 src_offset
+= rect_copy_size
;
615 dst_offset
+= rect_copy_size
;
618 /* Finally, make a small copy to finish it off */
619 if (copy_size
!= 0) {
620 do_buffer_copy(&batch
, src_buffer
->bo
, src_offset
,
621 dst_buffer
->bo
, dst_offset
,
622 copy_size
/ bs
, 1, bs
);
626 blorp_batch_finish(&batch
);
/* vkCmdUpdateBuffer: stage the user data into dynamic state (in chunks no
 * larger than a dynamic-state block minus header room), then blorp-copy each
 * chunk from the dynamic state pool BO into the destination buffer.
 *
 * NOTE(review): line-mangled extraction; the dstBuffer handle parameter, the
 * chunking loop header over dataSize (around original lines 649-650), the
 * "bs" seed before the gcd calls, the tmp_data.offset source argument
 * (original line 664), and closing braces are missing from view.
 */
629 void anv_CmdUpdateBuffer(
630 VkCommandBuffer commandBuffer
,
632 VkDeviceSize dstOffset
,
633 VkDeviceSize dataSize
,
634 const uint32_t* pData
)
636 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
637 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
639 struct blorp_batch batch
;
640 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
642 /* We can't quite grab a full block because the state stream needs a
643 * little data at the top to build its linked list.
 */
645 const uint32_t max_update_size
=
646 cmd_buffer
->device
->dynamic_state_block_pool
.block_size
- 64;
/* A single max-width, 1-row blorp copy must be able to cover a chunk. */
648 assert(max_update_size
< MAX_SURFACE_DIM
* 4);
651 const uint32_t copy_size
= MIN2(dataSize
, max_update_size
);
653 struct anv_state tmp_data
=
654 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
, copy_size
, 64);
656 memcpy(tmp_data
.map
, pData
, copy_size
);
/* Block size limited by the alignment of the destination offset and the
 * chunk size.
 */
659 bs
= gcd_pow2_u64(bs
, dstOffset
);
660 bs
= gcd_pow2_u64(bs
, copy_size
);
662 do_buffer_copy(&batch
,
663 &cmd_buffer
->device
->dynamic_state_block_pool
.bo
,
665 dst_buffer
->bo
, dst_buffer
->offset
+ dstOffset
,
666 copy_size
/ bs
, 1, bs
);
668 dataSize
-= copy_size
;
669 dstOffset
+= copy_size
;
670 pData
= (void *)pData
+ copy_size
;
673 blorp_batch_finish(&batch
);
/* vkCmdFillBuffer: resolve VK_WHOLE_SIZE, pick a power-of-two block size
 * from the offset/size alignment, splat the 32-bit pattern into a clear
 * color, then clear the byte range as a series of 2D surfaces: full
 * MAX_SURFACE_DIM squares, a max-width rectangle, and a final 1-row tail.
 *
 * NOTE(review): line-mangled extraction; the dstBuffer/data parameters, the
 * ROUND_DOWN of fillSize to a multiple of 4 (after original line 693), the
 * "bs" seed, the surf/isl_surf arguments and clear-color arguments of the
 * get_blorp_surf_for_anv_buffer()/blorp_clear() calls, the tail-fill guard,
 * and closing braces are missing from view.
 */
676 void anv_CmdFillBuffer(
677 VkCommandBuffer commandBuffer
,
679 VkDeviceSize dstOffset
,
680 VkDeviceSize fillSize
,
683 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
684 ANV_FROM_HANDLE(anv_buffer
, dst_buffer
, dstBuffer
);
685 struct blorp_surf surf
;
686 struct isl_surf isl_surf
;
688 struct blorp_batch batch
;
689 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
691 if (fillSize
== VK_WHOLE_SIZE
) {
692 fillSize
= dst_buffer
->size
- dstOffset
;
693 /* Make sure fillSize is a multiple of 4 */
697 /* First, we compute the biggest format that can be used with the
698 * given offsets and size.
 */
701 bs
= gcd_pow2_u64(bs
, dstOffset
);
702 bs
= gcd_pow2_u64(bs
, fillSize
);
703 enum isl_format isl_format
= isl_format_for_size(bs
);
/* Replicate the 32-bit fill word across all four channels. */
705 union isl_color_value color
= {
706 .u32
= { data
, data
, data
, data
},
709 const uint64_t max_fill_size
= MAX_SURFACE_DIM
* MAX_SURFACE_DIM
* bs
;
710 while (fillSize
>= max_fill_size
) {
711 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
712 dst_buffer
, dstOffset
,
713 MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
714 MAX_SURFACE_DIM
* bs
, isl_format
,
717 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
718 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, MAX_SURFACE_DIM
,
720 fillSize
-= max_fill_size
;
721 dstOffset
+= max_fill_size
;
724 uint64_t height
= fillSize
/ (MAX_SURFACE_DIM
* bs
);
725 assert(height
< MAX_SURFACE_DIM
);
727 const uint64_t rect_fill_size
= height
* MAX_SURFACE_DIM
* bs
;
728 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
729 dst_buffer
, dstOffset
,
730 MAX_SURFACE_DIM
, height
,
731 MAX_SURFACE_DIM
* bs
, isl_format
,
734 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
735 0, 0, 1, 0, 0, MAX_SURFACE_DIM
, height
,
737 fillSize
-= rect_fill_size
;
738 dstOffset
+= rect_fill_size
;
742 const uint32_t width
= fillSize
/ bs
;
743 get_blorp_surf_for_anv_buffer(cmd_buffer
->device
,
744 dst_buffer
, dstOffset
,
746 width
* bs
, isl_format
,
749 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
750 0, 0, 1, 0, 0, width
, 1,
754 blorp_batch_finish(&batch
);
/* vkCmdClearColorImage: build one blorp surface for the color aspect, then
 * for each range and each mip level in the range, clear the full level
 * extent with the requested color (3D images clear all slices of the level).
 *
 * NOTE(review): line-mangled extraction; the _image handle / rangeCount
 * parameters, the "continue" after the empty-aspect check, the 3D
 * base_layer reset (original line 798), and closing braces are missing from
 * view.
 */
757 void anv_CmdClearColorImage(
758 VkCommandBuffer commandBuffer
,
760 VkImageLayout imageLayout
,
761 const VkClearColorValue
* pColor
,
763 const VkImageSubresourceRange
* pRanges
)
765 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
766 ANV_FROM_HANDLE(anv_image
, image
, _image
);
/* No channel is masked off for this clear. */
768 static const bool color_write_disable
[4] = { false, false, false, false };
770 struct blorp_batch batch
;
771 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
773 union isl_color_value clear_color
;
774 memcpy(clear_color
.u32
, pColor
->uint32
, sizeof(pColor
->uint32
));
776 struct blorp_surf surf
;
777 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_COLOR_BIT
, &surf
);
779 for (unsigned r
= 0; r
< rangeCount
; r
++) {
780 if (pRanges
[r
].aspectMask
== 0)
783 assert(pRanges
[r
].aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
);
785 struct anv_format src_format
=
786 anv_get_format(&cmd_buffer
->device
->info
, image
->vk_format
,
787 VK_IMAGE_ASPECT_COLOR_BIT
, image
->tiling
);
789 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
790 unsigned layer_count
= pRanges
[r
].layerCount
;
792 for (unsigned i
= 0; i
< pRanges
[r
].levelCount
; i
++) {
793 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
794 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
795 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
/* For 3D images the "layers" of a level are its depth slices. */
797 if (image
->type
== VK_IMAGE_TYPE_3D
) {
799 layer_count
= anv_minify(image
->extent
.depth
, level
);
802 blorp_clear(&batch
, &surf
,
803 src_format
.isl_format
, src_format
.swizzle
,
804 level
, base_layer
, layer_count
,
805 0, 0, level_width
, level_height
,
806 clear_color
, color_write_disable
);
810 blorp_batch_finish(&batch
);
/* vkCmdClearDepthStencilImage: build blorp surfaces for whichever of the
 * depth/stencil aspects the image has (zeroing the other), then clear each
 * mip level of each range with the requested depth/stencil values.  A
 * stencil write mask of 0xff is used when stencil is being cleared, 0
 * otherwise.
 *
 * NOTE(review): line-mangled extraction; the image_h handle / rangeCount
 * parameters, the &depth/&stencil out-arguments of the two
 * get_blorp_surf_for_anv_image() calls, the "continue" after the
 * empty-aspect check, and closing braces are missing from view.
 */
813 void anv_CmdClearDepthStencilImage(
814 VkCommandBuffer commandBuffer
,
816 VkImageLayout imageLayout
,
817 const VkClearDepthStencilValue
* pDepthStencil
,
819 const VkImageSubresourceRange
* pRanges
)
821 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
822 ANV_FROM_HANDLE(anv_image
, image
, image_h
);
824 struct blorp_batch batch
;
825 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
827 struct blorp_surf depth
, stencil
;
828 if (image
->aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
829 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
832 memset(&depth
, 0, sizeof(depth
));
835 if (image
->aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
836 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_STENCIL_BIT
,
839 memset(&stencil
, 0, sizeof(stencil
));
842 for (unsigned r
= 0; r
< rangeCount
; r
++) {
843 if (pRanges
[r
].aspectMask
== 0)
846 bool clear_depth
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
847 bool clear_stencil
= pRanges
[r
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
849 unsigned base_layer
= pRanges
[r
].baseArrayLayer
;
850 unsigned layer_count
= pRanges
[r
].layerCount
;
852 for (unsigned i
= 0; i
< pRanges
[r
].levelCount
; i
++) {
853 const unsigned level
= pRanges
[r
].baseMipLevel
+ i
;
854 const unsigned level_width
= anv_minify(image
->extent
.width
, level
);
855 const unsigned level_height
= anv_minify(image
->extent
.height
, level
);
/* For 3D images the "layers" of a level are its depth slices. */
857 if (image
->type
== VK_IMAGE_TYPE_3D
)
858 layer_count
= anv_minify(image
->extent
.depth
, level
);
860 blorp_clear_depth_stencil(&batch
, &depth
, &stencil
,
861 level
, base_layer
, layer_count
,
862 0, 0, level_width
, level_height
,
863 clear_depth
, pDepthStencil
->depth
,
864 clear_stencil
? 0xff : 0,
865 pDepthStencil
->stencil
);
869 blorp_batch_finish(&batch
);
/* Clear one subpass color attachment (identified by the VkClearAttachment's
 * subpass-relative colorAttachment index) for each clear rect, using the
 * attachment's image-view format/swizzle and base level/layer.
 *
 * NOTE(review): line-mangled extraction; the return type, the offset.x/
 * offset.y arguments of blorp_clear (original line 901), and braces are
 * missing from view.
 */
873 clear_color_attachment(struct anv_cmd_buffer
*cmd_buffer
,
874 struct blorp_batch
*batch
,
875 const VkClearAttachment
*attachment
,
876 uint32_t rectCount
, const VkClearRect
*pRects
)
878 const struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
879 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
880 const uint32_t att
= attachment
->colorAttachment
;
/* Translate the subpass-relative attachment index to a framebuffer
 * attachment.
 */
881 const struct anv_image_view
*iview
=
882 fb
->attachments
[subpass
->color_attachments
[att
]];
883 const struct anv_image
*image
= iview
->image
;
885 struct blorp_surf surf
;
886 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_COLOR_BIT
, &surf
);
888 union isl_color_value clear_color
;
889 memcpy(clear_color
.u32
, attachment
->clearValue
.color
.uint32
,
890 sizeof(clear_color
.u32
));
/* No channel is masked off for this clear. */
892 static const bool color_write_disable
[4] = { false, false, false, false };
894 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
895 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
896 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
897 blorp_clear(batch
, &surf
, iview
->isl
.format
, iview
->isl
.swizzle
,
898 iview
->isl
.base_level
,
899 iview
->isl
.base_array_layer
+ pRects
[r
].baseArrayLayer
,
900 pRects
[r
].layerCount
,
902 offset
.x
+ extent
.width
, offset
.y
+ extent
.height
,
903 clear_color
, color_write_disable
);
/* Clear the subpass depth/stencil attachment for each clear rect, honoring
 * which aspects the VkClearAttachment requests.  The unavailable aspect's
 * blorp surface is zeroed out.
 *
 * NOTE(review): line-mangled extraction; the return type, the guards around
 * the two get_blorp_surf_for_anv_image() calls (the "if (clear_depth)" /
 * "if (clear_stencil)" or aspect checks), their &depth/&stencil
 * out-arguments, the offset.x/offset.y arguments of
 * blorp_clear_depth_stencil (original line 946), and braces are missing
 * from view.
 */
908 clear_depth_stencil_attachment(struct anv_cmd_buffer
*cmd_buffer
,
909 struct blorp_batch
*batch
,
910 const VkClearAttachment
*attachment
,
911 uint32_t rectCount
, const VkClearRect
*pRects
)
913 const struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
914 const struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
915 const struct anv_image_view
*iview
=
916 fb
->attachments
[subpass
->depth_stencil_attachment
];
917 const struct anv_image
*image
= iview
->image
;
919 bool clear_depth
= attachment
->aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
;
920 bool clear_stencil
= attachment
->aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
;
922 struct blorp_surf depth
, stencil
;
924 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_DEPTH_BIT
,
927 memset(&depth
, 0, sizeof(depth
));
931 get_blorp_surf_for_anv_image(image
, VK_IMAGE_ASPECT_STENCIL_BIT
,
934 memset(&stencil
, 0, sizeof(stencil
));
937 for (uint32_t r
= 0; r
< rectCount
; ++r
) {
938 const VkOffset2D offset
= pRects
[r
].rect
.offset
;
939 const VkExtent2D extent
= pRects
[r
].rect
.extent
;
940 VkClearDepthStencilValue value
= attachment
->clearValue
.depthStencil
;
941 blorp_clear_depth_stencil(batch
, &depth
, &stencil
,
942 iview
->isl
.base_level
,
943 iview
->isl
.base_array_layer
+
944 pRects
[r
].baseArrayLayer
,
945 pRects
[r
].layerCount
,
947 offset
.x
+ extent
.width
,
948 offset
.y
+ extent
.height
,
949 clear_depth
, value
.depth
,
950 clear_stencil
? 0xff : 0, value
.stencil
);
/* vkCmdClearAttachments: runs inside a render pass, so the batch is created
 * with BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; dispatches each attachment to the
 * color or depth/stencil clear helper.
 *
 * NOTE(review): line-mangled extraction; the rectCount parameter, the
 * trailing arguments of the two helper calls (&pAttachments[a], rectCount,
 * pRects), the else branch, and braces are missing from view.
 */
954 void anv_CmdClearAttachments(
955 VkCommandBuffer commandBuffer
,
956 uint32_t attachmentCount
,
957 const VkClearAttachment
* pAttachments
,
959 const VkClearRect
* pRects
)
961 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
963 /* Because this gets called within a render pass, we tell blorp not to
964 * trash our depth and stencil buffers.
 */
966 struct blorp_batch batch
;
967 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
968 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
970 for (uint32_t a
= 0; a
< attachmentCount
; ++a
) {
971 if (pAttachments
[a
].aspectMask
== VK_IMAGE_ASPECT_COLOR_BIT
) {
972 clear_color_attachment(cmd_buffer
, &batch
,
976 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
982 blorp_batch_finish(&batch
);
/* Returns whether any attachment of the current subpass (color or
 * depth/stencil) still has pending clear aspects.
 *
 * NOTE(review): line-mangled extraction; the return type and the
 * "return true;" / "return false;" statements inside and after the checks
 * are missing from view.
 */
986 subpass_needs_clear(const struct anv_cmd_buffer
*cmd_buffer
)
988 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
989 uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
;
991 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
992 uint32_t a
= cmd_state
->subpass
->color_attachments
[i
];
993 if (cmd_state
->attachments
[a
].pending_clear_aspects
) {
998 if (ds
!= VK_ATTACHMENT_UNUSED
&&
999 cmd_state
->attachments
[ds
].pending_clear_aspects
) {
/* Perform the pending loadOp=CLEAR work for the current subpass: one full
 * render-area clear rect is built, then each color attachment with pending
 * clear aspects is cleared via clear_color_attachment() (note the
 * subpass-relative index), and finally the depth/stencil attachment if it
 * has pending aspects.  Pending aspects are zeroed once handled.
 *
 * NOTE(review): line-mangled extraction; the early "return" after the
 * subpass_needs_clear() check, the "continue" in the color loop, and braces
 * are missing from view.
 */
1007 anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1009 const struct anv_cmd_state
*cmd_state
= &cmd_buffer
->state
;
1011 if (!subpass_needs_clear(cmd_buffer
))
1014 /* Because this gets called within a render pass, we tell blorp not to
1015 * trash our depth and stencil buffers.
 */
1017 struct blorp_batch batch
;
1018 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
,
1019 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL
);
/* All clears cover the whole render area and every framebuffer layer. */
1021 VkClearRect clear_rect
= {
1022 .rect
= cmd_buffer
->state
.render_area
,
1023 .baseArrayLayer
= 0,
1024 .layerCount
= cmd_buffer
->state
.framebuffer
->layers
,
1027 for (uint32_t i
= 0; i
< cmd_state
->subpass
->color_count
; ++i
) {
1028 const uint32_t a
= cmd_state
->subpass
->color_attachments
[i
];
1030 if (!cmd_state
->attachments
[a
].pending_clear_aspects
)
1033 assert(cmd_state
->attachments
[a
].pending_clear_aspects
==
1034 VK_IMAGE_ASPECT_COLOR_BIT
);
1036 VkClearAttachment clear_att
= {
1037 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
1038 .colorAttachment
= i
, /* Use attachment index relative to subpass */
1039 .clearValue
= cmd_state
->attachments
[a
].clear_value
,
1042 clear_color_attachment(cmd_buffer
, &batch
, &clear_att
, 1, &clear_rect
);
1044 cmd_state
->attachments
[a
].pending_clear_aspects
= 0;
1047 const uint32_t ds
= cmd_state
->subpass
->depth_stencil_attachment
;
1049 if (ds
!= VK_ATTACHMENT_UNUSED
&&
1050 cmd_state
->attachments
[ds
].pending_clear_aspects
) {
1052 VkClearAttachment clear_att
= {
1053 .aspectMask
= cmd_state
->attachments
[ds
].pending_clear_aspects
,
1054 .clearValue
= cmd_state
->attachments
[ds
].clear_value
,
1057 clear_depth_stencil_attachment(cmd_buffer
, &batch
,
1058 &clear_att
, 1, &clear_rect
);
1060 cmd_state
->attachments
[ds
].pending_clear_aspects
= 0;
1063 blorp_batch_finish(&batch
);
/* Resolve one (level, layer) of a multisampled 2D source image into a
 * single-sampled 2D destination, per aspect bit, by issuing a 1:1
 * nearest-filtered blorp_blit with unchanged formats
 * (ISL_FORMAT_UNSUPPORTED lets blorp keep the surfaces' own formats).
 *
 * NOTE(review): line-mangled extraction; the return type, the "unsigned a;"
 * for for_each_bit (original line 1081), the blorp_blit(batch, ...) call
 * head (original lines 1088-1089), and braces are missing from view.
 */
1067 resolve_image(struct blorp_batch
*batch
,
1068 const struct anv_image
*src_image
,
1069 uint32_t src_level
, uint32_t src_layer
,
1070 const struct anv_image
*dst_image
,
1071 uint32_t dst_level
, uint32_t dst_layer
,
1072 VkImageAspectFlags aspect_mask
,
1073 uint32_t src_x
, uint32_t src_y
, uint32_t dst_x
, uint32_t dst_y
,
1074 uint32_t width
, uint32_t height
)
1076 assert(src_image
->type
== VK_IMAGE_TYPE_2D
);
1077 assert(src_image
->samples
> 1);
1078 assert(dst_image
->type
== VK_IMAGE_TYPE_2D
);
1079 assert(dst_image
->samples
== 1);
1082 for_each_bit(a
, aspect_mask
) {
1083 VkImageAspectFlagBits aspect
= 1 << a
;
1085 struct blorp_surf src_surf
, dst_surf
;
1086 get_blorp_surf_for_anv_image(src_image
, aspect
, &src_surf
);
1087 get_blorp_surf_for_anv_image(dst_image
, aspect
, &dst_surf
);
1090 &src_surf
, src_level
, src_layer
,
1091 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1092 &dst_surf
, dst_level
, dst_layer
,
1093 ISL_FORMAT_UNSUPPORTED
, ISL_SWIZZLE_IDENTITY
,
1094 src_x
, src_y
, src_x
+ width
, src_y
+ height
,
1095 dst_x
, dst_y
, dst_x
+ width
, dst_y
+ height
,
1096 0x2600 /* GL_NEAREST */, false, false);
/* vkCmdResolveImage: per region (after validating matching aspect masks and
 * layer counts), resolve each layer of the source subresource into the
 * corresponding destination layer via resolve_image().
 *
 * NOTE(review): line-mangled extraction; the srcImage/dstImage handle
 * parameters and closing braces are missing from view.
 */
1100 void anv_CmdResolveImage(
1101 VkCommandBuffer commandBuffer
,
1103 VkImageLayout srcImageLayout
,
1105 VkImageLayout dstImageLayout
,
1106 uint32_t regionCount
,
1107 const VkImageResolve
* pRegions
)
1109 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
1110 ANV_FROM_HANDLE(anv_image
, src_image
, srcImage
);
1111 ANV_FROM_HANDLE(anv_image
, dst_image
, dstImage
);
1113 struct blorp_batch batch
;
1114 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1116 for (uint32_t r
= 0; r
< regionCount
; r
++) {
1117 assert(pRegions
[r
].srcSubresource
.aspectMask
==
1118 pRegions
[r
].dstSubresource
.aspectMask
);
1119 assert(pRegions
[r
].srcSubresource
.layerCount
==
1120 pRegions
[r
].dstSubresource
.layerCount
);
1122 const uint32_t layer_count
= pRegions
[r
].dstSubresource
.layerCount
;
1124 for (uint32_t layer
= 0; layer
< layer_count
; layer
++) {
1125 resolve_image(&batch
,
1126 src_image
, pRegions
[r
].srcSubresource
.mipLevel
,
1127 pRegions
[r
].srcSubresource
.baseArrayLayer
+ layer
,
1128 dst_image
, pRegions
[r
].dstSubresource
.mipLevel
,
1129 pRegions
[r
].dstSubresource
.baseArrayLayer
+ layer
,
1130 pRegions
[r
].dstSubresource
.aspectMask
,
1131 pRegions
[r
].srcOffset
.x
, pRegions
[r
].srcOffset
.y
,
1132 pRegions
[r
].dstOffset
.x
, pRegions
[r
].dstOffset
.y
,
1133 pRegions
[r
].extent
.width
, pRegions
[r
].extent
.height
);
1137 blorp_batch_finish(&batch
);
/* At the end of a subpass with resolve attachments, resolve each color
 * attachment into its paired resolve attachment over the render area.
 *
 * NOTE(review): line-mangled extraction; the early "return" after the
 * has_resolve check, the "continue" for unused resolve attachments, the
 * dst_iview->image argument of resolve_image() (original line 1176), and
 * closing braces are missing from view.
 */
1141 anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer
*cmd_buffer
)
1143 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
1144 struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
1146 /* FINISHME(perf): Skip clears for resolve attachments.
1148 * From the Vulkan 1.0 spec:
1150 * If the first use of an attachment in a render pass is as a resolve
1151 * attachment, then the loadOp is effectively ignored as the resolve is
1152 * guaranteed to overwrite all pixels in the render area.
 */
1155 if (!subpass
->has_resolve
)
1158 struct blorp_batch batch
;
1159 blorp_batch_init(&cmd_buffer
->device
->blorp
, &batch
, cmd_buffer
, 0);
1161 for (uint32_t i
= 0; i
< subpass
->color_count
; ++i
) {
1162 uint32_t src_att
= subpass
->color_attachments
[i
];
1163 uint32_t dst_att
= subpass
->resolve_attachments
[i
];
1165 if (dst_att
== VK_ATTACHMENT_UNUSED
)
1168 struct anv_image_view
*src_iview
= fb
->attachments
[src_att
];
1169 struct anv_image_view
*dst_iview
= fb
->attachments
[dst_att
];
1171 const VkRect2D render_area
= cmd_buffer
->state
.render_area
;
1173 assert(src_iview
->aspect_mask
== dst_iview
->aspect_mask
);
/* Source and destination rectangles coincide: same offset, same size. */
1174 resolve_image(&batch
, src_iview
->image
,
1175 src_iview
->isl
.base_level
, src_iview
->isl
.base_array_layer
,
1177 dst_iview
->isl
.base_level
, dst_iview
->isl
.base_array_layer
,
1178 src_iview
->aspect_mask
,
1179 render_area
.offset
.x
, render_area
.offset
.y
,
1180 render_area
.offset
.x
, render_area
.offset
.y
,
1181 render_area
.extent
.width
, render_area
.extent
.height
);
1184 blorp_batch_finish(&batch
);