232d02fdddc7386af1bad5c80326758386e4ea6e
[mesa.git] / src / intel / vulkan / anv_blorp.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 static bool
27 lookup_blorp_shader(struct blorp_batch *batch,
28 const void *key, uint32_t key_size,
29 uint32_t *kernel_out, void *prog_data_out)
30 {
31 struct blorp_context *blorp = batch->blorp;
32 struct anv_device *device = blorp->driver_ctx;
33
34 /* The default cache must be a real cache */
35 assert(device->default_pipeline_cache.cache);
36
37 struct anv_shader_bin *bin =
38 anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size);
39 if (!bin)
40 return false;
41
42 /* The cache already has a reference and it's not going anywhere so there
43 * is no need to hold a second reference.
44 */
45 anv_shader_bin_unref(device, bin);
46
47 *kernel_out = bin->kernel.offset;
48 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
49
50 return true;
51 }
52
53 static bool
54 upload_blorp_shader(struct blorp_batch *batch,
55 const void *key, uint32_t key_size,
56 const void *kernel, uint32_t kernel_size,
57 const struct brw_stage_prog_data *prog_data,
58 uint32_t prog_data_size,
59 uint32_t *kernel_out, void *prog_data_out)
60 {
61 struct blorp_context *blorp = batch->blorp;
62 struct anv_device *device = blorp->driver_ctx;
63
64 /* The blorp cache must be a real cache */
65 assert(device->default_pipeline_cache.cache);
66
67 struct anv_pipeline_bind_map bind_map = {
68 .surface_count = 0,
69 .sampler_count = 0,
70 };
71
72 struct anv_shader_bin *bin =
73 anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache,
74 key, key_size, kernel, kernel_size,
75 NULL, 0,
76 prog_data, prog_data_size,
77 NULL, 0, NULL, &bind_map);
78
79 if (!bin)
80 return false;
81
82 /* The cache already has a reference and it's not going anywhere so there
83 * is no need to hold a second reference.
84 */
85 anv_shader_bin_unref(device, bin);
86
87 *kernel_out = bin->kernel.offset;
88 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
89
90 return true;
91 }
92
93 void
94 anv_device_init_blorp(struct anv_device *device)
95 {
96 blorp_init(&device->blorp, device, &device->isl_dev);
97 device->blorp.compiler = device->physical->compiler;
98 device->blorp.lookup_shader = lookup_blorp_shader;
99 device->blorp.upload_shader = upload_blorp_shader;
100 switch (device->info.gen) {
101 case 7:
102 if (device->info.is_haswell) {
103 device->blorp.exec = gen75_blorp_exec;
104 } else {
105 device->blorp.exec = gen7_blorp_exec;
106 }
107 break;
108 case 8:
109 device->blorp.exec = gen8_blorp_exec;
110 break;
111 case 9:
112 device->blorp.exec = gen9_blorp_exec;
113 break;
114 case 10:
115 device->blorp.exec = gen10_blorp_exec;
116 break;
117 case 11:
118 device->blorp.exec = gen11_blorp_exec;
119 break;
120 case 12:
121 device->blorp.exec = gen12_blorp_exec;
122 break;
123 default:
124 unreachable("Unknown hardware generation");
125 }
126 }
127
/* Tears down the device's blorp context; counterpart to
 * anv_device_init_blorp().
 */
void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}
133
/* Builds a blorp_surf (plus its backing isl_surf) describing a linear 2D
 * view of a range of an anv_buffer so blorp can treat buffer memory as an
 * image.  *isl_surf is written here and must stay alive as long as
 * *blorp_surf is in use (blorp_surf->surf points into it).
 */
static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);
   bool ok UNUSED;

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format.  fmtl still refers to the
       * original ASTC layout here, so bw/bh are the ASTC block dimensions:
       * one R32G32B32A32 texel per compressed block.
       */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
   }

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->address.bo,
         .offset = buffer->address.offset + offset,
         .mocs = anv_mocs_for_bo(device, buffer->address.bo),
      },
   };

   /* A simple single-level, single-sample, linear 2D surface over the
    * buffer range.  Asserted to succeed: callers have already chosen a
    * compatible format and row pitch.
    */
   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}
183
184 /* Pick something high enough that it won't be used in core and low enough it
185 * will never map to an extension.
186 */
187 #define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000
188
189 static struct blorp_address
190 anv_to_blorp_address(struct anv_address addr)
191 {
192 return (struct blorp_address) {
193 .buffer = addr.bo,
194 .offset = addr.offset,
195 };
196 }
197
198 static void
199 get_blorp_surf_for_anv_image(const struct anv_device *device,
200 const struct anv_image *image,
201 VkImageAspectFlags aspect,
202 VkImageUsageFlags usage,
203 VkImageLayout layout,
204 enum isl_aux_usage aux_usage,
205 struct blorp_surf *blorp_surf)
206 {
207 uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
208
209 if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
210 assert(usage != 0);
211 aux_usage = anv_layout_to_aux_usage(&device->info, image,
212 aspect, usage, layout);
213 }
214
215 const struct anv_surface *surface = &image->planes[plane].surface;
216 *blorp_surf = (struct blorp_surf) {
217 .surf = &surface->isl,
218 .addr = {
219 .buffer = image->planes[plane].address.bo,
220 .offset = image->planes[plane].address.offset + surface->offset,
221 .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
222 },
223 };
224
225 if (aux_usage != ISL_AUX_USAGE_NONE) {
226 const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
227 blorp_surf->aux_surf = &aux_surface->isl,
228 blorp_surf->aux_addr = (struct blorp_address) {
229 .buffer = image->planes[plane].address.bo,
230 .offset = image->planes[plane].address.offset + aux_surface->offset,
231 .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
232 };
233 blorp_surf->aux_usage = aux_usage;
234
235 /* If we're doing a partial resolve, then we need the indirect clear
236 * color. If we are doing a fast clear and want to store/update the
237 * clear color, we also pass the address to blorp, otherwise it will only
238 * stomp the CCS to a particular value and won't care about format or
239 * clear value
240 */
241 if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
242 const struct anv_address clear_color_addr =
243 anv_image_get_clear_color_addr(device, image, aspect);
244 blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
245 } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
246 if (device->info.gen >= 10) {
247 /* Vulkan always clears to 1.0. On gen < 10, we set that directly
248 * in the state packet. For gen >= 10, must provide the clear
249 * value in a buffer. We have a single global buffer that stores
250 * the 1.0 value.
251 */
252 const struct anv_address clear_color_addr = (struct anv_address) {
253 .bo = device->hiz_clear_bo,
254 };
255 blorp_surf->clear_color_addr =
256 anv_to_blorp_address(clear_color_addr);
257 } else {
258 blorp_surf->clear_color = (union isl_color_value) {
259 .f32 = { ANV_HZ_FC_VAL },
260 };
261 }
262 }
263 }
264 }
265
266 static bool
267 get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
268 const struct anv_image *image,
269 VkImageAspectFlags aspect,
270 struct blorp_surf *blorp_surf)
271 {
272
273 uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
274 if (image->planes[plane].shadow_surface.isl.size_B == 0)
275 return false;
276
277 *blorp_surf = (struct blorp_surf) {
278 .surf = &image->planes[plane].shadow_surface.isl,
279 .addr = {
280 .buffer = image->planes[plane].address.bo,
281 .offset = image->planes[plane].address.offset +
282 image->planes[plane].shadow_surface.offset,
283 .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
284 },
285 };
286
287 return true;
288 }
289
290 void anv_CmdCopyImage(
291 VkCommandBuffer commandBuffer,
292 VkImage srcImage,
293 VkImageLayout srcImageLayout,
294 VkImage dstImage,
295 VkImageLayout dstImageLayout,
296 uint32_t regionCount,
297 const VkImageCopy* pRegions)
298 {
299 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
300 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
301 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
302
303 struct blorp_batch batch;
304 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
305
306 for (unsigned r = 0; r < regionCount; r++) {
307 VkOffset3D srcOffset =
308 anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
309 VkOffset3D dstOffset =
310 anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
311 VkExtent3D extent =
312 anv_sanitize_image_extent(src_image->type, pRegions[r].extent);
313
314 const uint32_t dst_level = pRegions[r].dstSubresource.mipLevel;
315 unsigned dst_base_layer, layer_count;
316 if (dst_image->type == VK_IMAGE_TYPE_3D) {
317 dst_base_layer = pRegions[r].dstOffset.z;
318 layer_count = pRegions[r].extent.depth;
319 } else {
320 dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
321 layer_count =
322 anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
323 }
324
325 const uint32_t src_level = pRegions[r].srcSubresource.mipLevel;
326 unsigned src_base_layer;
327 if (src_image->type == VK_IMAGE_TYPE_3D) {
328 src_base_layer = pRegions[r].srcOffset.z;
329 } else {
330 src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
331 assert(layer_count ==
332 anv_get_layerCount(src_image, &pRegions[r].srcSubresource));
333 }
334
335 VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask,
336 dst_mask = pRegions[r].dstSubresource.aspectMask;
337
338 assert(anv_image_aspects_compatible(src_mask, dst_mask));
339
340 if (util_bitcount(src_mask) > 1) {
341 uint32_t aspect_bit;
342 anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
343 struct blorp_surf src_surf, dst_surf;
344 get_blorp_surf_for_anv_image(cmd_buffer->device,
345 src_image, 1UL << aspect_bit,
346 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
347 srcImageLayout, ISL_AUX_USAGE_NONE,
348 &src_surf);
349 get_blorp_surf_for_anv_image(cmd_buffer->device,
350 dst_image, 1UL << aspect_bit,
351 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
352 dstImageLayout, ISL_AUX_USAGE_NONE,
353 &dst_surf);
354 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
355 1UL << aspect_bit,
356 dst_surf.aux_usage, dst_level,
357 dst_base_layer, layer_count);
358
359 for (unsigned i = 0; i < layer_count; i++) {
360 blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
361 &dst_surf, dst_level, dst_base_layer + i,
362 srcOffset.x, srcOffset.y,
363 dstOffset.x, dstOffset.y,
364 extent.width, extent.height);
365 }
366
367 struct blorp_surf dst_shadow_surf;
368 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
369 dst_image,
370 1UL << aspect_bit,
371 &dst_shadow_surf)) {
372 for (unsigned i = 0; i < layer_count; i++) {
373 blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
374 &dst_shadow_surf, dst_level, dst_base_layer + i,
375 srcOffset.x, srcOffset.y,
376 dstOffset.x, dstOffset.y,
377 extent.width, extent.height);
378 }
379 }
380 }
381 } else {
382 struct blorp_surf src_surf, dst_surf;
383 get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
384 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
385 srcImageLayout, ISL_AUX_USAGE_NONE,
386 &src_surf);
387 get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
388 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
389 dstImageLayout, ISL_AUX_USAGE_NONE,
390 &dst_surf);
391 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
392 dst_surf.aux_usage, dst_level,
393 dst_base_layer, layer_count);
394
395 for (unsigned i = 0; i < layer_count; i++) {
396 blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
397 &dst_surf, dst_level, dst_base_layer + i,
398 srcOffset.x, srcOffset.y,
399 dstOffset.x, dstOffset.y,
400 extent.width, extent.height);
401 }
402
403 struct blorp_surf dst_shadow_surf;
404 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
405 dst_image, dst_mask,
406 &dst_shadow_surf)) {
407 for (unsigned i = 0; i < layer_count; i++) {
408 blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
409 &dst_shadow_surf, dst_level, dst_base_layer + i,
410 srcOffset.x, srcOffset.y,
411 dstOffset.x, dstOffset.y,
412 extent.width, extent.height);
413 }
414 }
415 }
416 }
417
418 blorp_batch_finish(&batch);
419 }
420
421 static enum isl_format
422 isl_format_for_size(unsigned size_B)
423 {
424 /* Prefer 32-bit per component formats for CmdFillBuffer */
425 switch (size_B) {
426 case 1: return ISL_FORMAT_R8_UINT;
427 case 2: return ISL_FORMAT_R16_UINT;
428 case 3: return ISL_FORMAT_R8G8B8_UINT;
429 case 4: return ISL_FORMAT_R32_UINT;
430 case 6: return ISL_FORMAT_R16G16B16_UINT;
431 case 8: return ISL_FORMAT_R32G32_UINT;
432 case 12: return ISL_FORMAT_R32G32B32_UINT;
433 case 16: return ISL_FORMAT_R32G32B32A32_UINT;
434 default:
435 unreachable("Unknown format size");
436 }
437 }
438
/* Shared implementation of vkCmdCopyBufferToImage and
 * vkCmdCopyImageToBuffer.  The buffer side is bound as a linear 2D surface,
 * one slice at a time, so blorp_copy can be used in either direction;
 * buffer_to_image selects which side is the destination.
 */
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* image and buffer describe the two sides of the copy; src and dst
    * alias one of them each depending on the copy direction.
    */
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                   buffer_to_image ?
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   image_layout, ISL_AUX_USAGE_NONE,
                                   &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         /* For non-3D images, iterate array layers via the z axis. */
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth =
            anv_get_layerCount(anv_image, &pRegions[r].imageSubresource);
      }

      const enum isl_format linear_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);
      const struct isl_format_layout *linear_fmtl =
         isl_format_get_layout(linear_format);

      /* Per the Vulkan spec, zero bufferRowLength/bufferImageHeight means
       * "tightly packed to the image extent".
       */
      const uint32_t buffer_row_length =
         pRegions[r].bufferRowLength ?
         pRegions[r].bufferRowLength : extent.width;

      const uint32_t buffer_image_height =
         pRegions[r].bufferImageHeight ?
         pRegions[r].bufferImageHeight : extent.height;

      /* Pitches are computed in units of compressed blocks (bw x bh). */
      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(buffer_row_length, linear_fmtl->bw) *
         (linear_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(buffer_image_height, linear_fmtl->bh) *
         buffer_row_pitch;

      /* Some formats have additional restrictions which may cause ISL to
       * fail to create a surface for us.  Some examples include:
       *
       *    1. ASTC formats are not allowed to be LINEAR and must be tiled
       *    2. YCbCr formats have to have 2-pixel aligned strides
       *
       * To avoid these issues, we always bind the buffer as if it's a
       * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
       * the format doesn't matter as long as it has the right bpb.
       */
      const VkExtent2D buffer_extent = {
         .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
         .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
      };
      const enum isl_format buffer_format =
         isl_format_for_size(linear_fmtl->bpb / 8);

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    buffer_extent.width, buffer_extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      /* Only image destinations need write tracking and shadow updates. */
      bool dst_has_shadow = false;
      struct blorp_surf dst_shadow_surf;
      if (&image == dst) {
         anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                           aspect, dst->surf.aux_usage,
                                           dst->level,
                                           dst->offset.z, extent.depth);

         dst_has_shadow =
            get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                                anv_image, aspect,
                                                &dst_shadow_surf);
      }

      /* Copy slice by slice, advancing the image z offset and the buffer
       * address by one layer stride per iteration.  These mutations are
       * observed through the src/dst aliases above.
       */
      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         if (dst_has_shadow) {
            blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                       &dst_shadow_surf, dst->level, dst->offset.z,
                       src->offset.x, src->offset.y,
                       dst->offset.x, dst->offset.y,
                       extent.width, extent.height);
         }

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}
570
571 void anv_CmdCopyBufferToImage(
572 VkCommandBuffer commandBuffer,
573 VkBuffer srcBuffer,
574 VkImage dstImage,
575 VkImageLayout dstImageLayout,
576 uint32_t regionCount,
577 const VkBufferImageCopy* pRegions)
578 {
579 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
580 ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
581 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
582
583 copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, dstImageLayout,
584 regionCount, pRegions, true);
585 }
586
587 void anv_CmdCopyImageToBuffer(
588 VkCommandBuffer commandBuffer,
589 VkImage srcImage,
590 VkImageLayout srcImageLayout,
591 VkBuffer dstBuffer,
592 uint32_t regionCount,
593 const VkBufferImageCopy* pRegions)
594 {
595 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
596 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
597 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
598
599 copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout,
600 regionCount, pRegions, false);
601
602 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
603 }
604
/* Normalizes both coordinate ranges to be increasing, swapping each pair in
 * place when reversed.  Returns true when exactly one of the two ranges was
 * reversed, i.e. the blit is flipped along this axis.
 */
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   const bool src_reversed = *src0 > *src1;
   const bool dst_reversed = *dst0 > *dst1;

   if (src_reversed) {
      unsigned t = *src0;
      *src0 = *src1;
      *src1 = t;
   }

   if (dst_reversed) {
      unsigned t = *dst0;
      *dst0 = *dst1;
      *dst1 = t;
   }

   /* Two reversals cancel out; one means a flip. */
   return src_reversed != dst_reversed;
}
625
/* Implements vkCmdBlitImage: scaled and filtered copies between images,
 * including axis flips (reversed offset ranges) and z scaling for 3D.
 */
void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)

{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   /* Translate the Vulkan filter to blorp's filter enum. */
   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      assert(anv_image_aspects_compatible(src_res->aspectMask,
                                          dst_res->aspectMask));

      /* Blit one aspect at a time (e.g. depth and stencil separately). */
      uint32_t aspect_bit;
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      src_image, 1U << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      srcImageLayout, ISL_AUX_USAGE_NONE, &src);
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      dst_image, 1U << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dstImageLayout, ISL_AUX_USAGE_NONE, &dst);

         struct anv_format_plane src_format =
            anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format,
                                 1U << aspect_bit, src_image->tiling);
         struct anv_format_plane dst_format =
            anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format,
                                 1U << aspect_bit, dst_image->tiling);

         /* For 3D images the z range comes from the region offsets;
          * otherwise it comes from the subresource layer range.
          */
         unsigned dst_start, dst_end;
         if (dst_image->type == VK_IMAGE_TYPE_3D) {
            assert(dst_res->baseArrayLayer == 0);
            dst_start = pRegions[r].dstOffsets[0].z;
            dst_end = pRegions[r].dstOffsets[1].z;
         } else {
            dst_start = dst_res->baseArrayLayer;
            dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
         }

         unsigned src_start, src_end;
         if (src_image->type == VK_IMAGE_TYPE_3D) {
            assert(src_res->baseArrayLayer == 0);
            src_start = pRegions[r].srcOffsets[0].z;
            src_end = pRegions[r].srcOffsets[1].z;
         } else {
            src_start = src_res->baseArrayLayer;
            src_end = src_start + anv_get_layerCount(src_image, src_res);
         }

         /* Normalize both z ranges to be increasing; flip_z is set when
          * exactly one of them was reversed.
          */
         bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
         /* Source z advance per destination slice. */
         float src_z_step = (float)(src_end + 1 - src_start) /
                            (float)(dst_end + 1 - dst_start);

         /* When flipped along z, walk the source range backwards. */
         if (flip_z) {
            src_start = src_end;
            src_z_step *= -1;
         }

         unsigned src_x0 = pRegions[r].srcOffsets[0].x;
         unsigned src_x1 = pRegions[r].srcOffsets[1].x;
         unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
         unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
         bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

         unsigned src_y0 = pRegions[r].srcOffsets[0].y;
         unsigned src_y1 = pRegions[r].srcOffsets[1].y;
         unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
         unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
         bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

         const unsigned num_layers = dst_end - dst_start;
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1U << aspect_bit,
                                           dst.aux_usage,
                                           dst_res->mipLevel,
                                           dst_start, num_layers);

         for (unsigned i = 0; i < num_layers; i++) {
            unsigned dst_z = dst_start + i;
            /* Truncation picks the source slice for this dst slice. */
            unsigned src_z = src_start + i * src_z_step;

            blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                       src_format.isl_format, src_format.swizzle,
                       &dst, dst_res->mipLevel, dst_z,
                       dst_format.isl_format, dst_format.swizzle,
                       src_x0, src_y0, src_x1, src_y1,
                       dst_x0, dst_y0, dst_x1, dst_y1,
                       blorp_filter, flip_x, flip_y);
         }
      }
   }

   blorp_batch_finish(&batch);
}
748
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of a and b must be non-zero; if one of them is zero, the
 * result is the largest power-of-two divisor of the other.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* The greatest power-of-two divisor of x is its lowest set bit, and the
    * smaller of the two lowest set bits is the lowest set bit of (a | b).
    * Isolating it as x & -x works entirely in uint64_t.  The previous
    * implementation computed "1 << MIN2(a_log2, b_log2)", which shifts a
    * plain int and is undefined behavior for shift counts >= 31 (e.g. when
    * both inputs are multiples of 2^32).
    */
   uint64_t x = a | b;
   return x & (~x + 1);
}
766
767 /* This is maximum possible width/height our HW can handle */
768 #define MAX_SURFACE_DIM (1ull << 14)
769
770 void anv_CmdCopyBuffer(
771 VkCommandBuffer commandBuffer,
772 VkBuffer srcBuffer,
773 VkBuffer dstBuffer,
774 uint32_t regionCount,
775 const VkBufferCopy* pRegions)
776 {
777 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
778 ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
779 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
780
781 struct blorp_batch batch;
782 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
783
784 for (unsigned r = 0; r < regionCount; r++) {
785 struct blorp_address src = {
786 .buffer = src_buffer->address.bo,
787 .offset = src_buffer->address.offset + pRegions[r].srcOffset,
788 .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
789 };
790 struct blorp_address dst = {
791 .buffer = dst_buffer->address.bo,
792 .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
793 .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
794 };
795
796 blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
797 }
798
799 blorp_batch_finish(&batch);
800
801 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
802 }
803
804 void anv_CmdUpdateBuffer(
805 VkCommandBuffer commandBuffer,
806 VkBuffer dstBuffer,
807 VkDeviceSize dstOffset,
808 VkDeviceSize dataSize,
809 const void* pData)
810 {
811 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
812 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
813
814 struct blorp_batch batch;
815 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
816
817 /* We can't quite grab a full block because the state stream needs a
818 * little data at the top to build its linked list.
819 */
820 const uint32_t max_update_size =
821 cmd_buffer->device->dynamic_state_pool.block_size - 64;
822
823 assert(max_update_size < MAX_SURFACE_DIM * 4);
824
825 /* We're about to read data that was written from the CPU. Flush the
826 * texture cache so we don't get anything stale.
827 */
828 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
829
830 while (dataSize) {
831 const uint32_t copy_size = MIN2(dataSize, max_update_size);
832
833 struct anv_state tmp_data =
834 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
835
836 memcpy(tmp_data.map, pData, copy_size);
837
838 struct blorp_address src = {
839 .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
840 .offset = tmp_data.offset,
841 .mocs = cmd_buffer->device->isl_dev.mocs.internal,
842 };
843 struct blorp_address dst = {
844 .buffer = dst_buffer->address.bo,
845 .offset = dst_buffer->address.offset + dstOffset,
846 .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
847 };
848
849 blorp_buffer_copy(&batch, src, dst, copy_size);
850
851 dataSize -= copy_size;
852 dstOffset += copy_size;
853 pData = (void *)pData + copy_size;
854 }
855
856 blorp_batch_finish(&batch);
857
858 cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
859 }
860
/* Implements vkCmdFillBuffer by clearing the buffer range as a sequence of
 * 2D color surfaces: full MAX_SURFACE_DIM x MAX_SURFACE_DIM tiles first,
 * then a full-width rectangle of whole rows, then a final partial row.
 */
void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* Resolve VK_WHOLE_SIZE and clamp to the buffer's actual range. */
   fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   /* Replicate the 32-bit fill word into every channel. */
   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   /* Phase 1: clear as many full max-size surfaces as fit. */
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   /* Phase 2: a full-width rectangle covering the remaining whole rows. */
   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   /* Phase 3: the final partial row. */
   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);

   /* The buffer was written through the render pipe; record the pending
    * write so later accesses are ordered against it.
    */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}
949
/* Implements vkCmdClearColorImage: clears each requested subresource range
 * with a blorp slow clear.
 */
void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   /* All false: every color channel is written. */
   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);


   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_plane(&cmd_buffer->device->info, image->vk_format,
                              VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         /* For 3D images, the "layers" are the depth slices of this level. */
         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}
1011
/* Implements vkCmdClearDepthStencilImage: clears the requested depth and/or
 * stencil aspects of every level/layer in each range with blorp.
 *
 * Blorp surfaces are built once up front for whichever aspects the image
 * actually has; ranges that omit an aspect simply pass clear_depth /
 * clear_stencil == false to blorp_clear_depth_stencil.
 */
void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   /* Some stencil images carry a separate "shadow" surface that must be
    * kept coherent with the real stencil data, so any stencil clear below
    * is mirrored into it.
    */
   bool has_stencil_shadow = false;
   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         /* For 3D images, clear all (minified) depth slices of this level.
          * NOTE(review): unlike the color path above, base_layer is not
          * reset to 0 here — the Vulkan spec requires baseArrayLayer == 0
          * for 3D-image clear ranges, so this relies on valid usage.
          */
         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

         /* Mirror the stencil clear into the shadow surface. */
         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, NULL);
         }
      }
   }

   blorp_batch_finish(&batch);
}
1091
1092 VkResult
1093 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
1094 uint32_t num_entries,
1095 uint32_t *state_offset,
1096 struct anv_state *bt_state)
1097 {
1098 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1099 state_offset);
1100 if (bt_state->map == NULL) {
1101 /* We ran out of space. Grab a new binding table block. */
1102 VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
1103 if (result != VK_SUCCESS)
1104 return result;
1105
1106 /* Re-emit state base addresses so we get the new surface state base
1107 * address before we start emitting binding tables etc.
1108 */
1109 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
1110
1111 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1112 state_offset);
1113 assert(bt_state->map != NULL);
1114 }
1115
1116 return VK_SUCCESS;
1117 }
1118
1119 static VkResult
1120 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1121 struct anv_state surface_state,
1122 uint32_t *bt_offset)
1123 {
1124 uint32_t state_offset;
1125 struct anv_state bt_state;
1126
1127 VkResult result =
1128 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1129 &bt_state);
1130 if (result != VK_SUCCESS)
1131 return result;
1132
1133 uint32_t *bt_map = bt_state.map;
1134 bt_map[0] = surface_state.offset + state_offset;
1135
1136 *bt_offset = bt_state.offset;
1137 return VK_SUCCESS;
1138 }
1139
/* vkCmdClearAttachments helper for one color attachment: clears the given
 * rects of the bound subpass color attachment via blorp_clear_attachments,
 * reusing the attachment's already-emitted surface state.
 */
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   assert(color_att < subpass->color_count);
   const uint32_t att_idx = subpass->color_attachments[color_att].attachment;

   /* Clearing an unused attachment slot is a no-op. */
   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   /* Point a one-entry binding table at the attachment's surface state so
    * blorp can render to it.  On allocation failure we silently drop the
    * clear (nothing useful can be done here).
    */
   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att_state->color.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount and
    * instead clear layer view_idx for each view in the subpass view mask.
    */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}
1201
/* vkCmdClearAttachments helper for the current subpass's depth/stencil
 * attachment: clears the requested aspects over the given rects via
 * blorp_clear_attachments.
 */
static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   /* Dummy color value — color clearing is disabled in every call below. */
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   if (!subpass->depth_stencil_attachment)
      return;

   const uint32_t att_idx = subpass->depth_stencil_attachment->attachment;
   assert(att_idx != VK_ATTACHMENT_UNUSED);
   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   /* Only a depth clear needs a resolved isl format; otherwise it stays
    * ISL_FORMAT_UNSUPPORTED.
    */
   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   /* No color target is written, so bind the null surface state. */
   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount and
    * instead clear layer view_idx for each view in the subpass view mask.
    */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}
1275
1276 void anv_CmdClearAttachments(
1277 VkCommandBuffer commandBuffer,
1278 uint32_t attachmentCount,
1279 const VkClearAttachment* pAttachments,
1280 uint32_t rectCount,
1281 const VkClearRect* pRects)
1282 {
1283 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1284
1285 /* Because this gets called within a render pass, we tell blorp not to
1286 * trash our depth and stencil buffers.
1287 */
1288 struct blorp_batch batch;
1289 enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
1290 if (cmd_buffer->state.conditional_render_enabled) {
1291 anv_cmd_emit_conditional_render_predicate(cmd_buffer);
1292 flags |= BLORP_BATCH_PREDICATE_ENABLE;
1293 }
1294 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);
1295
1296 for (uint32_t a = 0; a < attachmentCount; ++a) {
1297 if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
1298 assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
1299 clear_color_attachment(cmd_buffer, &batch,
1300 &pAttachments[a],
1301 rectCount, pRects);
1302 } else {
1303 clear_depth_stencil_attachment(cmd_buffer, &batch,
1304 &pAttachments[a],
1305 rectCount, pRects);
1306 }
1307 }
1308
1309 blorp_batch_finish(&batch);
1310 }
1311
/* Stage of a subpass at which an implicit operation occurs (attachment
 * load, draw time, or resolve).
 * NOTE(review): not referenced anywhere in this portion of the file —
 * confirm it is still used before relying on it.
 */
enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};
1317
/* Resolve a multisampled 2D region of src_image into single-sampled
 * dst_image, one blorp_blit per layer.
 *
 * Aux usages are taken as given (ANV_IMAGE_LAYOUT_EXPLICIT_AUX); the caller
 * is responsible for the images being in a compatible state.  When filter
 * is BLORP_FILTER_NONE, a filter is derived from the source surface:
 * sample 0 for depth/stencil/integer formats, averaging otherwise.
 */
void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* Only 2D multisampled → 2D single-sampled resolves with matching plane
    * layout and no YCbCr are supported.
    */
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);
   assert(src_image->n_planes == dst_image->n_planes);
   assert(!src_image->format->can_ycbcr);
   assert(!dst_image->format->can_ycbcr);

   struct blorp_surf src_surf, dst_surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                src_aux_usage, &src_surf);
   /* For an MCS-compressed source, give blorp the image's clear-color
    * address — presumably needed to read fast-cleared samples correctly.
    */
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image: depth, stencil, and integer formats take sample 0;
       * everything else averages the samples.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }

   /* One blit per layer; formats are passed as ISL_FORMAT_UNSUPPORTED
    * (no format override).
    */
   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   blorp_batch_finish(&batch);
}
1386
1387 void anv_CmdResolveImage(
1388 VkCommandBuffer commandBuffer,
1389 VkImage srcImage,
1390 VkImageLayout srcImageLayout,
1391 VkImage dstImage,
1392 VkImageLayout dstImageLayout,
1393 uint32_t regionCount,
1394 const VkImageResolve* pRegions)
1395 {
1396 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1397 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
1398 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
1399
1400 assert(!src_image->format->can_ycbcr);
1401
1402 for (uint32_t r = 0; r < regionCount; r++) {
1403 assert(pRegions[r].srcSubresource.aspectMask ==
1404 pRegions[r].dstSubresource.aspectMask);
1405 assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) ==
1406 anv_get_layerCount(dst_image, &pRegions[r].dstSubresource));
1407
1408 const uint32_t layer_count =
1409 anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
1410
1411 uint32_t aspect_bit;
1412 anv_foreach_image_aspect_bit(aspect_bit, src_image,
1413 pRegions[r].srcSubresource.aspectMask) {
1414 enum isl_aux_usage src_aux_usage =
1415 anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image,
1416 (1 << aspect_bit),
1417 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1418 srcImageLayout);
1419 enum isl_aux_usage dst_aux_usage =
1420 anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image,
1421 (1 << aspect_bit),
1422 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1423 dstImageLayout);
1424
1425 anv_image_msaa_resolve(cmd_buffer,
1426 src_image, src_aux_usage,
1427 pRegions[r].srcSubresource.mipLevel,
1428 pRegions[r].srcSubresource.baseArrayLayer,
1429 dst_image, dst_aux_usage,
1430 pRegions[r].dstSubresource.mipLevel,
1431 pRegions[r].dstSubresource.baseArrayLayer,
1432 (1 << aspect_bit),
1433 pRegions[r].srcOffset.x,
1434 pRegions[r].srcOffset.y,
1435 pRegions[r].dstOffset.x,
1436 pRegions[r].dstOffset.y,
1437 pRegions[r].extent.width,
1438 pRegions[r].extent.height,
1439 layer_count, BLORP_FILTER_NONE);
1440 }
1441 }
1442 }
1443
/* Copy levels/layers of an image's main surface into its shadow surface
 * with blorp_copy, keeping the two copies coherent.
 *
 * The main surface is read without aux compression (asserted below).  For
 * 3D images, every (minified) depth slice of each level is copied.
 */
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We don't know who touched the main surface last so flush a bunch of
    * caches to ensure we get good data.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
      ANV_PIPE_DATA_CACHE_FLUSH_BIT |
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
      ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                VK_IMAGE_LAYOUT_GENERAL,
                                ISL_AUX_USAGE_NONE, &surf);
   assert(surf.aux_usage == ISL_AUX_USAGE_NONE);

   struct blorp_surf shadow_surf;
   get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                       image, aspect, &shadow_surf);

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = {
         .width = anv_minify(image->extent.width, level),
         .height = anv_minify(image->extent.height, level),
         .depth = anv_minify(image->extent.depth, level),
      };

      /* 3D images: copy all depth slices of this level as layers. */
      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   /* We just wrote to the buffer with the render cache. Flush it. */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;

   blorp_batch_finish(&batch);
}
1502
1503 void
1504 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
1505 const struct anv_image *image,
1506 VkImageAspectFlagBits aspect,
1507 enum isl_aux_usage aux_usage,
1508 enum isl_format format, struct isl_swizzle swizzle,
1509 uint32_t level, uint32_t base_layer, uint32_t layer_count,
1510 VkRect2D area, union isl_color_value clear_color)
1511 {
1512 assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1513
1514 /* We don't support planar images with multisampling yet */
1515 assert(image->n_planes == 1);
1516
1517 struct blorp_batch batch;
1518 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1519
1520 struct blorp_surf surf;
1521 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1522 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1523 aux_usage, &surf);
1524 anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
1525 level, base_layer, layer_count);
1526
1527 blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
1528 level, base_layer, layer_count,
1529 area.offset.x, area.offset.y,
1530 area.offset.x + area.extent.width,
1531 area.offset.y + area.extent.height,
1532 clear_color, NULL);
1533
1534 blorp_batch_finish(&batch);
1535 }
1536
/* Clear the given depth and/or stencil aspects of an image with blorp.
 *
 * The depth aux usage is applied as given (ANV_IMAGE_LAYOUT_EXPLICIT_AUX);
 * stencil is always cleared without aux.  If the image carries a stencil
 * shadow surface, the stencil value is mirrored into it to keep the two
 * copies coherent.
 */
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value)
{
   assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                            VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
      /* NOTE(review): ANV_HZ_FC_VAL appears to be the fixed HiZ fast-clear
       * value used throughout this file — confirm in anv_private.h.
       */
      depth.clear_color.f32[0] = ANV_HZ_FC_VAL;
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   ISL_AUX_USAGE_NONE, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   /* Mirror the stencil clear into the shadow surface, if present. */
   struct blorp_surf stencil_shadow;
   if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
       get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                           &stencil_shadow)) {
      union isl_color_value stencil_color = {
         .u32 = { stencil_value },
      };
      blorp_clear(&batch, &stencil_shadow,
                  ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                  level, base_layer, layer_count,
                  area.offset.x, area.offset.y,
                  area.offset.x + area.extent.width,
                  area.offset.y + area.extent.height,
                  stencil_color, NULL);
   }

   blorp_batch_finish(&batch);
}
1613
1614 void
1615 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
1616 const struct anv_image *image,
1617 VkImageAspectFlagBits aspect, uint32_t level,
1618 uint32_t base_layer, uint32_t layer_count,
1619 enum isl_aux_op hiz_op)
1620 {
1621 assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
1622 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
1623 assert(anv_image_aspect_to_plane(image->aspects,
1624 VK_IMAGE_ASPECT_DEPTH_BIT) == 0);
1625
1626 struct blorp_batch batch;
1627 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1628
1629 struct blorp_surf surf;
1630 get_blorp_surf_for_anv_image(cmd_buffer->device,
1631 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1632 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1633 ISL_AUX_USAGE_HIZ, &surf);
1634 surf.clear_color.f32[0] = ANV_HZ_FC_VAL;
1635
1636 blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);
1637
1638 blorp_batch_finish(&batch);
1639 }
1640
/* Fast depth/stencil clear through blorp's HiZ clear path.
 *
 * Note the depth value written is always ANV_HZ_FC_VAL (the fixed HiZ
 * fast-clear value), never a caller-supplied depth; only the stencil value
 * is parameterized.  The surrounding PIPE_CONTROL workarounds below are
 * required by the hardware (see the PRM quotes).
 */
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                            VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   ISL_AUX_USAGE_HIZ, &depth);
      depth.clear_color.f32[0] = ANV_HZ_FC_VAL;
   }

   /* Stencil is cleared without aux. */
   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   ISL_AUX_USAGE_NONE, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;

   blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 ANV_HZ_FC_VAL,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 stencil_value);

   blorp_batch_finish(&batch);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render.  DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
}
1723
/* Perform an MCS aux operation (fast clear or partial resolve) on a layer
 * range of a multisampled, single-plane color image.
 *
 * If clear_value is non-NULL it is stored in the blorp surface so blorp can
 * update the image's clear color; when NULL the
 * BLORP_BATCH_NO_UPDATE_CLEAR_COLOR flag is set instead.  If predicate is
 * true, the batch executes under the current conditional-render predicate
 * (BLORP_BATCH_PREDICATE_ENABLE).
 */
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->samples > 1);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

   /* Bool-to-flag arithmetic: each flag is included iff its condition holds. */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_PREDICATE_ENABLE * predicate +
                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                ISL_AUX_USAGE_MCS, &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear.  So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      /* Multisampled surfaces only have level 0; the clear covers the whole
       * image extent.
       */
      blorp_fast_clear(&batch, &surf, format,
                       0, base_layer, layer_count,
                       0, 0, image->extent.width, image->extent.height);
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_mcs_partial_resolve(&batch, &surf, format,
                                base_layer, layer_count);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_AMBIGUATE:
   default:
      unreachable("Unsupported MCS operation");
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   blorp_batch_finish(&batch);
}
1796
/* Perform a CCS aux operation (fast clear, full/partial resolve, or
 * ambiguate) on one mip level of a single-sampled color image.
 *
 * If clear_value is non-NULL it is stored in the blorp surface so blorp can
 * update the image's clear color; when NULL the
 * BLORP_BATCH_NO_UPDATE_CLEAR_COLOR flag is set instead.  If predicate is
 * true, the batch executes under the current conditional-render predicate
 * (BLORP_BATCH_PREDICATE_ENABLE).
 */
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YcBcR is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   /* Scale the level dimensions by the plane's denominator scales
    * (chroma subsampling factors for planar formats).
    */
   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   uint32_t width_div = image->format->planes[plane].denominator_scales[0];
   uint32_t height_div = image->format->planes[plane].denominator_scales[1];
   uint32_t level_width = anv_minify(image->extent.width, level) / width_div;
   uint32_t level_height = anv_minify(image->extent.height, level) / height_div;

   /* Bool-to-flag arithmetic: each flag is included iff its condition holds. */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_PREDICATE_ENABLE * predicate +
                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage,
                                &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear.  So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
                        format, ccs_op);
      break;
   case ISL_AUX_OP_AMBIGUATE:
      /* Ambiguate operates one layer at a time. */
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(&batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   blorp_batch_finish(&batch);
}