anv/cmd_buffer: Move the color portion of clear_subpass into begin_subpass
[mesa.git] src/intel/vulkan/anv_blorp.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_shader_bin *bin =
      anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
   blorp_init(&device->blorp, device, &device->isl_dev);
   device->blorp.compiler = device->instance->physicalDevice.compiler;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell) {
         device->blorp.exec = gen75_blorp_exec;
      } else {
         device->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      device->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      device->blorp.exec = gen9_blorp_exec;
      break;
   case 10:
      device->blorp.exec = gen10_blorp_exec;
      break;
   case 11:
      device->blorp.exec = gen11_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}

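/* Vulkan buffers are untyped ranges of memory, but blorp only understands
 * surfaces.  Wrap the requested byte range in a linear 2D surface of the
 * given format so the copy and fill paths below can treat buffers and
 * images uniformly.
 */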
static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);
   bool ok UNUSED;

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
   }

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
         .mocs = device->default_mocs,
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch = row_pitch,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

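/* Sentinel value telling get_blorp_surf_for_anv_image() to use the image's
 * own aux usage (minus HiZ, which blorp copies and blits can't handle).
 */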
#define ANV_AUX_USAGE_DEFAULT ((enum isl_aux_usage)0xff)

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);

   if (aux_usage == ANV_AUX_USAGE_DEFAULT) {
      aux_usage = image->planes[plane].aux_usage;

      /* Blorp copies and blits can't handle HiZ so disable it by default */
      if (aux_usage == ISL_AUX_USAGE_HIZ)
         aux_usage = ISL_AUX_USAGE_NONE;
   }

   const struct anv_surface *surface = &image->planes[plane].surface;
   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = image->planes[plane].bo,
         .offset = image->planes[plane].bo_offset + surface->offset,
         .mocs = device->default_mocs,
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      blorp_surf->aux_surf = &aux_surface->isl;
      blorp_surf->aux_addr = (struct blorp_address) {
         .buffer = image->planes[plane].bo,
         .offset = image->planes[plane].bo_offset + aux_surface->offset,
         .mocs = device->default_mocs,
      };
      blorp_surf->aux_usage = aux_usage;
   }
}

void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      const uint32_t dst_level = pRegions[r].dstSubresource.mipLevel;
      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count =
            anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
      }

      const uint32_t src_level = pRegions[r].srcSubresource.mipLevel;
      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(layer_count ==
                anv_get_layerCount(src_image, &pRegions[r].srcSubresource));
      }

      VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask,
                         dst_mask = pRegions[r].dstSubresource.aspectMask;

      assert(anv_image_aspects_compatible(src_mask, dst_mask));

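      /* For multi-planar (e.g. YCbCr) images each plane is its own surface,
       * so copy the requested planes one aspect bit at a time.
       */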
      if (_mesa_bitcount(src_mask) > 1) {
         uint32_t aspect_bit;
         anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
            struct blorp_surf src_surf, dst_surf;
            get_blorp_surf_for_anv_image(cmd_buffer->device,
                                         src_image, 1UL << aspect_bit,
                                         ANV_AUX_USAGE_DEFAULT, &src_surf);
            get_blorp_surf_for_anv_image(cmd_buffer->device,
                                         dst_image, 1UL << aspect_bit,
                                         ANV_AUX_USAGE_DEFAULT, &dst_surf);
            anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                              1UL << aspect_bit,
                                              dst_surf.aux_usage, dst_level,
                                              dst_base_layer, layer_count);

            for (unsigned i = 0; i < layer_count; i++) {
               blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
                          &dst_surf, dst_level, dst_base_layer + i,
                          srcOffset.x, srcOffset.y,
                          dstOffset.x, dstOffset.y,
                          extent.width, extent.height);
            }
         }
      } else {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
                                      ANV_AUX_USAGE_DEFAULT, &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
                                      ANV_AUX_USAGE_DEFAULT, &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}

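/* Shared implementation of vkCmdCopyBufferToImage and
 * vkCmdCopyImageToBuffer.  The buffer is wrapped in a temporary linear
 * surface and the two endpoints are accessed through src/dst pointers so
 * the same loop handles both directions.
 */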
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                   ANV_AUX_USAGE_DEFAULT, &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth =
            anv_get_layerCount(anv_image, &pRegions[r].imageSubresource);
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      const VkExtent3D bufferImageExtent = {
         .width = pRegions[r].bufferRowLength ?
                  pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      if (&image == dst) {
         anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                           aspect, dst->surf.aux_usage,
                                           dst->level,
                                           dst->offset.z, extent.depth);
      }

      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}

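/* Sorts each of the two coordinate pairs into ascending order and returns
 * whether the blit must be mirrored along that axis (i.e. exactly one of
 * the two pairs was reversed).
 */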
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   src_image, src_res->aspectMask,
                                   ANV_AUX_USAGE_DEFAULT, &src);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   dst_image, dst_res->aspectMask,
                                   ANV_AUX_USAGE_DEFAULT, &dst);

      struct anv_format_plane src_format =
         anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format,
                              src_res->aspectMask, src_image->tiling);
      struct anv_format_plane dst_format =
         anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format,
                              dst_res->aspectMask, dst_image->tiling);

      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + anv_get_layerCount(src_image, src_res);
      }

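      /* Map destination slices onto the source z range; when the blit is
       * flipped in z we start from the far end of the source range and step
       * backwards.
       */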
      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      const unsigned num_layers = dst_end - dst_start;
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        dst_res->aspectMask,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format,
                    anv_swizzle_for_render(dst_format.swizzle),
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }
   }

   blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   switch (size_B) {
   case 4:  return ISL_FORMAT_R32_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Not a power-of-two format size");
   }
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);
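   /* e.g. gcd_pow2_u64(24, 40) == 8: the lowest set bit of 24 (0b11000) and
    * of 40 (0b101000) is bit 3, so we return 1 << 3.
    */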

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX, in which
    * case MIN2() will take the other one.  If both are 0 then we will hit
    * the assert above.
    */
   return 1ull << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

void anv_CmdCopyBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferCopy*                         pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

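   /* blorp_buffer_copy() appears to take care of splitting each region into
    * suitably sized and aligned chunks itself, so no format selection is
    * needed here (unlike anv_CmdFillBuffer below).
    */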
   for (unsigned r = 0; r < regionCount; r++) {
      struct blorp_address src = {
         .buffer = src_buffer->bo,
         .offset = src_buffer->offset + pRegions[r].srcOffset,
         .mocs = cmd_buffer->device->default_mocs,
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->bo,
         .offset = dst_buffer->offset + pRegions[r].dstOffset,
         .mocs = cmd_buffer->device->default_mocs,
      };

      blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
   }

   blorp_batch_finish(&batch);
}

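/* vkCmdUpdateBuffer is implemented by memcpy'ing the data into the command
 * buffer's dynamic state stream in chunks and then copying each chunk to
 * the destination buffer on the GPU.
 */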
void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      anv_state_flush(cmd_buffer->device, tmp_data);

      struct blorp_address src = {
         .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = cmd_buffer->device->default_mocs,
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->bo,
         .offset = dst_buffer->offset + dstOffset,
         .mocs = cmd_buffer->device->default_mocs,
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

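   /* The fill proceeds in up to three passes: full MAX_SURFACE_DIM x
    * MAX_SURFACE_DIM rectangles, then one rectangle of full-width rows, then
    * a final partial row.
    */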
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

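   /* All four channels enabled; passing this should be equivalent to passing
    * NULL for blorp_clear()'s color_write_disable argument.
    */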
   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   ANV_AUX_USAGE_DEFAULT, &surf);

      struct anv_format_plane src_format =
         anv_get_format_plane(&cmd_buffer->device->info, image->vk_format,
                              VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}

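/* Both aspects are resolved to blorp surfaces up front; ranges that lack an
 * aspect pass clear_depth = false or a zero stencil mask, so the
 * corresponding zeroed-out surface is never used.
 */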
void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

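/* Builds a one-entry binding table pointing at the given surface state; the
 * attachment-clear paths below feed the resulting offset to
 * blorp_clear_attachments().
 */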
static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

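/* Implements the color half of vkCmdClearAttachments.  Rather than clearing
 * the image directly, this draws a clear through the subpass's existing
 * color attachment surface state.
 */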
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   const uint32_t att_idx = subpass->color_attachments[color_att].attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att_state->color.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att_idx = subpass->depth_stencil_attachment.attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

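   /* There is no color attachment to write, so the binding table entry just
    * points at the null surface state.
    */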
   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};

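/* Color clears are now handled in begin_subpass, so the only thing that can
 * still need clearing by the time this is called is the depth/stencil
 * attachment.
 */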
static bool
subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;

   if (ds != VK_ATTACHMENT_UNUSED) {
      assert(ds < cmd_state->pass->attachment_count);
      if (cmd_state->attachments[ds].pending_clear_aspects)
         return true;
   }

   return false;
}

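/* Emits the pending depth/stencil clear for the current subpass, using the
 * gen8+ HiZ fast-clear path when the restrictions checked below allow it
 * and falling back to a regular blorp attachment clear otherwise.
 */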
void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   const VkRect2D render_area = cmd_buffer->state.render_area;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
   assert(ds == VK_ATTACHMENT_UNUSED || ds < cmd_state->pass->attachment_count);

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {
      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      const uint8_t gen = cmd_buffer->device->info.gen;
      bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
                            ISL_AUX_USAGE_HIZ;
      const struct anv_image_view *iview = fb->attachments[ds];

      if (clear_with_hiz) {
         const bool clear_depth = clear_att.aspectMask &
                                  VK_IMAGE_ASPECT_DEPTH_BIT;
         const bool clear_stencil = clear_att.aspectMask &
                                    VK_IMAGE_ASPECT_STENCIL_BIT;

         /* Check against restrictions for depth buffer clearing.  A great GPU
          * performance benefit isn't expected when using the HZ sequence for
          * stencil-only clears.  Therefore, we don't emit a HZ op sequence
          * for a stencil clear in addition to using the BLORP-fallback for
          * depth.
          */
         if (clear_depth) {
            if (!blorp_can_hiz_clear_depth(gen, iview->planes[0].isl.format,
                                           iview->image->samples,
                                           render_area.offset.x,
                                           render_area.offset.y,
                                           render_area.offset.x +
                                           render_area.extent.width,
                                           render_area.offset.y +
                                           render_area.extent.height)) {
               clear_with_hiz = false;
            } else if (clear_att.clearValue.depthStencil.depth !=
                       ANV_HZ_FC_VAL) {
               /* Don't enable fast depth clears for any clear value not equal
                * to ANV_HZ_FC_VAL.
                */
               clear_with_hiz = false;
            } else if (gen == 8 &&
                       anv_can_sample_with_hiz(&cmd_buffer->device->info,
                                               iview->image)) {
               /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
                * fast-cleared portion of a HiZ buffer.  Testing has revealed
                * that Gen8 only supports returning 0.0f.  Gens prior to gen8
                * do not support this feature at all.
                */
               clear_with_hiz = false;
            }
         }

         if (clear_with_hiz) {
            blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
                                             render_area.offset.x,
                                             render_area.offset.y,
                                             render_area.offset.x +
                                             render_area.extent.width,
                                             render_area.offset.y +
                                             render_area.extent.height,
                                             clear_depth, clear_stencil,
                                             clear_att.clearValue.depthStencil.stencil);

            /* From the SKL PRM, Depth Buffer Clear:
             *
             *    Depth Buffer Clear Workaround
             *    Depth buffer clear pass using any of the methods (WM_STATE,
             *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
             *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH
             *    bits “set” before starting to render.  DepthStall and
             *    DepthFlush are not needed between consecutive depth clear
             *    passes nor is it required if the depth-clear pass was done
             *    with “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.
             */
            if (clear_depth) {
               cmd_buffer->state.pending_pipe_bits |=
                  ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
            }
         }
      }

      if (!clear_with_hiz) {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &clear_att, 1, &clear_rect);
      }

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);
}

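/* A 1:1 blorp_blit() from a multisampled source to a single-sampled
 * destination performs an MSAA resolve; passing ISL_FORMAT_UNSUPPORTED
 * makes blorp use each surface's own format.
 */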
static void
resolve_surface(struct blorp_batch *batch,
                struct blorp_surf *src_surf,
                uint32_t src_level, uint32_t src_layer,
                struct blorp_surf *dst_surf,
                uint32_t dst_level, uint32_t dst_layer,
                uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
                uint32_t width, uint32_t height)
{
   blorp_blit(batch,
              src_surf, src_level, src_layer,
              ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
              dst_surf, dst_level, dst_layer,
              ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
              src_x, src_y, src_x + width, src_y + height,
              dst_x, dst_y, dst_x + width, dst_y + height,
              0x2600 /* GL_NEAREST */, false, false);
}

static void
resolve_image(struct anv_device *device,
              struct blorp_batch *batch,
              const struct anv_image *src_image,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;

   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);
   assert(src_image->n_planes == dst_image->n_planes);

   uint32_t aspect_bit;

   anv_foreach_image_aspect_bit(aspect_bit, src_image, aspect_mask) {
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(device, src_image, 1UL << aspect_bit,
                                   ANV_AUX_USAGE_DEFAULT, &src_surf);
      get_blorp_surf_for_anv_image(device, dst_image, 1UL << aspect_bit,
                                   ANV_AUX_USAGE_DEFAULT, &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1UL << aspect_bit,
                                        dst_surf.aux_usage,
                                        dst_level, dst_layer, 1);

      assert(!src_image->format->can_ycbcr);
      assert(!dst_image->format->can_ycbcr);

      resolve_surface(batch,
                      &src_surf, src_level, src_layer,
                      &dst_surf, dst_level, dst_layer,
                      src_x, src_y, dst_x, dst_y, width, height);
   }
}

void anv_CmdResolveImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageResolve*                       pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);
      assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) ==
             anv_get_layerCount(dst_image, &pRegions[r].dstSubresource));

      const uint32_t layer_count =
         anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);

      VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask,
                         dst_mask = pRegions[r].dstSubresource.aspectMask;

      assert(anv_image_aspects_compatible(src_mask, dst_mask));

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         resolve_image(cmd_buffer->device, &batch,
                       src_image,
                       pRegions[r].srcSubresource.mipLevel,
                       pRegions[r].srcSubresource.baseArrayLayer + layer,
                       dst_image,
                       pRegions[r].dstSubresource.mipLevel,
                       pRegions[r].dstSubresource.baseArrayLayer + layer,
                       pRegions[r].dstSubresource.aspectMask,
                       pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
                       pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
                       pRegions[r].extent.width, pRegions[r].extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

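/* Images whose aux usage is ISL_AUX_USAGE_NONE may still have a CCS that is
 * used only for fast clears, so report CCS_D for clear purposes.
 */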
static enum isl_aux_usage
fast_clear_aux_usage(const struct anv_image *image,
                     VkImageAspectFlagBits aspect)
{
   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
      return ISL_AUX_USAGE_CCS_D;
   else
      return image->planes[plane].aux_usage;
}

void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;

   if (subpass->has_resolve) {
      struct blorp_batch batch;
      blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

      /* We are about to do some MSAA resolves.  We need to flush so that the
       * result of writes to the MSAA color attachments show up in the
       * sampler when we blit to the single-sampled resolve target.
       */
      cmd_buffer->state.pending_pipe_bits |=
         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;

      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t src_att = subpass->color_attachments[i].attachment;
         uint32_t dst_att = subpass->resolve_attachments[i].attachment;

         if (dst_att == VK_ATTACHMENT_UNUSED)
            continue;

         assert(src_att < cmd_buffer->state.pass->attachment_count);
         assert(dst_att < cmd_buffer->state.pass->attachment_count);

         if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
            /* From the Vulkan 1.0 spec:
             *
             *    If the first use of an attachment in a render pass is as a
             *    resolve attachment, then the loadOp is effectively ignored
             *    as the resolve is guaranteed to overwrite all pixels in the
             *    render area.
             */
            cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
         }

         struct anv_image_view *src_iview = fb->attachments[src_att];
         struct anv_image_view *dst_iview = fb->attachments[dst_att];

         enum isl_aux_usage src_aux_usage =
            cmd_buffer->state.attachments[src_att].aux_usage;
         enum isl_aux_usage dst_aux_usage =
            cmd_buffer->state.attachments[dst_att].aux_usage;

         const VkRect2D render_area = cmd_buffer->state.render_area;

         assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT &&
                dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device, src_iview->image,
                                      VK_IMAGE_ASPECT_COLOR_BIT,
                                      src_aux_usage, &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device, dst_iview->image,
                                      VK_IMAGE_ASPECT_COLOR_BIT,
                                      dst_aux_usage, &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_iview->image,
                                           VK_IMAGE_ASPECT_COLOR_BIT,
                                           dst_surf.aux_usage,
                                           dst_iview->planes[0].isl.base_level,
                                           dst_iview->planes[0].isl.base_array_layer, 1);

         assert(!src_iview->image->format->can_ycbcr);
         assert(!dst_iview->image->format->can_ycbcr);

         resolve_surface(&batch,
                         &src_surf,
                         src_iview->planes[0].isl.base_level,
                         src_iview->planes[0].isl.base_array_layer,
                         &dst_surf,
                         dst_iview->planes[0].isl.base_level,
                         dst_iview->planes[0].isl.base_array_layer,
                         render_area.offset.x, render_area.offset.y,
                         render_area.offset.x, render_area.offset.y,
                         render_area.extent.width, render_area.extent.height);
      }

      blorp_batch_finish(&batch);
   }
}

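/* Copies each requested level/layer from the image's main surface into its
 * shadow surface, a second copy of the image data kept for cases where the
 * main surface can't be used directly.
 */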
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->n_planes == 1);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_COLOR_BIT,
                                ISL_AUX_USAGE_NONE, &surf);

   struct blorp_surf shadow_surf = {
      .surf = &image->planes[0].shadow_surface.isl,
      .addr = {
         .buffer = image->planes[0].bo,
         .offset = image->planes[0].bo_offset +
                   image->planes[0].shadow_surface.offset,
         .mocs = cmd_buffer->device->default_mocs,
      },
   };

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = {
         .width = anv_minify(image->extent.width, level),
         .height = anv_minify(image->extent.height, level),
         .depth = anv_minify(image->extent.depth, level),
      };

      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color)
{
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   /* We don't support planar images with multisampling yet */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                aux_usage, &surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
                                     level, base_layer, layer_count);

   blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
               level, base_layer, layer_count,
               area.offset.x, area.offset.y,
               area.offset.x + area.extent.width,
               area.offset.y + area.extent.height,
               clear_color, NULL);

   blorp_batch_finish(&batch);
}

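/* Performs a HiZ clear, resolve, or ambiguate on the given depth range.
 * The clear value is pinned to ANV_HZ_FC_VAL, the one depth value the HiZ
 * fast-clear path elsewhere assumes.
 */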
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   assert(anv_image_aspect_to_plane(image->aspects,
                                    VK_IMAGE_ASPECT_DEPTH_BIT) == 0);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                ISL_AUX_USAGE_HIZ, &surf);
   surf.clear_color.f32[0] = ANV_HZ_FC_VAL;

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   blorp_batch_finish(&batch);
}

void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, bool predicate)
{
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->samples > 1);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                ANV_AUX_USAGE_DEFAULT, &surf);

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, surf.surf->format,
                       0, base_layer, layer_count,
                       0, 0, image->extent.width, image->extent.height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_AMBIGUATE:
   default:
      unreachable("Unsupported MCS operation");
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   blorp_batch_finish(&batch);
}

void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, bool predicate)
{
   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YCbCr is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   uint32_t width_div = image->format->planes[plane].denominator_scales[0];
   uint32_t height_div = image->format->planes[plane].denominator_scales[1];
   uint32_t level_width = anv_minify(image->extent.width, level) / width_div;
   uint32_t level_height = anv_minify(image->extent.height, level) / height_div;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                fast_clear_aux_usage(image, aspect),
                                &surf);

   if (ccs_op == ISL_AUX_OP_FULL_RESOLVE ||
       ccs_op == ISL_AUX_OP_PARTIAL_RESOLVE) {
      /* If we're doing a resolve operation, then we need the indirect clear
       * color.  The clear and ambiguate operations just stomp the CCS to a
       * particular value and don't care about format or clear value.
       */
      const struct anv_address clear_color_addr =
         anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
      surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
   }

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, surf.surf->format,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
                        surf.surf->format, ccs_op);
      break;
   case ISL_AUX_OP_AMBIGUATE:
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(&batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   blorp_batch_finish(&batch);
}