anv: modify the internal concept of format to express multiple planes
[mesa.git] src/intel/vulkan/anv_blorp.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

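/* BLORP doesn't manage its own compiled shaders.  Instead, it calls back
 * into the driver through the two hooks below, which back its kernels with
 * the device's dedicated blorp_shader_cache.
 */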
static bool
lookup_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_shader_bin *bin =
      anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

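/* Called once at device creation: sets up the shader cache, points BLORP's
 * MOCS values and compiler at the device's, and selects the gen-specific
 * batch-emission entrypoint.
 */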
void
anv_device_init_blorp(struct anv_device *device)
{
   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
   blorp_init(&device->blorp, device, &device->isl_dev);
   device->blorp.compiler = device->instance->physicalDevice.compiler;
   device->blorp.mocs.tex = device->default_mocs;
   device->blorp.mocs.rb = device->default_mocs;
   device->blorp.mocs.vb = device->default_mocs;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell) {
         device->blorp.exec = gen75_blorp_exec;
      } else {
         device->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      device->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      device->blorp.exec = gen9_blorp_exec;
      break;
   case 10:
      device->blorp.exec = gen10_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}

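/* Wraps a range of a VkBuffer as a linear, single-level 2D surface so that
 * BLORP can use it as a blit source or destination.  The caller provides the
 * backing isl_surf since blorp_surf only holds a pointer to it.
 */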
static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);
   bool ok UNUSED;

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
   }

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch = row_pitch,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

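/* Fills out a blorp_surf for one aspect of an anv_image.  Stencil has no
 * aux usage and HiZ isn't usable by these copy/clear paths, so both fall
 * back to ISL_AUX_USAGE_NONE.
 */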
static void
get_blorp_surf_for_anv_image(const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT ||
       aux_usage == ISL_AUX_USAGE_HIZ)
      aux_usage = ISL_AUX_USAGE_NONE;

   const struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, aspect);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = image->bo,
         .offset = image->offset + surface->offset,
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      blorp_surf->aux_surf = &image->aux_surface.isl;
      blorp_surf->aux_addr = (struct blorp_address) {
         .buffer = image->bo,
         .offset = image->offset + image->aux_surface.offset,
      };
      blorp_surf->aux_usage = aux_usage;
   }
}

void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count =
            anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
      }

      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(layer_count ==
                anv_get_layerCount(src_image, &pRegions[r].srcSubresource));
      }

      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      uint32_t a;
      for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
         VkImageAspectFlagBits aspect = (1 << a);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(src_image, aspect, src_image->aux_usage,
                                      &src_surf);
         get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage,
                                      &dst_surf);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
                       src_base_layer + i,
                       &dst_surf, pRegions[r].dstSubresource.mipLevel,
                       dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}

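/* Shared implementation for vkCmdCopyBufferToImage and
 * vkCmdCopyImageToBuffer.  The buffer is wrapped in a fake linear image and
 * the src/dst pointers are swapped according to the copy direction, so both
 * entrypoints reduce to the same blorp_copy loop.
 */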
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage,
                                   &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth =
            anv_get_layerCount(anv_image, &pRegions[r].imageSubresource);
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      const VkExtent3D bufferImageExtent = {
         .width  = pRegions[r].bufferRowLength ?
                   pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}

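/* Sorts each coordinate pair into ascending order and reports whether the
 * two pairs disagreed, i.e. whether the blit must mirror along that axis.
 */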
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(src_image, src_res->aspectMask,
                                   src_image->aux_usage, &src);
      get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask,
                                   dst_image->aux_usage, &dst);

      struct anv_format_plane src_format =
         anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format,
                              src_res->aspectMask, src_image->tiling);
      struct anv_format_plane dst_format =
         anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format,
                              dst_res->aspectMask, dst_image->tiling);

      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + anv_get_layerCount(src_image, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      const unsigned num_layers = dst_end - dst_start;
      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format,
                    anv_swizzle_for_render(dst_format.swizzle),
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }
   }

   blorp_batch_finish(&batch);
}

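/* Picks an R32-based UINT format whose block size matches the given number
 * of bytes, so raw buffer data can be pushed through the render pipe
 * without interpretation.
 */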
static enum isl_format
isl_format_for_size(unsigned size_B)
{
   switch (size_B) {
   case 4:  return ISL_FORMAT_R32_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Not a power-of-two format size");
   }
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
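/* For example, gcd_pow2_u64(24, 16) == 8: the lowest set bits of 24
 * (0b11000) and 16 (0b10000) are 1 << 3 and 1 << 4, and MIN2 picks 1 << 3.
 */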
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1ull << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

void anv_CmdCopyBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferCopy*                         pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      struct blorp_address src = {
         .buffer = src_buffer->bo,
         .offset = src_buffer->offset + pRegions[r].srcOffset,
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->bo,
         .offset = dst_buffer->offset + pRegions[r].dstOffset,
      };

      blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
   }

   blorp_batch_finish(&batch);
}

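/* vkCmdUpdateBuffer: the data lives only on the CPU, so it is staged in
 * chunks through the dynamic state pool and then copied to the destination
 * buffer on the GPU.
 */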
void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      anv_state_flush(cmd_buffer->device, tmp_data);

      struct blorp_address src = {
         .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->bo,
         .offset = dst_buffer->offset + dstOffset,
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}

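/* vkCmdFillBuffer is implemented as a series of 2D color clears: full
 * MAX_SURFACE_DIM x MAX_SURFACE_DIM rectangles first, then a rectangle of
 * full rows, then a final partial row.
 */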
void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer.  If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage, &surf);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_format_plane src_format =
         anv_get_format_plane(&cmd_buffer->device->info, image->vk_format,
                              VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

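/* Allocates a one-entry binding table pointing at the given surface state.
 * This feeds the blorp_clear_attachments and blorp_ccs_resolve_attachment
 * paths, which take a binding-table offset rather than a surface.
 */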
static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

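/* Implements vkCmdClearAttachments for a single color attachment.  The
 * clear reuses the surface state already emitted for the current subpass,
 * so it runs inside the render pass without disturbing its state.
 */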
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   const uint32_t att_idx = subpass->color_attachments[color_att].attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att_state->color.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att_idx = subpass->depth_stencil_attachment.attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};

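/* Returns true if any attachment used by the current subpass still has a
 * pending clear, i.e. whether anv_cmd_buffer_clear_subpass has work to do.
 */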
static bool
subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      assert(a < cmd_state->pass->attachment_count);
      if (cmd_state->attachments[a].pending_clear_aspects) {
         return true;
      }
   }

   if (ds != VK_ATTACHMENT_UNUSED) {
      assert(ds < cmd_state->pass->attachment_count);
      if (cmd_state->attachments[ds].pending_clear_aspects)
         return true;
   }

   return false;
}

void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   const VkRect2D render_area = cmd_buffer->state.render_area;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      const uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      assert(a < cmd_state->pass->attachment_count);
      struct anv_attachment_state *att_state = &cmd_state->attachments[a];

      if (!att_state->pending_clear_aspects)
         continue;

      assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_image_view *iview = fb->attachments[a];
      const struct anv_image *image = iview->image;
      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                   att_state->aux_usage, &surf);

      if (att_state->fast_clear) {
         surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
          *
          *    "After Render target fast clear, pipe-control with color cache
          *    write-flush must be issued before sending any DRAW commands on
          *    that render target."
          *
          * This comment is a bit cryptic and doesn't really tell you what's
          * going on or what's really needed.  It appears that fast clear ops
          * are not properly synchronized with other drawing.  This means that
          * we cannot have a fast clear operation in the pipe at the same time
          * as other regular drawing operations.  We need to use a PIPE_CONTROL
          * to ensure that the contents of the previous draw hit the render
          * target before we resolve and then use a second PIPE_CONTROL after
          * the resolve to ensure that it is completed before any additional
          * drawing occurs.
          */
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

         blorp_fast_clear(&batch, &surf, iview->isl.format,
                          iview->isl.base_level,
                          iview->isl.base_array_layer, fb->layers,
                          render_area.offset.x, render_area.offset.y,
                          render_area.offset.x + render_area.extent.width,
                          render_area.offset.y + render_area.extent.height);

         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
      } else {
         blorp_clear(&batch, &surf, iview->isl.format,
                     anv_swizzle_for_render(iview->isl.swizzle),
                     iview->isl.base_level,
                     iview->isl.base_array_layer, fb->layers,
                     render_area.offset.x, render_area.offset.y,
                     render_area.offset.x + render_area.extent.width,
                     render_area.offset.y + render_area.extent.height,
                     vk_to_isl_color(att_state->clear_value.color), NULL);
      }

      att_state->pending_clear_aspects = 0;
   }

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
   assert(ds == VK_ATTACHMENT_UNUSED || ds < cmd_state->pass->attachment_count);

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {

      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      const uint8_t gen = cmd_buffer->device->info.gen;
      bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
                            ISL_AUX_USAGE_HIZ;
      const struct anv_image_view *iview = fb->attachments[ds];

      if (clear_with_hiz) {
         const bool clear_depth = clear_att.aspectMask &
                                  VK_IMAGE_ASPECT_DEPTH_BIT;
         const bool clear_stencil = clear_att.aspectMask &
                                    VK_IMAGE_ASPECT_STENCIL_BIT;

         /* Check against restrictions for depth buffer clearing.  A great GPU
          * performance benefit isn't expected when using the HZ sequence for
          * stencil-only clears.  Therefore, we don't emit a HZ op sequence
          * for a stencil clear in addition to using the BLORP-fallback for
          * depth.
          */
         if (clear_depth) {
            if (!blorp_can_hiz_clear_depth(gen, iview->isl.format,
                                           iview->image->samples,
                                           render_area.offset.x,
                                           render_area.offset.y,
                                           render_area.offset.x +
                                           render_area.extent.width,
                                           render_area.offset.y +
                                           render_area.extent.height)) {
               clear_with_hiz = false;
            } else if (clear_att.clearValue.depthStencil.depth !=
                       ANV_HZ_FC_VAL) {
               /* Don't enable fast depth clears for any color not equal to
                * ANV_HZ_FC_VAL.
                */
               clear_with_hiz = false;
            } else if (gen == 8 &&
                       anv_can_sample_with_hiz(&cmd_buffer->device->info,
                                               iview->aspect_mask,
                                               iview->image->samples)) {
               /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
                * fast-cleared portion of a HiZ buffer.  Testing has revealed
                * that Gen8 only supports returning 0.0f.  Gens prior to gen8
                * do not support this feature at all.
                */
               clear_with_hiz = false;
            }
         }

         if (clear_with_hiz) {
            blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
                                             render_area.offset.x,
                                             render_area.offset.y,
                                             render_area.offset.x +
                                             render_area.extent.width,
                                             render_area.offset.y +
                                             render_area.extent.height,
                                             clear_depth, clear_stencil,
                                             clear_att.clearValue.depthStencil.stencil);

            /* From the SKL PRM, Depth Buffer Clear:
             *
             * Depth Buffer Clear Workaround
             * Depth buffer clear pass using any of the methods (WM_STATE,
             * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
             * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
             * “set” before starting to render.  DepthStall and DepthFlush are
             * not needed between consecutive depth clear passes nor is it
             * required if the depth-clear pass was done with “full_surf_clear”
             * bit set in the 3DSTATE_WM_HZ_OP.
             */
            if (clear_depth) {
               cmd_buffer->state.pending_pipe_bits |=
                  ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
            }
         }
      }

      if (!clear_with_hiz) {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &clear_att, 1, &clear_rect);
      }

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);
}

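/* Resolves one multisampled 2D subresource into a single-sampled one with
 * a nearest-filtered blorp_blit per aspect.
 */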
static void
resolve_image(struct blorp_batch *batch,
              const struct anv_image *src_image,
              enum isl_aux_usage src_aux_usage,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              enum isl_aux_usage dst_aux_usage,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);

   uint32_t a;
   for_each_bit(a, aspect_mask) {
      VkImageAspectFlagBits aspect = 1 << a;

      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(src_image, aspect,
                                   src_aux_usage, &src_surf);
      get_blorp_surf_for_anv_image(dst_image, aspect,
                                   dst_aux_usage, &dst_surf);

      blorp_blit(batch,
                 &src_surf, src_level, src_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 0x2600 /* GL_NEAREST */, false, false);
   }
}

void anv_CmdResolveImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageResolve*                       pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);
      assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) ==
             anv_get_layerCount(dst_image, &pRegions[r].dstSubresource));

      const uint32_t layer_count =
         anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         resolve_image(&batch,
                       src_image, src_image->aux_usage,
                       pRegions[r].srcSubresource.mipLevel,
                       pRegions[r].srcSubresource.baseArrayLayer + layer,
                       dst_image, dst_image->aux_usage,
                       pRegions[r].dstSubresource.mipLevel,
                       pRegions[r].dstSubresource.baseArrayLayer + layer,
                       pRegions[r].dstSubresource.aspectMask,
                       pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
                       pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
                       pRegions[r].extent.width, pRegions[r].extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

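/* Fast-clears the given levels and layers of a color image.  Images with no
 * aux usage of their own are cleared as ISL_AUX_USAGE_CCS_D.
 */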
void
anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
                     const struct anv_image *image,
                     const uint32_t base_level, const uint32_t level_count,
                     const uint32_t base_layer, uint32_t layer_count)
{
   assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);

   if (image->type == VK_IMAGE_TYPE_3D) {
      assert(base_layer == 0);
      assert(layer_count == anv_minify(image->extent.depth, base_level));
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage == ISL_AUX_USAGE_NONE ?
                                ISL_AUX_USAGE_CCS_D : image->aux_usage,
                                &surf);

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = {
         .width = anv_minify(image->extent.width, level),
         .height = anv_minify(image->extent.height, level),
         .depth = anv_minify(image->extent.depth, level),
      };

      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      assert(level < anv_image_aux_levels(image));
      assert(base_layer + layer_count <= anv_image_aux_layers(image, level));
      blorp_fast_clear(&batch, &surf, surf.surf->format,
                       level, base_layer, layer_count,
                       0, 0, extent.width, extent.height);
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
}

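/* At the end of a subpass, blits every multisampled color attachment that
 * has a resolve attachment into its single-sampled counterpart.
 */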
void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;

   if (subpass->has_resolve) {
      struct blorp_batch batch;
      blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

      /* We are about to do some MSAA resolves.  We need to flush so that the
       * result of writes to the MSAA color attachments show up in the sampler
       * when we blit to the single-sampled resolve target.
       */
      cmd_buffer->state.pending_pipe_bits |=
         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;

      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t src_att = subpass->color_attachments[i].attachment;
         uint32_t dst_att = subpass->resolve_attachments[i].attachment;

         if (dst_att == VK_ATTACHMENT_UNUSED)
            continue;

         assert(src_att < cmd_buffer->state.pass->attachment_count);
         assert(dst_att < cmd_buffer->state.pass->attachment_count);

         if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
            /* From the Vulkan 1.0 spec:
             *
             *    If the first use of an attachment in a render pass is as a
             *    resolve attachment, then the loadOp is effectively ignored
             *    as the resolve is guaranteed to overwrite all pixels in the
             *    render area.
             */
            cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
         }

         struct anv_image_view *src_iview = fb->attachments[src_att];
         struct anv_image_view *dst_iview = fb->attachments[dst_att];

         enum isl_aux_usage src_aux_usage =
            cmd_buffer->state.attachments[src_att].aux_usage;
         enum isl_aux_usage dst_aux_usage =
            cmd_buffer->state.attachments[dst_att].aux_usage;

         const VkRect2D render_area = cmd_buffer->state.render_area;

         assert(src_iview->aspect_mask == dst_iview->aspect_mask);

         resolve_image(&batch, src_iview->image, src_aux_usage,
                       src_iview->isl.base_level,
                       src_iview->isl.base_array_layer,
                       dst_iview->image, dst_aux_usage,
                       dst_iview->isl.base_level,
                       dst_iview->isl.base_array_layer,
                       src_iview->aspect_mask,
                       render_area.offset.x, render_area.offset.y,
                       render_area.offset.x, render_area.offset.y,
                       render_area.extent.width, render_area.extent.height);
      }

      blorp_batch_finish(&batch);
   }
}

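/* Copies the requested levels and layers of an image's primary surface into
 * its shadow surface with blorp_copy.
 */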
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, aspect,
                                ISL_AUX_USAGE_NONE, &surf);

   struct blorp_surf shadow_surf = {
      .surf = &image->shadow_surface.isl,
      .addr = {
         .buffer = image->bo,
         .offset = image->offset + image->shadow_surface.offset,
      },
   };

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = {
         .width = anv_minify(image->extent.width, level),
         .height = anv_minify(image->extent.height, level),
         .depth = anv_minify(image->extent.depth, level),
      };

      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

void
anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
                        const struct anv_image *image,
                        enum blorp_hiz_op op)
{
   assert(image);

   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
    * don't perform such a resolve on gens that don't support it.
    */
   if (cmd_buffer->device->info.gen < 8 ||
       image->aux_usage != ISL_AUX_USAGE_HIZ)
      return;

   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
          op == BLORP_HIZ_OP_DEPTH_RESOLVE);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                ISL_AUX_USAGE_NONE, &surf);

   /* Manually add the aux HiZ surf */
   surf.aux_surf = &image->aux_surface.isl;
   surf.aux_addr = (struct blorp_address) {
      .buffer = image->bo,
      .offset = image->offset + image->aux_surface.offset,
   };
   surf.aux_usage = ISL_AUX_USAGE_HIZ;

   surf.clear_color.f32[0] = ANV_HZ_FC_VAL;

   blorp_hiz_op(&batch, &surf, 0, 0, 1, op);
   blorp_batch_finish(&batch);
}

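/* Emits a predicated CCS resolve (partial or full, per `op`) for one level
 * of a single-sample color image, through the caller-provided surface state.
 */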
void
anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
                const struct anv_state surface_state,
                const struct anv_image * const image,
                const uint8_t level, const uint32_t layer_count,
                const enum blorp_fast_clear_op op)
{
   assert(cmd_buffer && image);

   /* The resolved subresource range must have a CCS buffer. */
   assert(level < anv_image_aux_levels(image));
   assert(layer_count <= anv_image_aux_layers(image, level));
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->samples == 1);

   /* Create a binding table for this surface state. */
   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_PREDICATE_ENABLE);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage == ISL_AUX_USAGE_CCS_E ?
                                ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
                                &surf);

   blorp_ccs_resolve_attachment(&batch, binding_table, &surf, level,
                                layer_count, image->color_surface.isl.format,
                                op);

   blorp_batch_finish(&batch);
}