anv/cnl: Generate and use gen10 functions
src/intel/vulkan/anv_blorp.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_shader_bin *bin =
      anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
   blorp_init(&device->blorp, device, &device->isl_dev);
   device->blorp.compiler = device->instance->physicalDevice.compiler;
   device->blorp.mocs.tex = device->default_mocs;
   device->blorp.mocs.rb = device->default_mocs;
   device->blorp.mocs.vb = device->default_mocs;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
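   /* Dispatch to the per-gen generated blorp_exec entrypoint; the gen10
    * (Cannonlake) case is the one this commit adds.
    */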
   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell) {
         device->blorp.exec = gen75_blorp_exec;
      } else {
         device->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      device->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      device->blorp.exec = gen9_blorp_exec;
      break;
   case 10:
      device->blorp.exec = gen10_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   const struct isl_format_layout *fmtl =
      isl_format_get_layout(format);
   bool ok UNUSED;

   /* ASTC is the only format which doesn't support linear layouts.
    * Create an equivalently sized surface with ISL to get around this.
    */
   if (fmtl->txc == ISL_TXC_ASTC) {
      /* Use an equivalently sized format */
      format = ISL_FORMAT_R32G32B32A32_UINT;
      assert(fmtl->bpb == isl_format_get_layout(format)->bpb);

      /* Shrink the dimensions for the new format */
      width = DIV_ROUND_UP(width, fmtl->bw);
      height = DIV_ROUND_UP(height, fmtl->bh);
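
      /* For example, with ASTC 4x4 both formats are 128 bits per block,
       * so a 64x64 ASTC surface becomes a 16x16 R32G32B32A32_UINT surface
       * over the same bytes.
       */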
   }

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch = row_pitch,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_RENDER_TARGET_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

static void
get_blorp_surf_for_anv_image(const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
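   /* Stencil surfaces carry no auxiliary data here, and HiZ is only
    * consumed through the explicit HiZ ops elsewhere in this file, so
    * treat both as having no aux usage for ordinary copies and blits.
    */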
   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT ||
       aux_usage == ISL_AUX_USAGE_HIZ)
      aux_usage = ISL_AUX_USAGE_NONE;

   const struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, aspect);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = image->bo,
         .offset = image->offset + surface->offset,
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      blorp_surf->aux_surf = &image->aux_surface.isl;
      blorp_surf->aux_addr = (struct blorp_address) {
         .buffer = image->bo,
         .offset = image->offset + image->aux_surface.offset,
      };
      blorp_surf->aux_usage = aux_usage;
   }
}

void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count =
            anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
      }

      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(layer_count ==
                anv_get_layerCount(src_image, &pRegions[r].srcSubresource));
      }

      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      uint32_t a;
      for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
         VkImageAspectFlagBits aspect = (1 << a);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(src_image, aspect, src_image->aux_usage,
                                      &src_surf);
         get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage,
                                      &dst_surf);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
                       src_base_layer + i,
                       &dst_surf, pRegions[r].dstSubresource.mipLevel,
                       dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage,
                                   &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth =
            anv_get_layerCount(anv_image, &pRegions[r].imageSubresource);
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      const VkExtent3D bufferImageExtent = {
         .width =  pRegions[r].bufferRowLength ?
                   pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}

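/* Put each coordinate pair in increasing order and report whether the blit
 * must be mirrored along that axis (true when exactly one of the two pairs
 * was swapped).
 */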
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(src_image, src_res->aspectMask,
                                   src_image->aux_usage, &src);
      get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask,
                                   dst_image->aux_usage, &dst);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, src_image->vk_format,
                        src_res->aspectMask, src_image->tiling);
      struct anv_format dst_format =
         anv_get_format(&cmd_buffer->device->info, dst_image->vk_format,
                        dst_res->aspectMask, dst_image->tiling);

      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + anv_get_layerCount(src_image, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);
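      /* src_z_step maps each destination slice to a proportionally scaled
       * source slice; the +1 terms bias the ratio so the scaled index stays
       * inside the source range.
       */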

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      const unsigned num_layers = dst_end - dst_start;
      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format,
                    anv_swizzle_for_render(dst_format.swizzle),
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }
   }

   blorp_batch_finish(&batch);
}

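/* Map a power-of-two byte size onto a UINT format of the same texel size so
 * raw memory can be copied or filled as if it were image texels.
 */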
static enum isl_format
isl_format_for_size(unsigned size_B)
{
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R8G8_UINT;
   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Not a power-of-two format size");
   }
}

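/* Copy a width x height rectangle of block_size-byte texels between two BOs
 * by wrapping each in a linear 2D surface and letting blorp_copy do the move.
 */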
static void
do_buffer_copy(struct blorp_batch *batch,
               struct anv_bo *src, uint64_t src_offset,
               struct anv_bo *dst, uint64_t dst_offset,
               int width, int height, int block_size)
{
   struct anv_device *device = batch->blorp->driver_ctx;

   /* The actual format we pick doesn't matter as blorp will throw it away.
    * The only thing that actually matters is the size.
    */
   enum isl_format format = isl_format_for_size(block_size);

   struct isl_surf surf;
   isl_surf_init(&device->isl_dev, &surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(surf.row_pitch == width * block_size);

   struct blorp_surf src_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = src,
         .offset = src_offset,
      },
   };

   struct blorp_surf dst_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = dst,
         .offset = dst_offset,
      },
   };

   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
              0, 0, 0, 0, width, height);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX, in which
    * case the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}
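
/* For example, gcd_pow2_u64(48, 32) == 16: the lowest set bits of 48 and 32
 * are 16 and 32, and MIN2 of the exponents picks 16.  The buffer copy and
 * fill paths below use this to choose the largest power-of-two texel size
 * that divides every offset and size involved.
 */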

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

void anv_CmdCopyBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferCopy*                         pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
      uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
      uint64_t copy_size = pRegions[r].size;

      /* First, we compute the biggest format that can be used with the
       * given offsets and size.
       */
      int bs = 16;
      bs = gcd_pow2_u64(bs, src_offset);
      bs = gcd_pow2_u64(bs, dst_offset);
      bs = gcd_pow2_u64(bs, pRegions[r].size);
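
      /* For example (assuming 16-byte-aligned offsets), a 1 MiB region
       * gives bs = 16 and is handled as a single 16384x4 rectangle of
       * 16-byte texels; any trailing remainder is finished as a one-row
       * strip below.
       */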

      /* First, we make a bunch of max-sized copies */
      uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
      while (copy_size >= max_copy_size) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
         copy_size -= max_copy_size;
         src_offset += max_copy_size;
         dst_offset += max_copy_size;
      }

      /* Now make a max-width copy */
      uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
      assert(height < MAX_SURFACE_DIM);
      if (height != 0) {
         uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, height, bs);
         copy_size -= rect_copy_size;
         src_offset += rect_copy_size;
         dst_offset += rect_copy_size;
      }

      /* Finally, make a small copy to finish it off */
      if (copy_size != 0) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        copy_size / bs, 1, bs);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

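      /* Make the CPU write above visible to the GPU; anv_state_flush is a
       * no-op on LLC platforms and flushes the CPU cache range otherwise.
       */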
      anv_state_flush(cmd_buffer->device, tmp_data);

      int bs = 16;
      bs = gcd_pow2_u64(bs, dstOffset);
      bs = gcd_pow2_u64(bs, copy_size);

      do_buffer_copy(&batch,
                     &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
                     tmp_data.offset,
                     dst_buffer->bo, dst_buffer->offset + dstOffset,
                     copy_size / bs, 1, bs);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer.  If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };
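
   /* As in anv_CmdCopyBuffer, split the fill into max-sized rectangles,
    * then one max-width strip, then a final one-row strip.
    */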

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage, &surf);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, image->vk_format,
                        VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = anv_get_layerCount(image, &pRanges[r]);

      for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

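/* Build a one-entry binding table pointing at the given surface state and
 * return its offset; blorp_clear_attachments takes a binding table rather
 * than a surface state directly.
 */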
static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   const uint32_t att_idx = subpass->color_attachments[color_att].attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att_state->color_rt_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att_idx = subpass->depth_stencil_attachment.attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      uint32_t view_idx;
      for_each_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};

static bool
subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      assert(a < cmd_state->pass->attachment_count);
      if (cmd_state->attachments[a].pending_clear_aspects) {
         return true;
      }
   }

   if (ds != VK_ATTACHMENT_UNUSED) {
      assert(ds < cmd_state->pass->attachment_count);
      if (cmd_state->attachments[ds].pending_clear_aspects)
         return true;
   }

   return false;
}

void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
   const VkRect2D render_area = cmd_buffer->state.render_area;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      const uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      assert(a < cmd_state->pass->attachment_count);
      struct anv_attachment_state *att_state = &cmd_state->attachments[a];

      if (!att_state->pending_clear_aspects)
         continue;

      assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_image_view *iview = fb->attachments[a];
      const struct anv_image *image = iview->image;
      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                   att_state->aux_usage, &surf);

      if (att_state->fast_clear) {
         surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

         /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
          *
          *    "After Render target fast clear, pipe-control with color cache
          *    write-flush must be issued before sending any DRAW commands on
          *    that render target."
          *
          * This comment is a bit cryptic and doesn't really tell you what's
          * going on or what's really needed.  It appears that fast clear ops
          * are not properly synchronized with other drawing.  This means that
          * we cannot have a fast clear operation in the pipe at the same time
          * as other regular drawing operations.  We need to use a PIPE_CONTROL
          * to ensure that the contents of the previous draw hit the render
          * target before we resolve and then use a second PIPE_CONTROL after
          * the resolve to ensure that it is completed before any additional
          * drawing occurs.
          */
         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

         blorp_fast_clear(&batch, &surf, iview->isl.format,
                          iview->isl.base_level,
                          iview->isl.base_array_layer, fb->layers,
                          render_area.offset.x, render_area.offset.y,
                          render_area.offset.x + render_area.extent.width,
                          render_area.offset.y + render_area.extent.height);

         cmd_buffer->state.pending_pipe_bits |=
            ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
      } else {
         blorp_clear(&batch, &surf, iview->isl.format,
                     anv_swizzle_for_render(iview->isl.swizzle),
                     iview->isl.base_level,
                     iview->isl.base_array_layer, fb->layers,
                     render_area.offset.x, render_area.offset.y,
                     render_area.offset.x + render_area.extent.width,
                     render_area.offset.y + render_area.extent.height,
                     vk_to_isl_color(att_state->clear_value.color), NULL);
      }

      att_state->pending_clear_aspects = 0;
   }

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
   assert(ds == VK_ATTACHMENT_UNUSED || ds < cmd_state->pass->attachment_count);

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {

      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      const uint8_t gen = cmd_buffer->device->info.gen;
      bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage ==
                            ISL_AUX_USAGE_HIZ;
      const struct anv_image_view *iview = fb->attachments[ds];

      if (clear_with_hiz) {
         const bool clear_depth = clear_att.aspectMask &
                                  VK_IMAGE_ASPECT_DEPTH_BIT;
         const bool clear_stencil = clear_att.aspectMask &
                                    VK_IMAGE_ASPECT_STENCIL_BIT;

         /* Check against restrictions for depth buffer clearing.  A great GPU
          * performance benefit isn't expected when using the HZ sequence for
          * stencil-only clears.  Therefore, we don't emit a HZ op sequence
          * for a stencil clear in addition to using the BLORP-fallback for
          * depth.
          */
         if (clear_depth) {
            if (!blorp_can_hiz_clear_depth(gen, iview->isl.format,
                                           iview->image->samples,
                                           render_area.offset.x,
                                           render_area.offset.y,
                                           render_area.offset.x +
                                           render_area.extent.width,
                                           render_area.offset.y +
                                           render_area.extent.height)) {
               clear_with_hiz = false;
            } else if (clear_att.clearValue.depthStencil.depth !=
                       ANV_HZ_FC_VAL) {
               /* Don't enable fast depth clears for any color not equal to
                * ANV_HZ_FC_VAL.
                */
               clear_with_hiz = false;
            } else if (gen == 8 &&
                       anv_can_sample_with_hiz(&cmd_buffer->device->info,
                                               iview->aspect_mask,
                                               iview->image->samples)) {
               /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a
                * fast-cleared portion of a HiZ buffer.  Testing has revealed
                * that Gen8 only supports returning 0.0f.  Gens prior to gen8
                * do not support this feature at all.
                */
               clear_with_hiz = false;
            }
         }

         if (clear_with_hiz) {
            blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples,
                                             render_area.offset.x,
                                             render_area.offset.y,
                                             render_area.offset.x +
                                             render_area.extent.width,
                                             render_area.offset.y +
                                             render_area.extent.height,
                                             clear_depth, clear_stencil,
                                             clear_att.clearValue.depthStencil.stencil);

            /* From the SKL PRM, Depth Buffer Clear:
             *
             * Depth Buffer Clear Workaround
             * Depth buffer clear pass using any of the methods (WM_STATE,
             * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
             * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
             * “set” before starting to render.  DepthStall and DepthFlush are
             * not needed between consecutive depth clear passes nor is it
             * required if the depth-clear pass was done with “full_surf_clear”
             * bit set in the 3DSTATE_WM_HZ_OP.
             */
            if (clear_depth) {
               cmd_buffer->state.pending_pipe_bits |=
                  ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
            }
         }
      }

      if (!clear_with_hiz) {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &clear_att, 1, &clear_rect);
      }

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);
}

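/* Resolve one layer of each requested aspect of a multisampled image into
 * the matching layer of a single-sampled image using blorp_blit.
 */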
static void
resolve_image(struct blorp_batch *batch,
              const struct anv_image *src_image,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);

   uint32_t a;
   for_each_bit(a, aspect_mask) {
      VkImageAspectFlagBits aspect = 1 << a;

      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(src_image, aspect,
                                   src_image->aux_usage, &src_surf);
      get_blorp_surf_for_anv_image(dst_image, aspect,
                                   dst_image->aux_usage, &dst_surf);

      blorp_blit(batch,
                 &src_surf, src_level, src_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 0x2600 /* GL_NEAREST */, false, false);
   }
}

void anv_CmdResolveImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageResolve*                       pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);
      assert(anv_get_layerCount(src_image, &pRegions[r].srcSubresource) ==
             anv_get_layerCount(dst_image, &pRegions[r].dstSubresource));

      const uint32_t layer_count =
         anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);

      for (uint32_t layer = 0; layer < layer_count; layer++) {
         resolve_image(&batch,
                       src_image, pRegions[r].srcSubresource.mipLevel,
                       pRegions[r].srcSubresource.baseArrayLayer + layer,
                       dst_image, pRegions[r].dstSubresource.mipLevel,
                       pRegions[r].dstSubresource.baseArrayLayer + layer,
                       pRegions[r].dstSubresource.aspectMask,
                       pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
                       pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
                       pRegions[r].extent.width, pRegions[r].extent.height);
      }
   }

   blorp_batch_finish(&batch);
}

void
anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    const struct isl_view *view,
                    const VkImageSubresourceRange *subresourceRange)
{
   assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                image->aux_usage, &surf);

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   const uint32_t level_count =
      view ? view->levels : anv_get_levelCount(image, subresourceRange);
   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level =
         (view ? view->base_level : subresourceRange->baseMipLevel) + l;

      const VkExtent3D extent = {
         .width = anv_minify(image->extent.width, level),
         .height = anv_minify(image->extent.height, level),
         .depth = anv_minify(image->extent.depth, level),
      };

      /* Blorp likes to treat 2D_ARRAY and 3D the same. */
      uint32_t blorp_base_layer, blorp_layer_count;
      if (view) {
         blorp_base_layer = view->base_array_layer;
         blorp_layer_count = view->array_len;
      } else if (image->type == VK_IMAGE_TYPE_3D) {
         blorp_base_layer = 0;
         blorp_layer_count = extent.depth;
      } else {
         blorp_base_layer = subresourceRange->baseArrayLayer;
         blorp_layer_count = anv_get_layerCount(image, subresourceRange);
      }

      blorp_fast_clear(&batch, &surf, surf.surf->format,
                       level, blorp_base_layer, blorp_layer_count,
                       0, 0, extent.width, extent.height);
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
}

static void
ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       uint32_t att)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att];

   if (att_state->aux_usage == ISL_AUX_USAGE_NONE ||
       att_state->aux_usage == ISL_AUX_USAGE_MCS)
      return; /* Nothing to resolve */

   assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
          att_state->aux_usage == ISL_AUX_USAGE_CCS_D);

   struct anv_render_pass *pass = cmd_buffer->state.pass;
   const uint32_t subpass_idx = anv_get_subpass_id(&cmd_buffer->state);

   /* Scan forward to see all the ways this attachment will be used.
    * Ideally, we would like to resolve in the same subpass as the last write
    * of a particular attachment.  That way we only resolve once but it's
    * still hot in the cache.
    */
   bool found_draw = false;
   enum anv_subpass_usage usage = 0;
   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage |= pass->attachments[att].subpass_usage[s];

      if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
         /* We found another subpass that draws to this attachment.  We'll
          * wait to resolve until then.
          */
         found_draw = true;
         break;
      }
   }

   struct anv_image_view *iview = fb->attachments[att];
   const struct anv_image *image = iview->image;
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
   if (!found_draw) {
      /* This is the last subpass that writes to this attachment so we need to
       * resolve here.  Ideally, we would like to only resolve if the storeOp
       * is set to VK_ATTACHMENT_STORE_OP_STORE.  However, we need to ensure
       * that the CCS bits are set to "resolved" because there may be copy or
       * blit operations (which may ignore CCS) between now and the next time
       * we render and we need to ensure that anything they write will be
       * respected in the next render.  Unfortunately, the hardware does not
       * provide us with any sort of "invalidate" pass that sets the CCS to
       * "resolved" without writing to the render target.
       */
      if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
         /* The image destination surface doesn't support compression outside
          * the render pass.  We need a full resolve.
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (att_state->fast_clear) {
         /* We don't know what to do with clear colors outside the render
          * pass.  We need a partial resolve.  Only transparent black is
          * built into the surface state object and thus no resolve is
          * required for this case.
          */
         if (att_state->clear_value.color.uint32[0] ||
             att_state->clear_value.color.uint32[1] ||
             att_state->clear_value.color.uint32[2] ||
             att_state->clear_value.color.uint32[3])
            resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      } else {
         /* The image "natively" supports all the compression we care about
          * and we don't need to resolve at all.  If this is the case, we also
          * don't need to resolve for any of the input attachment cases below.
          */
      }
   } else if (usage & ANV_SUBPASS_USAGE_INPUT) {
      /* Input attachments are clear-color aware so, at least on Sky Lake, we
       * can frequently sample from them with no resolves at all.
       */
      if (att_state->aux_usage != att_state->input_aux_usage) {
         assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      } else if (!att_state->clear_color_is_zero_one) {
         /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
          *
          *    "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
          *    is fast cleared with non-0/1 clear value, this RT must be
          *    partially resolved (refer to Partial Resolve operation) before
          *    binding this surface to Sampler."
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
      }
   }

   if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
      return;

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
                                att_state->aux_usage, &surf);
   if (att_state->fast_clear)
      surf.clear_color = vk_to_isl_color(att_state->clear_value.color);

   /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
    *
    *    "When performing a render target resolve, PIPE_CONTROL with end of
    *    pipe sync must be delivered."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  We need to use a PIPE_CONTROL
    * to ensure that the contents of the previous draw hit the render target
    * before we resolve and then use a second PIPE_CONTROL after the resolve
    * to ensure that it is completed before any additional drawing occurs.
    */
   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   for (uint32_t layer = 0; layer < fb->layers; layer++) {
      blorp_ccs_resolve(batch, &surf,
                        iview->isl.base_level,
                        iview->isl.base_array_layer + layer,
                        iview->isl.format, resolve_op);
   }

   cmd_buffer->state.pending_pipe_bits |=
      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;

   /* Once we've done any sort of resolve, we're no longer fast-cleared */
   att_state->fast_clear = false;
   if (att_state->aux_usage == ISL_AUX_USAGE_CCS_D)
      att_state->aux_usage = ISL_AUX_USAGE_NONE;
}

void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      const uint32_t att = subpass->color_attachments[i].attachment;
      if (att == VK_ATTACHMENT_UNUSED)
         continue;

      assert(att < cmd_buffer->state.pass->attachment_count);
      ccs_resolve_attachment(cmd_buffer, &batch, att);
   }

   if (subpass->has_resolve) {
      /* We are about to do some MSAA resolves.  We need to flush so that the
       * result of writes to the MSAA color attachments show up in the sampler
       * when we blit to the single-sampled resolve target.
       */
      cmd_buffer->state.pending_pipe_bits |=
         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;

      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         uint32_t src_att = subpass->color_attachments[i].attachment;
         uint32_t dst_att = subpass->resolve_attachments[i].attachment;

         if (dst_att == VK_ATTACHMENT_UNUSED)
            continue;

         assert(src_att < cmd_buffer->state.pass->attachment_count);
         assert(dst_att < cmd_buffer->state.pass->attachment_count);

         if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) {
            /* From the Vulkan 1.0 spec:
             *
             *    If the first use of an attachment in a render pass is as a
             *    resolve attachment, then the loadOp is effectively ignored
             *    as the resolve is guaranteed to overwrite all pixels in the
             *    render area.
             */
            cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0;
         }

         struct anv_image_view *src_iview = fb->attachments[src_att];
         struct anv_image_view *dst_iview = fb->attachments[dst_att];

         const VkRect2D render_area = cmd_buffer->state.render_area;

         assert(src_iview->aspect_mask == dst_iview->aspect_mask);
         resolve_image(&batch, src_iview->image,
                       src_iview->isl.base_level,
                       src_iview->isl.base_array_layer,
                       dst_iview->image,
                       dst_iview->isl.base_level,
                       dst_iview->isl.base_array_layer,
                       src_iview->aspect_mask,
                       render_area.offset.x, render_area.offset.y,
                       render_area.offset.x, render_area.offset.y,
                       render_area.extent.width, render_area.extent.height);

         ccs_resolve_attachment(cmd_buffer, &batch, dst_att);
      }
   }

   blorp_batch_finish(&batch);
}

void
anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
                        const struct anv_image *image,
                        enum blorp_hiz_op op)
{
   assert(image);

   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
    * don't perform such a resolve on gens that don't support it.
    */
   if (cmd_buffer->device->info.gen < 8 ||
       image->aux_usage != ISL_AUX_USAGE_HIZ)
      return;

   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
          op == BLORP_HIZ_OP_DEPTH_RESOLVE);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                ISL_AUX_USAGE_NONE, &surf);

   /* Manually add the aux HiZ surf */
   surf.aux_surf = &image->aux_surface.isl;
   surf.aux_addr = (struct blorp_address) {
      .buffer = image->bo,
      .offset = image->offset + image->aux_surface.offset,
   };
   surf.aux_usage = ISL_AUX_USAGE_HIZ;

   surf.clear_color.f32[0] = ANV_HZ_FC_VAL;

   blorp_hiz_op(&batch, &surf, 0, 0, 1, op);
   blorp_batch_finish(&batch);
}