anv: fix multi level clears with VK_REMAINING_MIP_LEVELS
[mesa.git] / src / intel / vulkan / anv_blorp.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 static bool
27 lookup_blorp_shader(struct blorp_context *blorp,
28 const void *key, uint32_t key_size,
29 uint32_t *kernel_out, void *prog_data_out)
30 {
31 struct anv_device *device = blorp->driver_ctx;
32
33 /* The blorp cache must be a real cache */
34 assert(device->blorp_shader_cache.cache);
35
36 struct anv_shader_bin *bin =
37 anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
38 if (!bin)
39 return false;
40
41 /* The cache already has a reference and it's not going anywhere so there
42 * is no need to hold a second reference.
43 */
44 anv_shader_bin_unref(device, bin);
45
46 *kernel_out = bin->kernel.offset;
47 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
48
49 return true;
50 }
51
52 static void
53 upload_blorp_shader(struct blorp_context *blorp,
54 const void *key, uint32_t key_size,
55 const void *kernel, uint32_t kernel_size,
56 const struct brw_stage_prog_data *prog_data,
57 uint32_t prog_data_size,
58 uint32_t *kernel_out, void *prog_data_out)
59 {
60 struct anv_device *device = blorp->driver_ctx;
61
62 /* The blorp cache must be a real cache */
63 assert(device->blorp_shader_cache.cache);
64
65 struct anv_pipeline_bind_map bind_map = {
66 .surface_count = 0,
67 .sampler_count = 0,
68 };
69
70 struct anv_shader_bin *bin =
71 anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
72 key, key_size, kernel, kernel_size,
73 prog_data, prog_data_size, &bind_map);
74
75 /* The cache already has a reference and it's not going anywhere so there
76 * is no need to hold a second reference.
77 */
78 anv_shader_bin_unref(device, bin);
79
80 *kernel_out = bin->kernel.offset;
81 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
82 }
83
84 void
85 anv_device_init_blorp(struct anv_device *device)
86 {
87 anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
88 blorp_init(&device->blorp, device, &device->isl_dev);
89 device->blorp.compiler = device->instance->physicalDevice.compiler;
90 device->blorp.mocs.tex = device->default_mocs;
91 device->blorp.mocs.rb = device->default_mocs;
92 device->blorp.mocs.vb = device->default_mocs;
93 device->blorp.lookup_shader = lookup_blorp_shader;
94 device->blorp.upload_shader = upload_blorp_shader;
95 switch (device->info.gen) {
96 case 7:
97 if (device->info.is_haswell) {
98 device->blorp.exec = gen75_blorp_exec;
99 } else {
100 device->blorp.exec = gen7_blorp_exec;
101 }
102 break;
103 case 8:
104 device->blorp.exec = gen8_blorp_exec;
105 break;
106 case 9:
107 device->blorp.exec = gen9_blorp_exec;
108 break;
109 default:
110 unreachable("Unknown hardware generation");
111 }
112 }
113
114 void
115 anv_device_finish_blorp(struct anv_device *device)
116 {
117 blorp_finish(&device->blorp);
118 anv_pipeline_cache_finish(&device->blorp_shader_cache);
119 }
120
121 static void
122 get_blorp_surf_for_anv_buffer(struct anv_device *device,
123 struct anv_buffer *buffer, uint64_t offset,
124 uint32_t width, uint32_t height,
125 uint32_t row_pitch, enum isl_format format,
126 struct blorp_surf *blorp_surf,
127 struct isl_surf *isl_surf)
128 {
129 const struct isl_format_layout *fmtl =
130 isl_format_get_layout(format);
131
132 /* ASTC is the only format which doesn't support linear layouts.
133 * Create an equivalently sized surface with ISL to get around this.
134 */
135 if (fmtl->txc == ISL_TXC_ASTC) {
136 /* Use an equivalently sized format */
137 format = ISL_FORMAT_R32G32B32A32_UINT;
138 assert(fmtl->bpb == isl_format_get_layout(format)->bpb);
139
140 /* Shrink the dimensions for the new format */
141 width = DIV_ROUND_UP(width, fmtl->bw);
142 height = DIV_ROUND_UP(height, fmtl->bh);
143 }
144
145 *blorp_surf = (struct blorp_surf) {
146 .surf = isl_surf,
147 .addr = {
148 .buffer = buffer->bo,
149 .offset = buffer->offset + offset,
150 },
151 };
152
153 isl_surf_init(&device->isl_dev, isl_surf,
154 .dim = ISL_SURF_DIM_2D,
155 .format = format,
156 .width = width,
157 .height = height,
158 .depth = 1,
159 .levels = 1,
160 .array_len = 1,
161 .samples = 1,
162 .min_pitch = row_pitch,
163 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
164 ISL_SURF_USAGE_RENDER_TARGET_BIT,
165 .tiling_flags = ISL_TILING_LINEAR_BIT);
166 assert(isl_surf->row_pitch == row_pitch);
167 }
168
169 static void
170 get_blorp_surf_for_anv_image(const struct anv_image *image,
171 VkImageAspectFlags aspect,
172 struct blorp_surf *blorp_surf)
173 {
174 const struct anv_surface *surface =
175 anv_image_get_surface_for_aspect_mask(image, aspect);
176
177 *blorp_surf = (struct blorp_surf) {
178 .surf = &surface->isl,
179 .addr = {
180 .buffer = image->bo,
181 .offset = image->offset + surface->offset,
182 },
183 };
184 }
185
186 void anv_CmdCopyImage(
187 VkCommandBuffer commandBuffer,
188 VkImage srcImage,
189 VkImageLayout srcImageLayout,
190 VkImage dstImage,
191 VkImageLayout dstImageLayout,
192 uint32_t regionCount,
193 const VkImageCopy* pRegions)
194 {
195 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
196 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
197 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
198
199 struct blorp_batch batch;
200 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
201
202 for (unsigned r = 0; r < regionCount; r++) {
203 VkOffset3D srcOffset =
204 anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
205 VkOffset3D dstOffset =
206 anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
207 VkExtent3D extent =
208 anv_sanitize_image_extent(src_image->type, pRegions[r].extent);
209
210 unsigned dst_base_layer, layer_count;
211 if (dst_image->type == VK_IMAGE_TYPE_3D) {
212 dst_base_layer = pRegions[r].dstOffset.z;
213 layer_count = pRegions[r].extent.depth;
214 } else {
215 dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
216 layer_count = pRegions[r].dstSubresource.layerCount;
217 }
218
219 unsigned src_base_layer;
220 if (src_image->type == VK_IMAGE_TYPE_3D) {
221 src_base_layer = pRegions[r].srcOffset.z;
222 } else {
223 src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
224 assert(pRegions[r].srcSubresource.layerCount == layer_count);
225 }
226
227 assert(pRegions[r].srcSubresource.aspectMask ==
228 pRegions[r].dstSubresource.aspectMask);
229
230 uint32_t a;
231 for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
232 VkImageAspectFlagBits aspect = (1 << a);
233
234 struct blorp_surf src_surf, dst_surf;
235 get_blorp_surf_for_anv_image(src_image, aspect, &src_surf);
236 get_blorp_surf_for_anv_image(dst_image, aspect, &dst_surf);
237
238 for (unsigned i = 0; i < layer_count; i++) {
239 blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
240 src_base_layer + i,
241 &dst_surf, pRegions[r].dstSubresource.mipLevel,
242 dst_base_layer + i,
243 srcOffset.x, srcOffset.y,
244 dstOffset.x, dstOffset.y,
245 extent.width, extent.height);
246 }
247 }
248 }
249
250 blorp_batch_finish(&batch);
251 }
252
253 static void
254 copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
255 struct anv_buffer *anv_buffer,
256 struct anv_image *anv_image,
257 uint32_t regionCount,
258 const VkBufferImageCopy* pRegions,
259 bool buffer_to_image)
260 {
261 struct blorp_batch batch;
262 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
263
264 struct {
265 struct blorp_surf surf;
266 uint32_t level;
267 VkOffset3D offset;
268 } image, buffer, *src, *dst;
269
270 buffer.level = 0;
271 buffer.offset = (VkOffset3D) { 0, 0, 0 };
272
273 if (buffer_to_image) {
274 src = &buffer;
275 dst = &image;
276 } else {
277 src = &image;
278 dst = &buffer;
279 }
280
281 for (unsigned r = 0; r < regionCount; r++) {
282 const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
283
284 get_blorp_surf_for_anv_image(anv_image, aspect, &image.surf);
285 image.offset =
286 anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
287 image.level = pRegions[r].imageSubresource.mipLevel;
288
289 VkExtent3D extent =
290 anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
291 if (anv_image->type != VK_IMAGE_TYPE_3D) {
292 image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
293 extent.depth = pRegions[r].imageSubresource.layerCount;
294 }
295
296 const enum isl_format buffer_format =
297 anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
298 aspect, VK_IMAGE_TILING_LINEAR);
299
300 const VkExtent3D bufferImageExtent = {
301 .width = pRegions[r].bufferRowLength ?
302 pRegions[r].bufferRowLength : extent.width,
303 .height = pRegions[r].bufferImageHeight ?
304 pRegions[r].bufferImageHeight : extent.height,
305 };
306
307 const struct isl_format_layout *buffer_fmtl =
308 isl_format_get_layout(buffer_format);
309
310 const uint32_t buffer_row_pitch =
311 DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
312 (buffer_fmtl->bpb / 8);
313
314 const uint32_t buffer_layer_stride =
315 DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
316 buffer_row_pitch;
317
318 struct isl_surf buffer_isl_surf;
319 get_blorp_surf_for_anv_buffer(cmd_buffer->device,
320 anv_buffer, pRegions[r].bufferOffset,
321 extent.width, extent.height,
322 buffer_row_pitch, buffer_format,
323 &buffer.surf, &buffer_isl_surf);
324
325 for (unsigned z = 0; z < extent.depth; z++) {
326 blorp_copy(&batch, &src->surf, src->level, src->offset.z,
327 &dst->surf, dst->level, dst->offset.z,
328 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
329 extent.width, extent.height);
330
331 image.offset.z++;
332 buffer.surf.addr.offset += buffer_layer_stride;
333 }
334 }
335
336 blorp_batch_finish(&batch);
337 }
338
339 void anv_CmdCopyBufferToImage(
340 VkCommandBuffer commandBuffer,
341 VkBuffer srcBuffer,
342 VkImage dstImage,
343 VkImageLayout dstImageLayout,
344 uint32_t regionCount,
345 const VkBufferImageCopy* pRegions)
346 {
347 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
348 ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
349 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
350
351 copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
352 regionCount, pRegions, true);
353 }
354
355 void anv_CmdCopyImageToBuffer(
356 VkCommandBuffer commandBuffer,
357 VkImage srcImage,
358 VkImageLayout srcImageLayout,
359 VkBuffer dstBuffer,
360 uint32_t regionCount,
361 const VkBufferImageCopy* pRegions)
362 {
363 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
364 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
365 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
366
367 copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
368 regionCount, pRegions, false);
369 }
370
/**
 * Puts a source range and a destination range into ascending order.
 *
 * Each pair is swapped in place if its first coordinate is greater than its
 * second.
 *
 * @return true if exactly one of the two pairs had to be swapped, i.e. the
 *         copy between the ranges is mirrored; false otherwise
 */
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   const bool src_reversed = *src0 > *src1;
   const bool dst_reversed = *dst0 > *dst1;

   if (src_reversed) {
      const unsigned lo = *src1;
      *src1 = *src0;
      *src0 = lo;
   }

   if (dst_reversed) {
      const unsigned lo = *dst1;
      *dst1 = *dst0;
      *dst0 = lo;
   }

   /* Two reversals cancel each other out. */
   return src_reversed != dst_reversed;
}
391
392 void anv_CmdBlitImage(
393 VkCommandBuffer commandBuffer,
394 VkImage srcImage,
395 VkImageLayout srcImageLayout,
396 VkImage dstImage,
397 VkImageLayout dstImageLayout,
398 uint32_t regionCount,
399 const VkImageBlit* pRegions,
400 VkFilter filter)
401
402 {
403 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
404 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
405 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
406
407 struct blorp_surf src, dst;
408
409 uint32_t gl_filter;
410 switch (filter) {
411 case VK_FILTER_NEAREST:
412 gl_filter = 0x2600; /* GL_NEAREST */
413 break;
414 case VK_FILTER_LINEAR:
415 gl_filter = 0x2601; /* GL_LINEAR */
416 break;
417 default:
418 unreachable("Invalid filter");
419 }
420
421 struct blorp_batch batch;
422 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
423
424 for (unsigned r = 0; r < regionCount; r++) {
425 const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
426 const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
427
428 get_blorp_surf_for_anv_image(src_image, src_res->aspectMask, &src);
429 get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask, &dst);
430
431 struct anv_format src_format =
432 anv_get_format(&cmd_buffer->device->info, src_image->vk_format,
433 src_res->aspectMask, src_image->tiling);
434 struct anv_format dst_format =
435 anv_get_format(&cmd_buffer->device->info, dst_image->vk_format,
436 dst_res->aspectMask, dst_image->tiling);
437
438 unsigned dst_start, dst_end;
439 if (dst_image->type == VK_IMAGE_TYPE_3D) {
440 assert(dst_res->baseArrayLayer == 0);
441 dst_start = pRegions[r].dstOffsets[0].z;
442 dst_end = pRegions[r].dstOffsets[1].z;
443 } else {
444 dst_start = dst_res->baseArrayLayer;
445 dst_end = dst_start + dst_res->layerCount;
446 }
447
448 unsigned src_start, src_end;
449 if (src_image->type == VK_IMAGE_TYPE_3D) {
450 assert(src_res->baseArrayLayer == 0);
451 src_start = pRegions[r].srcOffsets[0].z;
452 src_end = pRegions[r].srcOffsets[1].z;
453 } else {
454 src_start = src_res->baseArrayLayer;
455 src_end = src_start + src_res->layerCount;
456 }
457
458 bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
459 float src_z_step = (float)(src_end + 1 - src_start) /
460 (float)(dst_end + 1 - dst_start);
461
462 if (flip_z) {
463 src_start = src_end;
464 src_z_step *= -1;
465 }
466
467 unsigned src_x0 = pRegions[r].srcOffsets[0].x;
468 unsigned src_x1 = pRegions[r].srcOffsets[1].x;
469 unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
470 unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
471 bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);
472
473 unsigned src_y0 = pRegions[r].srcOffsets[0].y;
474 unsigned src_y1 = pRegions[r].srcOffsets[1].y;
475 unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
476 unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
477 bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
478
479 const unsigned num_layers = dst_end - dst_start;
480 for (unsigned i = 0; i < num_layers; i++) {
481 unsigned dst_z = dst_start + i;
482 unsigned src_z = src_start + i * src_z_step;
483
484 blorp_blit(&batch, &src, src_res->mipLevel, src_z,
485 src_format.isl_format, src_format.swizzle,
486 &dst, dst_res->mipLevel, dst_z,
487 dst_format.isl_format, dst_format.swizzle,
488 src_x0, src_y0, src_x1, src_y1,
489 dst_x0, dst_y0, dst_x1, dst_y1,
490 gl_filter, flip_x, flip_y);
491 }
492
493 }
494
495 blorp_batch_finish(&batch);
496 }
497
498 static enum isl_format
499 isl_format_for_size(unsigned size_B)
500 {
501 switch (size_B) {
502 case 1: return ISL_FORMAT_R8_UINT;
503 case 2: return ISL_FORMAT_R8G8_UINT;
504 case 4: return ISL_FORMAT_R8G8B8A8_UINT;
505 case 8: return ISL_FORMAT_R16G16B16A16_UINT;
506 case 16: return ISL_FORMAT_R32G32B32A32_UINT;
507 default:
508 unreachable("Not a power-of-two format size");
509 }
510 }
511
512 static void
513 do_buffer_copy(struct blorp_batch *batch,
514 struct anv_bo *src, uint64_t src_offset,
515 struct anv_bo *dst, uint64_t dst_offset,
516 int width, int height, int block_size)
517 {
518 struct anv_device *device = batch->blorp->driver_ctx;
519
520 /* The actual format we pick doesn't matter as blorp will throw it away.
521 * The only thing that actually matters is the size.
522 */
523 enum isl_format format = isl_format_for_size(block_size);
524
525 struct isl_surf surf;
526 isl_surf_init(&device->isl_dev, &surf,
527 .dim = ISL_SURF_DIM_2D,
528 .format = format,
529 .width = width,
530 .height = height,
531 .depth = 1,
532 .levels = 1,
533 .array_len = 1,
534 .samples = 1,
535 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
536 ISL_SURF_USAGE_RENDER_TARGET_BIT,
537 .tiling_flags = ISL_TILING_LINEAR_BIT);
538 assert(surf.row_pitch == width * block_size);
539
540 struct blorp_surf src_blorp_surf = {
541 .surf = &surf,
542 .addr = {
543 .buffer = src,
544 .offset = src_offset,
545 },
546 };
547
548 struct blorp_surf dst_blorp_surf = {
549 .surf = &surf,
550 .addr = {
551 .buffer = dst,
552 .offset = dst_offset,
553 },
554 };
555
556 blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
557 0, 0, 0, 0, width, height);
558 }
559
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * In other words: the largest power of two that divides both a and b.  A
 * zero argument is divisible by every power of two, so the result is then
 * determined by the other argument alone.  At least one of the arguments
 * must be non-zero.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* ffsll() returns the 1-based index of the lowest set bit, or 0 if no bit
    * is set, so the subtraction yields log2 of the largest power of two
    * dividing the value.
    */
   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case the minimum below takes the other one.  If both are 0 then we
    * will hit the assert above.
    */
   const unsigned shift = a_log2 < b_log2 ? a_log2 : b_log2;

   /* Shift a 64-bit one: a plain `1 << shift` is an int shift, which is
    * undefined for shift >= 31 and cannot represent results above 2^30.
    */
   return (uint64_t)1 << shift;
}
577
578 /* This is maximum possible width/height our HW can handle */
579 #define MAX_SURFACE_DIM (1ull << 14)
580
581 void anv_CmdCopyBuffer(
582 VkCommandBuffer commandBuffer,
583 VkBuffer srcBuffer,
584 VkBuffer dstBuffer,
585 uint32_t regionCount,
586 const VkBufferCopy* pRegions)
587 {
588 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
589 ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
590 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
591
592 struct blorp_batch batch;
593 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
594
595 for (unsigned r = 0; r < regionCount; r++) {
596 uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
597 uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
598 uint64_t copy_size = pRegions[r].size;
599
600 /* First, we compute the biggest format that can be used with the
601 * given offsets and size.
602 */
603 int bs = 16;
604 bs = gcd_pow2_u64(bs, src_offset);
605 bs = gcd_pow2_u64(bs, dst_offset);
606 bs = gcd_pow2_u64(bs, pRegions[r].size);
607
608 /* First, we make a bunch of max-sized copies */
609 uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
610 while (copy_size >= max_copy_size) {
611 do_buffer_copy(&batch, src_buffer->bo, src_offset,
612 dst_buffer->bo, dst_offset,
613 MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
614 copy_size -= max_copy_size;
615 src_offset += max_copy_size;
616 dst_offset += max_copy_size;
617 }
618
619 /* Now make a max-width copy */
620 uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
621 assert(height < MAX_SURFACE_DIM);
622 if (height != 0) {
623 uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
624 do_buffer_copy(&batch, src_buffer->bo, src_offset,
625 dst_buffer->bo, dst_offset,
626 MAX_SURFACE_DIM, height, bs);
627 copy_size -= rect_copy_size;
628 src_offset += rect_copy_size;
629 dst_offset += rect_copy_size;
630 }
631
632 /* Finally, make a small copy to finish it off */
633 if (copy_size != 0) {
634 do_buffer_copy(&batch, src_buffer->bo, src_offset,
635 dst_buffer->bo, dst_offset,
636 copy_size / bs, 1, bs);
637 }
638 }
639
640 blorp_batch_finish(&batch);
641 }
642
643 void anv_CmdUpdateBuffer(
644 VkCommandBuffer commandBuffer,
645 VkBuffer dstBuffer,
646 VkDeviceSize dstOffset,
647 VkDeviceSize dataSize,
648 const void* pData)
649 {
650 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
651 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
652
653 struct blorp_batch batch;
654 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
655
656 /* We can't quite grab a full block because the state stream needs a
657 * little data at the top to build its linked list.
658 */
659 const uint32_t max_update_size =
660 cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
661
662 assert(max_update_size < MAX_SURFACE_DIM * 4);
663
664 while (dataSize) {
665 const uint32_t copy_size = MIN2(dataSize, max_update_size);
666
667 struct anv_state tmp_data =
668 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
669
670 memcpy(tmp_data.map, pData, copy_size);
671
672 int bs = 16;
673 bs = gcd_pow2_u64(bs, dstOffset);
674 bs = gcd_pow2_u64(bs, copy_size);
675
676 do_buffer_copy(&batch,
677 &cmd_buffer->device->dynamic_state_block_pool.bo,
678 tmp_data.offset,
679 dst_buffer->bo, dst_buffer->offset + dstOffset,
680 copy_size / bs, 1, bs);
681
682 dataSize -= copy_size;
683 dstOffset += copy_size;
684 pData = (void *)pData + copy_size;
685 }
686
687 blorp_batch_finish(&batch);
688 }
689
690 void anv_CmdFillBuffer(
691 VkCommandBuffer commandBuffer,
692 VkBuffer dstBuffer,
693 VkDeviceSize dstOffset,
694 VkDeviceSize fillSize,
695 uint32_t data)
696 {
697 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
698 ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
699 struct blorp_surf surf;
700 struct isl_surf isl_surf;
701
702 struct blorp_batch batch;
703 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
704
705 if (fillSize == VK_WHOLE_SIZE) {
706 fillSize = dst_buffer->size - dstOffset;
707 /* Make sure fillSize is a multiple of 4 */
708 fillSize &= ~3ull;
709 }
710
711 /* First, we compute the biggest format that can be used with the
712 * given offsets and size.
713 */
714 int bs = 16;
715 bs = gcd_pow2_u64(bs, dstOffset);
716 bs = gcd_pow2_u64(bs, fillSize);
717 enum isl_format isl_format = isl_format_for_size(bs);
718
719 union isl_color_value color = {
720 .u32 = { data, data, data, data },
721 };
722
723 const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
724 while (fillSize >= max_fill_size) {
725 get_blorp_surf_for_anv_buffer(cmd_buffer->device,
726 dst_buffer, dstOffset,
727 MAX_SURFACE_DIM, MAX_SURFACE_DIM,
728 MAX_SURFACE_DIM * bs, isl_format,
729 &surf, &isl_surf);
730
731 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
732 0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
733 color, NULL);
734 fillSize -= max_fill_size;
735 dstOffset += max_fill_size;
736 }
737
738 uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
739 assert(height < MAX_SURFACE_DIM);
740 if (height != 0) {
741 const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
742 get_blorp_surf_for_anv_buffer(cmd_buffer->device,
743 dst_buffer, dstOffset,
744 MAX_SURFACE_DIM, height,
745 MAX_SURFACE_DIM * bs, isl_format,
746 &surf, &isl_surf);
747
748 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
749 0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
750 color, NULL);
751 fillSize -= rect_fill_size;
752 dstOffset += rect_fill_size;
753 }
754
755 if (fillSize != 0) {
756 const uint32_t width = fillSize / bs;
757 get_blorp_surf_for_anv_buffer(cmd_buffer->device,
758 dst_buffer, dstOffset,
759 width, 1,
760 width * bs, isl_format,
761 &surf, &isl_surf);
762
763 blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
764 0, 0, 1, 0, 0, width, 1,
765 color, NULL);
766 }
767
768 blorp_batch_finish(&batch);
769 }
770
771 void anv_CmdClearColorImage(
772 VkCommandBuffer commandBuffer,
773 VkImage _image,
774 VkImageLayout imageLayout,
775 const VkClearColorValue* pColor,
776 uint32_t rangeCount,
777 const VkImageSubresourceRange* pRanges)
778 {
779 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
780 ANV_FROM_HANDLE(anv_image, image, _image);
781
782 static const bool color_write_disable[4] = { false, false, false, false };
783
784 struct blorp_batch batch;
785 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
786
787 union isl_color_value clear_color;
788 memcpy(clear_color.u32, pColor->uint32, sizeof(pColor->uint32));
789
790 struct blorp_surf surf;
791 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, &surf);
792
793 for (unsigned r = 0; r < rangeCount; r++) {
794 if (pRanges[r].aspectMask == 0)
795 continue;
796
797 assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
798
799 struct anv_format src_format =
800 anv_get_format(&cmd_buffer->device->info, image->vk_format,
801 VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);
802
803 unsigned base_layer = pRanges[r].baseArrayLayer;
804 unsigned layer_count = pRanges[r].layerCount;
805
806 for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
807 const unsigned level = pRanges[r].baseMipLevel + i;
808 const unsigned level_width = anv_minify(image->extent.width, level);
809 const unsigned level_height = anv_minify(image->extent.height, level);
810
811 if (image->type == VK_IMAGE_TYPE_3D) {
812 base_layer = 0;
813 layer_count = anv_minify(image->extent.depth, level);
814 }
815
816 blorp_clear(&batch, &surf,
817 src_format.isl_format, src_format.swizzle,
818 level, base_layer, layer_count,
819 0, 0, level_width, level_height,
820 clear_color, color_write_disable);
821 }
822 }
823
824 blorp_batch_finish(&batch);
825 }
826
827 void anv_CmdClearDepthStencilImage(
828 VkCommandBuffer commandBuffer,
829 VkImage image_h,
830 VkImageLayout imageLayout,
831 const VkClearDepthStencilValue* pDepthStencil,
832 uint32_t rangeCount,
833 const VkImageSubresourceRange* pRanges)
834 {
835 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
836 ANV_FROM_HANDLE(anv_image, image, image_h);
837
838 struct blorp_batch batch;
839 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
840
841 struct blorp_surf depth, stencil;
842 if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
843 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
844 &depth);
845 } else {
846 memset(&depth, 0, sizeof(depth));
847 }
848
849 if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
850 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
851 &stencil);
852 } else {
853 memset(&stencil, 0, sizeof(stencil));
854 }
855
856 for (unsigned r = 0; r < rangeCount; r++) {
857 if (pRanges[r].aspectMask == 0)
858 continue;
859
860 bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
861 bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
862
863 unsigned base_layer = pRanges[r].baseArrayLayer;
864 unsigned layer_count = pRanges[r].layerCount;
865
866 for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) {
867 const unsigned level = pRanges[r].baseMipLevel + i;
868 const unsigned level_width = anv_minify(image->extent.width, level);
869 const unsigned level_height = anv_minify(image->extent.height, level);
870
871 if (image->type == VK_IMAGE_TYPE_3D)
872 layer_count = anv_minify(image->extent.depth, level);
873
874 blorp_clear_depth_stencil(&batch, &depth, &stencil,
875 level, base_layer, layer_count,
876 0, 0, level_width, level_height,
877 clear_depth, pDepthStencil->depth,
878 clear_stencil ? 0xff : 0,
879 pDepthStencil->stencil);
880 }
881 }
882
883 blorp_batch_finish(&batch);
884 }
885
886 static void
887 clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
888 struct blorp_batch *batch,
889 const VkClearAttachment *attachment,
890 uint32_t rectCount, const VkClearRect *pRects)
891 {
892 const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
893 const struct anv_subpass *subpass = cmd_buffer->state.subpass;
894 const uint32_t att = attachment->colorAttachment;
895 const struct anv_image_view *iview =
896 fb->attachments[subpass->color_attachments[att]];
897 const struct anv_image *image = iview->image;
898
899 struct blorp_surf surf;
900 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, &surf);
901
902 union isl_color_value clear_color;
903 memcpy(clear_color.u32, attachment->clearValue.color.uint32,
904 sizeof(clear_color.u32));
905
906 static const bool color_write_disable[4] = { false, false, false, false };
907
908 for (uint32_t r = 0; r < rectCount; ++r) {
909 const VkOffset2D offset = pRects[r].rect.offset;
910 const VkExtent2D extent = pRects[r].rect.extent;
911 blorp_clear(batch, &surf, iview->isl.format, iview->isl.swizzle,
912 iview->isl.base_level,
913 iview->isl.base_array_layer + pRects[r].baseArrayLayer,
914 pRects[r].layerCount,
915 offset.x, offset.y,
916 offset.x + extent.width, offset.y + extent.height,
917 clear_color, color_write_disable);
918 }
919 }
920
921 static void
922 clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
923 struct blorp_batch *batch,
924 const VkClearAttachment *attachment,
925 uint32_t rectCount, const VkClearRect *pRects)
926 {
927 const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
928 const struct anv_subpass *subpass = cmd_buffer->state.subpass;
929 const struct anv_image_view *iview =
930 fb->attachments[subpass->depth_stencil_attachment];
931 const struct anv_image *image = iview->image;
932
933 bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
934 bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
935
936 struct blorp_surf depth, stencil;
937 if (clear_depth) {
938 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
939 &depth);
940 } else {
941 memset(&depth, 0, sizeof(depth));
942 }
943
944 if (clear_stencil) {
945 get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
946 &stencil);
947 } else {
948 memset(&stencil, 0, sizeof(stencil));
949 }
950
951 for (uint32_t r = 0; r < rectCount; ++r) {
952 const VkOffset2D offset = pRects[r].rect.offset;
953 const VkExtent2D extent = pRects[r].rect.extent;
954 VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
955 blorp_clear_depth_stencil(batch, &depth, &stencil,
956 iview->isl.base_level,
957 iview->isl.base_array_layer +
958 pRects[r].baseArrayLayer,
959 pRects[r].layerCount,
960 offset.x, offset.y,
961 offset.x + extent.width,
962 offset.y + extent.height,
963 clear_depth, value.depth,
964 clear_stencil ? 0xff : 0, value.stencil);
965 }
966 }
967
968 void anv_CmdClearAttachments(
969 VkCommandBuffer commandBuffer,
970 uint32_t attachmentCount,
971 const VkClearAttachment* pAttachments,
972 uint32_t rectCount,
973 const VkClearRect* pRects)
974 {
975 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
976
977 /* Because this gets called within a render pass, we tell blorp not to
978 * trash our depth and stencil buffers.
979 */
980 struct blorp_batch batch;
981 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
982 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
983
984 for (uint32_t a = 0; a < attachmentCount; ++a) {
985 if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
986 clear_color_attachment(cmd_buffer, &batch,
987 &pAttachments[a],
988 rectCount, pRects);
989 } else {
990 clear_depth_stencil_attachment(cmd_buffer, &batch,
991 &pAttachments[a],
992 rectCount, pRects);
993 }
994 }
995
996 blorp_batch_finish(&batch);
997 }
998
999 static bool
1000 subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
1001 {
1002 const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
1003 uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
1004
1005 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1006 uint32_t a = cmd_state->subpass->color_attachments[i];
1007 if (cmd_state->attachments[a].pending_clear_aspects) {
1008 return true;
1009 }
1010 }
1011
1012 if (ds != VK_ATTACHMENT_UNUSED &&
1013 cmd_state->attachments[ds].pending_clear_aspects) {
1014 return true;
1015 }
1016
1017 return false;
1018 }
1019
1020 void
1021 anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
1022 {
1023 const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
1024
1025 if (!subpass_needs_clear(cmd_buffer))
1026 return;
1027
1028 /* Because this gets called within a render pass, we tell blorp not to
1029 * trash our depth and stencil buffers.
1030 */
1031 struct blorp_batch batch;
1032 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
1033 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1034
1035 VkClearRect clear_rect = {
1036 .rect = cmd_buffer->state.render_area,
1037 .baseArrayLayer = 0,
1038 .layerCount = cmd_buffer->state.framebuffer->layers,
1039 };
1040
1041 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1042 const uint32_t a = cmd_state->subpass->color_attachments[i];
1043
1044 if (!cmd_state->attachments[a].pending_clear_aspects)
1045 continue;
1046
1047 assert(cmd_state->attachments[a].pending_clear_aspects ==
1048 VK_IMAGE_ASPECT_COLOR_BIT);
1049
1050 VkClearAttachment clear_att = {
1051 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1052 .colorAttachment = i, /* Use attachment index relative to subpass */
1053 .clearValue = cmd_state->attachments[a].clear_value,
1054 };
1055
1056 clear_color_attachment(cmd_buffer, &batch, &clear_att, 1, &clear_rect);
1057
1058 cmd_state->attachments[a].pending_clear_aspects = 0;
1059 }
1060
1061 const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
1062
1063 if (ds != VK_ATTACHMENT_UNUSED &&
1064 cmd_state->attachments[ds].pending_clear_aspects) {
1065
1066 VkClearAttachment clear_att = {
1067 .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
1068 .clearValue = cmd_state->attachments[ds].clear_value,
1069 };
1070
1071 clear_depth_stencil_attachment(cmd_buffer, &batch,
1072 &clear_att, 1, &clear_rect);
1073
1074 cmd_state->attachments[ds].pending_clear_aspects = 0;
1075 }
1076
1077 blorp_batch_finish(&batch);
1078 }
1079
1080 static void
1081 resolve_image(struct blorp_batch *batch,
1082 const struct anv_image *src_image,
1083 uint32_t src_level, uint32_t src_layer,
1084 const struct anv_image *dst_image,
1085 uint32_t dst_level, uint32_t dst_layer,
1086 VkImageAspectFlags aspect_mask,
1087 uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
1088 uint32_t width, uint32_t height)
1089 {
1090 assert(src_image->type == VK_IMAGE_TYPE_2D);
1091 assert(src_image->samples > 1);
1092 assert(dst_image->type == VK_IMAGE_TYPE_2D);
1093 assert(dst_image->samples == 1);
1094
1095 uint32_t a;
1096 for_each_bit(a, aspect_mask) {
1097 VkImageAspectFlagBits aspect = 1 << a;
1098
1099 struct blorp_surf src_surf, dst_surf;
1100 get_blorp_surf_for_anv_image(src_image, aspect, &src_surf);
1101 get_blorp_surf_for_anv_image(dst_image, aspect, &dst_surf);
1102
1103 blorp_blit(batch,
1104 &src_surf, src_level, src_layer,
1105 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
1106 &dst_surf, dst_level, dst_layer,
1107 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
1108 src_x, src_y, src_x + width, src_y + height,
1109 dst_x, dst_y, dst_x + width, dst_y + height,
1110 0x2600 /* GL_NEAREST */, false, false);
1111 }
1112 }
1113
1114 void anv_CmdResolveImage(
1115 VkCommandBuffer commandBuffer,
1116 VkImage srcImage,
1117 VkImageLayout srcImageLayout,
1118 VkImage dstImage,
1119 VkImageLayout dstImageLayout,
1120 uint32_t regionCount,
1121 const VkImageResolve* pRegions)
1122 {
1123 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1124 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
1125 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
1126
1127 struct blorp_batch batch;
1128 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1129
1130 for (uint32_t r = 0; r < regionCount; r++) {
1131 assert(pRegions[r].srcSubresource.aspectMask ==
1132 pRegions[r].dstSubresource.aspectMask);
1133 assert(pRegions[r].srcSubresource.layerCount ==
1134 pRegions[r].dstSubresource.layerCount);
1135
1136 const uint32_t layer_count = pRegions[r].dstSubresource.layerCount;
1137
1138 for (uint32_t layer = 0; layer < layer_count; layer++) {
1139 resolve_image(&batch,
1140 src_image, pRegions[r].srcSubresource.mipLevel,
1141 pRegions[r].srcSubresource.baseArrayLayer + layer,
1142 dst_image, pRegions[r].dstSubresource.mipLevel,
1143 pRegions[r].dstSubresource.baseArrayLayer + layer,
1144 pRegions[r].dstSubresource.aspectMask,
1145 pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
1146 pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
1147 pRegions[r].extent.width, pRegions[r].extent.height);
1148 }
1149 }
1150
1151 blorp_batch_finish(&batch);
1152 }
1153
1154 void
1155 anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
1156 {
1157 struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
1158 struct anv_subpass *subpass = cmd_buffer->state.subpass;
1159
1160 /* FINISHME(perf): Skip clears for resolve attachments.
1161 *
1162 * From the Vulkan 1.0 spec:
1163 *
1164 * If the first use of an attachment in a render pass is as a resolve
1165 * attachment, then the loadOp is effectively ignored as the resolve is
1166 * guaranteed to overwrite all pixels in the render area.
1167 */
1168
1169 if (!subpass->has_resolve)
1170 return;
1171
1172 struct blorp_batch batch;
1173 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1174
1175 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1176 uint32_t src_att = subpass->color_attachments[i];
1177 uint32_t dst_att = subpass->resolve_attachments[i];
1178
1179 if (dst_att == VK_ATTACHMENT_UNUSED)
1180 continue;
1181
1182 struct anv_image_view *src_iview = fb->attachments[src_att];
1183 struct anv_image_view *dst_iview = fb->attachments[dst_att];
1184
1185 const VkRect2D render_area = cmd_buffer->state.render_area;
1186
1187 assert(src_iview->aspect_mask == dst_iview->aspect_mask);
1188 resolve_image(&batch, src_iview->image,
1189 src_iview->isl.base_level, src_iview->isl.base_array_layer,
1190 dst_iview->image,
1191 dst_iview->isl.base_level, dst_iview->isl.base_array_layer,
1192 src_iview->aspect_mask,
1193 render_area.offset.x, render_area.offset.y,
1194 render_area.offset.x, render_area.offset.y,
1195 render_area.extent.width, render_area.extent.height);
1196 }
1197
1198 blorp_batch_finish(&batch);
1199 }