intel/blorp: Pass a brw_stage_prog_data to upload_shader
[mesa.git] / src / intel / vulkan / anv_blorp.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 static bool
27 lookup_blorp_shader(struct blorp_context *blorp,
28 const void *key, uint32_t key_size,
29 uint32_t *kernel_out, void *prog_data_out)
30 {
31 struct anv_device *device = blorp->driver_ctx;
32
33 /* The blorp cache must be a real cache */
34 assert(device->blorp_shader_cache.cache);
35
36 struct anv_shader_bin *bin =
37 anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
38 if (!bin)
39 return false;
40
41 /* The cache already has a reference and it's not going anywhere so there
42 * is no need to hold a second reference.
43 */
44 anv_shader_bin_unref(device, bin);
45
46 *kernel_out = bin->kernel.offset;
47 *(const struct brw_stage_prog_data **)prog_data_out =
48 anv_shader_bin_get_prog_data(bin);
49
50 return true;
51 }
52
/* blorp_context::upload_shader hook.
 *
 * Inserts a freshly-compiled blorp kernel into the device's blorp pipeline
 * cache and hands its kernel offset and prog_data back to blorp.
 */
static void
upload_blorp_shader(struct blorp_context *blorp,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct anv_device *device = blorp->driver_ctx;

   /* The blorp cache must be a real cache */
   assert(device->blorp_shader_cache.cache);

   /* Blorp shaders bind no descriptors, so the bind map is empty. */
   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
                                       key, key_size, kernel, kernel_size,
                                       prog_data, prog_data_size, &bind_map);

   /* NOTE(review): bin is dereferenced below without a NULL check; this
    * assumes the cache upload cannot fail (e.g. under memory pressure) —
    * verify against anv_pipeline_cache_upload_kernel's contract.
    */

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out =
      anv_shader_bin_get_prog_data(bin);
}
85
86 void
87 anv_device_init_blorp(struct anv_device *device)
88 {
89 anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
90 blorp_init(&device->blorp, device, &device->isl_dev);
91 device->blorp.compiler = device->instance->physicalDevice.compiler;
92 device->blorp.mocs.tex = device->default_mocs;
93 device->blorp.mocs.rb = device->default_mocs;
94 device->blorp.mocs.vb = device->default_mocs;
95 device->blorp.lookup_shader = lookup_blorp_shader;
96 device->blorp.upload_shader = upload_blorp_shader;
97 switch (device->info.gen) {
98 case 7:
99 if (device->info.is_haswell) {
100 device->blorp.exec = gen75_blorp_exec;
101 } else {
102 device->blorp.exec = gen7_blorp_exec;
103 }
104 break;
105 case 8:
106 device->blorp.exec = gen8_blorp_exec;
107 break;
108 case 9:
109 device->blorp.exec = gen9_blorp_exec;
110 break;
111 default:
112 unreachable("Unknown hardware generation");
113 }
114 }
115
/* Tears down the blorp state created by anv_device_init_blorp(). */
void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
   anv_pipeline_cache_finish(&device->blorp_shader_cache);
}
122
/* Wraps a range of an anv_buffer in a linear 2D isl surface so blorp can
 * treat raw buffer memory as an image.  blorp_surf keeps a pointer to
 * *isl_surf, so the caller-provided isl_surf must outlive blorp_surf.
 */
static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->bo,
         .offset = buffer->offset + offset,
      },
   };

   isl_surf_init(&device->isl_dev, isl_surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .min_pitch = row_pitch,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   /* The caller computed row_pitch itself; make sure isl agreed. */
   assert(isl_surf->row_pitch == row_pitch);
}
154
155 static void
156 get_blorp_surf_for_anv_image(const struct anv_image *image,
157 VkImageAspectFlags aspect,
158 struct blorp_surf *blorp_surf)
159 {
160 const struct anv_surface *surface =
161 anv_image_get_surface_for_aspect_mask(image, aspect);
162
163 *blorp_surf = (struct blorp_surf) {
164 .surf = &surface->isl,
165 .addr = {
166 .buffer = image->bo,
167 .offset = image->offset + surface->offset,
168 },
169 };
170 }
171
/* vkCmdCopyImage implementation: raw (format-preserving) image-to-image
 * copies, one blorp_copy per layer per aspect per region.
 */
void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, pRegions[r].srcOffset);
      VkOffset3D dstOffset =
         anv_sanitize_image_offset(dst_image->type, pRegions[r].dstOffset);
      VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, pRegions[r].extent);

      /* For 3D images, depth slices stand in for array layers. */
      unsigned dst_base_layer, layer_count;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         dst_base_layer = pRegions[r].dstOffset.z;
         layer_count = pRegions[r].extent.depth;
      } else {
         dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
         layer_count = pRegions[r].dstSubresource.layerCount;
      }

      unsigned src_base_layer;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         src_base_layer = pRegions[r].srcOffset.z;
      } else {
         src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
         assert(pRegions[r].srcSubresource.layerCount == layer_count);
      }

      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      /* Copy each requested aspect separately (color, or depth and/or
       * stencil), one 2D slice at a time.
       */
      uint32_t a;
      for_each_bit(a, pRegions[r].dstSubresource.aspectMask) {
         VkImageAspectFlagBits aspect = (1 << a);

         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(src_image, aspect, &src_surf);
         get_blorp_surf_for_anv_image(dst_image, aspect, &dst_surf);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel,
                       src_base_layer + i,
                       &dst_surf, pRegions[r].dstSubresource.mipLevel,
                       dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }

   blorp_batch_finish(&batch);
}
238
/* Shared implementation of vkCmdCopyBufferToImage and
 * vkCmdCopyImageToBuffer.  For each region the buffer side is wrapped in a
 * temporary linear surface; buffer_to_image selects which side is the copy
 * source.
 */
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     uint32_t regionCount,
                     const VkBufferImageCopy* pRegions,
                     bool buffer_to_image)
{
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* One descriptor per side of the copy; src/dst alias into these so the
    * per-slice loop below is direction-agnostic.
    */
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;

      get_blorp_surf_for_anv_image(anv_image, aspect, &image.surf);
      image.offset =
         anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset);
      image.level = pRegions[r].imageSubresource.mipLevel;

      VkExtent3D extent =
         anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
      if (anv_image->type != VK_IMAGE_TYPE_3D) {
         /* For non-3D images, z addresses array layers instead of depth. */
         image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
         extent.depth = pRegions[r].imageSubresource.layerCount;
      }

      const enum isl_format buffer_format =
         anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format,
                            aspect, VK_IMAGE_TILING_LINEAR);

      /* Per the Vulkan spec, zero bufferRowLength/bufferImageHeight mean
       * "tightly packed to the image extent".
       */
      const VkExtent3D bufferImageExtent = {
         .width  = pRegions[r].bufferRowLength ?
                   pRegions[r].bufferRowLength : extent.width,
         .height = pRegions[r].bufferImageHeight ?
                   pRegions[r].bufferImageHeight : extent.height,
      };

      const struct isl_format_layout *buffer_fmtl =
         isl_format_get_layout(buffer_format);

      /* Pitches are computed in compression blocks (bw x bh), not pixels. */
      const uint32_t buffer_row_pitch =
         DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) *
         (buffer_fmtl->bpb / 8);

      const uint32_t buffer_layer_stride =
         DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) *
         buffer_row_pitch;

      struct isl_surf buffer_isl_surf;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    anv_buffer, pRegions[r].bufferOffset,
                                    extent.width, extent.height,
                                    buffer_row_pitch, buffer_format,
                                    &buffer.surf, &buffer_isl_surf);

      /* Copy slice by slice, advancing the image z/layer and the buffer
       * address each iteration.
       */
      for (unsigned z = 0; z < extent.depth; z++) {
         blorp_copy(&batch, &src->surf, src->level, src->offset.z,
                    &dst->surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                    extent.width, extent.height);

         image.offset.z++;
         buffer.surf.addr.offset += buffer_layer_stride;
      }
   }

   blorp_batch_finish(&batch);
}
324
/* vkCmdCopyBufferToImage: thin wrapper over copy_buffer_to_image() with the
 * buffer as the copy source.
 */
void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
                        regionCount, pRegions, true);
}
340
/* vkCmdCopyImageToBuffer: thin wrapper over copy_buffer_to_image() with the
 * image as the copy source.
 */
void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                        regionCount, pRegions, false);
}
356
/* Sorts each coordinate pair into ascending order and reports whether the
 * blit must be mirrored along that axis: swapping exactly one of the two
 * pairs flips the image, swapping both (or neither) leaves it unchanged.
 */
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool src_swapped = false, dst_swapped = false;

   if (*src0 > *src1) {
      unsigned t = *src0;
      *src0 = *src1;
      *src1 = t;
      src_swapped = true;
   }

   if (*dst0 > *dst1) {
      unsigned t = *dst0;
      *dst0 = *dst1;
      *dst1 = t;
      dst_swapped = true;
   }

   /* Mirror only when exactly one pair was reversed. */
   return src_swapped != dst_swapped;
}
377
/* vkCmdBlitImage implementation: scaled and filtered blits, including
 * coordinate flips, via blorp_blit.
 */
void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     dstImage,
    VkImageLayout                               dstImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)

{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, dstImage);

   struct blorp_surf src, dst;

   /* blorp takes GL filter enums rather than VkFilter. */
   uint32_t gl_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      gl_filter = 0x2600; /* GL_NEAREST */
      break;
   case VK_FILTER_LINEAR:
      gl_filter = 0x2601; /* GL_LINEAR */
      break;
   default:
      unreachable("Invalid filter");
   }

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
      const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;

      get_blorp_surf_for_anv_image(src_image, src_res->aspectMask, &src);
      get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask, &dst);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, src_image->vk_format,
                        src_res->aspectMask, src_image->tiling);
      struct anv_format dst_format =
         anv_get_format(&cmd_buffer->device->info, dst_image->vk_format,
                        dst_res->aspectMask, dst_image->tiling);

      /* For 3D images the z offsets select slices; otherwise the layer
       * range comes from the subresource.
       */
      unsigned dst_start, dst_end;
      if (dst_image->type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = pRegions[r].dstOffsets[0].z;
         dst_end = pRegions[r].dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start + dst_res->layerCount;
      }

      unsigned src_start, src_end;
      if (src_image->type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = pRegions[r].srcOffsets[0].z;
         src_end = pRegions[r].srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start + src_res->layerCount;
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      /* NOTE(review): the "+ 1" on both spans is asymmetric with the x/y
       * handling below — verify this matches the intended z scale factor.
       */
      float src_z_step = (float)(src_end + 1 - src_start) /
                         (float)(dst_end + 1 - dst_start);

      /* When the z range is mirrored, walk backwards from the far end. */
      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
      }

      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      /* One blorp_blit per destination slice; the matching source slice is
       * found by stepping (possibly fractionally, possibly backwards)
       * through the source range.
       */
      const unsigned num_layers = dst_end - dst_start;
      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         unsigned src_z = src_start + i * src_z_step;

         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    gl_filter, flip_x, flip_y);
      }

   }

   blorp_batch_finish(&batch);
}
483
484 static enum isl_format
485 isl_format_for_size(unsigned size_B)
486 {
487 switch (size_B) {
488 case 1: return ISL_FORMAT_R8_UINT;
489 case 2: return ISL_FORMAT_R8G8_UINT;
490 case 4: return ISL_FORMAT_R8G8B8A8_UINT;
491 case 8: return ISL_FORMAT_R16G16B16A16_UINT;
492 case 16: return ISL_FORMAT_R32G32B32A32_UINT;
493 default:
494 unreachable("Not a power-of-two format size");
495 }
496 }
497
/* Copies a width x height rectangle of block_size-byte elements between two
 * BOs by wrapping both ranges in identical linear 2D surfaces and issuing a
 * single blorp_copy.
 */
static void
do_buffer_copy(struct blorp_batch *batch,
               struct anv_bo *src, uint64_t src_offset,
               struct anv_bo *dst, uint64_t dst_offset,
               int width, int height, int block_size)
{
   struct anv_device *device = batch->blorp->driver_ctx;

   /* The actual format we pick doesn't matter as blorp will throw it away.
    * The only thing that actually matters is the size.
    */
   enum isl_format format = isl_format_for_size(block_size);

   /* Both sides share a single surface description; only the addresses
    * below differ.
    */
   struct isl_surf surf;
   isl_surf_init(&device->isl_dev, &surf,
                 .dim = ISL_SURF_DIM_2D,
                 .format = format,
                 .width = width,
                 .height = height,
                 .depth = 1,
                 .levels = 1,
                 .array_len = 1,
                 .samples = 1,
                 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                          ISL_SURF_USAGE_RENDER_TARGET_BIT,
                 .tiling_flags = ISL_TILING_LINEAR_BIT);
   /* A tightly-packed pitch is assumed by the offset math in the callers. */
   assert(surf.row_pitch == width * block_size);

   struct blorp_surf src_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = src,
         .offset = src_offset,
      },
   };

   struct blorp_surf dst_blorp_surf = {
      .surf = &surf,
      .addr = {
         .buffer = dst,
         .offset = dst_offset,
      },
   };

   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
              0, 0, 0, 0, width, height);
}
545
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * This is exactly the lowest set bit of (a | b): any power of two dividing
 * both values is bounded by the smaller of their lowest set bits.  If one
 * operand is zero it contributes no bits to the OR, so the result is the
 * lowest set bit of the other operand, matching the old ffsll/MIN2
 * behavior.  If both are zero the assert below fires.
 *
 * The previous implementation computed "1 << MIN2(a_log2, b_log2)", which
 * shifts a 32-bit int: for operands whose lowest set bit is at position 31
 * or above, that shift is undefined behavior and cannot produce the
 * correct 64-bit result.  The bit-isolation idiom below is well defined
 * for the full uint64_t range.
 */
static inline uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* Isolate the lowest set bit: for unsigned v, -v wraps to ~v + 1, so
    * v & -v keeps only the least-significant 1 bit.
    */
   const uint64_t bits = a | b;
   return bits & -bits;
}
563
564 /* This is maximum possible width/height our HW can handle */
565 #define MAX_SURFACE_DIM (1ull << 14)
566
/* vkCmdCopyBuffer implementation.  Each region is decomposed into at most
 * three kinds of 2D copies: full MAX_SURFACE_DIM x MAX_SURFACE_DIM tiles,
 * one max-width strip, and a final single-row remainder.
 */
void anv_CmdCopyBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkBuffer                                    dstBuffer,
    uint32_t                                    regionCount,
    const VkBufferCopy*                         pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   for (unsigned r = 0; r < regionCount; r++) {
      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
      uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset;
      uint64_t copy_size = pRegions[r].size;

      /* First, we compute the biggest format that can be used with the
       * given offsets and size.
       */
      int bs = 16;
      bs = gcd_pow2_u64(bs, src_offset);
      bs = gcd_pow2_u64(bs, dst_offset);
      bs = gcd_pow2_u64(bs, pRegions[r].size);

      /* First, we make a bunch of max-sized copies */
      uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
      while (copy_size >= max_copy_size) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
         copy_size -= max_copy_size;
         src_offset += max_copy_size;
         dst_offset += max_copy_size;
      }

      /* Now make a max-width copy */
      uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
      assert(height < MAX_SURFACE_DIM);
      if (height != 0) {
         uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        MAX_SURFACE_DIM, height, bs);
         copy_size -= rect_copy_size;
         src_offset += rect_copy_size;
         dst_offset += rect_copy_size;
      }

      /* Finally, make a small copy to finish it off.  At this point
       * copy_size < MAX_SURFACE_DIM * bs, so the width fits in one row.
       */
      if (copy_size != 0) {
         do_buffer_copy(&batch, src_buffer->bo, src_offset,
                        dst_buffer->bo, dst_offset,
                        copy_size / bs, 1, bs);
      }
   }

   blorp_batch_finish(&batch);
}
628
/* vkCmdUpdateBuffer implementation: stages the inline data through dynamic
 * state memory in bounded chunks and blits each chunk into the destination
 * buffer.
 */
void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const uint32_t*                             pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;

   /* Guarantees the single-row copy below fits in one surface row. */
   assert(max_update_size < MAX_SURFACE_DIM * 4);

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      /* Stage this chunk of user data in dynamic state memory. */
      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      /* Largest power-of-two block size compatible with the destination
       * offset and chunk size.  The staging allocation is requested with
       * 64-byte alignment above, so the source side never constrains bs.
       */
      int bs = 16;
      bs = gcd_pow2_u64(bs, dstOffset);
      bs = gcd_pow2_u64(bs, copy_size);

      do_buffer_copy(&batch,
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     tmp_data.offset,
                     dst_buffer->bo, dst_buffer->offset + dstOffset,
                     copy_size / bs, 1, bs);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   blorp_batch_finish(&batch);
}
675
/* vkCmdFillBuffer implementation: fills a buffer range with a repeated
 * 32-bit value using blorp clears, decomposed like anv_CmdCopyBuffer into
 * max-sized tiles, a max-width strip, and a single-row remainder.
 */
void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   if (fillSize == VK_WHOLE_SIZE) {
      fillSize = dst_buffer->size - dstOffset;
      /* Make sure fillSize is a multiple of 4 */
      fillSize &= ~3ull;
   }

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   /* The fill value is replicated into every channel; since bs >= 4 and
    * data is 32 bits, each channel of the chosen UINT format sees the
    * pattern the API requires.
    */
   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   /* Fill in full MAX_SURFACE_DIM x MAX_SURFACE_DIM tiles first. */
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, NULL);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   /* Then one max-width strip... */
   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, NULL);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   /* ...and a final single-row remainder. */
   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, NULL);
   }

   blorp_batch_finish(&batch);
}
756
/* vkCmdClearColorImage implementation: clears the requested mip/layer
 * ranges of a color image with blorp_clear.
 */
void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   /* All-false write-disable mask: every channel gets written. */
   static const bool color_write_disable[4] = { false, false, false, false };

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   union isl_color_value clear_color;
   memcpy(clear_color.u32, pColor->uint32, sizeof(pColor->uint32));

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, &surf);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);

      struct anv_format src_format =
         anv_get_format(&cmd_buffer->device->info, image->vk_format,
                        VK_IMAGE_ASPECT_COLOR_BIT, image->tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = pRanges[r].layerCount;

      for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         /* 3D images clear every (minified) depth slice of each level. */
         if (image->type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->extent.depth, level);
         }

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     clear_color, color_write_disable);
      }
   }

   blorp_batch_finish(&batch);
}
812
/* vkCmdClearDepthStencilImage implementation: clears depth and/or stencil
 * aspects of the requested mip/layer ranges via blorp_clear_depth_stencil.
 */
void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);

   /* Resolve the per-aspect surfaces up front; aspects the image lacks are
    * zeroed so blorp sees no surface for them.
    */
   struct blorp_surf depth, stencil;
   if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      unsigned layer_count = pRanges[r].layerCount;

      for (unsigned i = 0; i < pRanges[r].levelCount; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->extent.width, level);
         const unsigned level_height = anv_minify(image->extent.height, level);

         if (image->type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->extent.depth, level);

         /* A zero stencil write mask turns the stencil clear off. */
         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   blorp_batch_finish(&batch);
}
871
/* Clears rectangles of one color attachment of the current subpass.
 * attachment->colorAttachment is a subpass-relative index that is resolved
 * through the subpass's color_attachments table into the framebuffer.
 * Called from within a render pass; callers create the batch with
 * BLORP_BATCH_NO_EMIT_DEPTH_STENCIL (see anv_CmdClearAttachments).
 */
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t att = attachment->colorAttachment;
   const struct anv_image_view *iview =
      fb->attachments[subpass->color_attachments[att]];
   const struct anv_image *image = iview->image;

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, &surf);

   union isl_color_value clear_color;
   memcpy(clear_color.u32, attachment->clearValue.color.uint32,
          sizeof(clear_color.u32));

   /* All-false write-disable mask: every channel gets written. */
   static const bool color_write_disable[4] = { false, false, false, false };

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      blorp_clear(batch, &surf, iview->isl.format, iview->isl.swizzle,
                  iview->isl.base_level,
                  iview->isl.base_array_layer + pRects[r].baseArrayLayer,
                  pRects[r].layerCount,
                  offset.x, offset.y,
                  offset.x + extent.width, offset.y + extent.height,
                  clear_color, color_write_disable);
   }
}
906
/* Clears rectangles of the current subpass's depth/stencil attachment.
 * Only the aspects named in attachment->aspectMask are cleared; the other
 * aspect's blorp_surf is zeroed.  Called from within a render pass (see
 * anv_CmdClearAttachments).
 */
static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const struct anv_image_view *iview =
      fb->attachments[subpass->depth_stencil_attachment];
   const struct anv_image *image = iview->image;

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   struct blorp_surf depth, stencil;
   if (clear_depth) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (clear_stencil) {
      get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      /* A zero stencil write mask turns the stencil clear off. */
      blorp_clear_depth_stencil(batch, &depth, &stencil,
                                iview->isl.base_level,
                                iview->isl.base_array_layer +
                                   pRects[r].baseArrayLayer,
                                pRects[r].layerCount,
                                offset.x, offset.y,
                                offset.x + extent.width,
                                offset.y + extent.height,
                                clear_depth, value.depth,
                                clear_stencil ? 0xff : 0, value.stencil);
   }
}
953
954 void anv_CmdClearAttachments(
955 VkCommandBuffer commandBuffer,
956 uint32_t attachmentCount,
957 const VkClearAttachment* pAttachments,
958 uint32_t rectCount,
959 const VkClearRect* pRects)
960 {
961 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
962
963 /* Because this gets called within a render pass, we tell blorp not to
964 * trash our depth and stencil buffers.
965 */
966 struct blorp_batch batch;
967 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
968 BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
969
970 for (uint32_t a = 0; a < attachmentCount; ++a) {
971 if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
972 clear_color_attachment(cmd_buffer, &batch,
973 &pAttachments[a],
974 rectCount, pRects);
975 } else {
976 clear_depth_stencil_attachment(cmd_buffer, &batch,
977 &pAttachments[a],
978 rectCount, pRects);
979 }
980 }
981
982 blorp_batch_finish(&batch);
983 }
984
985 static bool
986 subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
987 {
988 const struct anv_cmd_state *cmd_state = &cmd_buffer->state;
989 uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
990
991 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
992 uint32_t a = cmd_state->subpass->color_attachments[i];
993 if (cmd_state->attachments[a].pending_clear_aspects) {
994 return true;
995 }
996 }
997
998 if (ds != VK_ATTACHMENT_UNUSED &&
999 cmd_state->attachments[ds].pending_clear_aspects) {
1000 return true;
1001 }
1002
1003 return false;
1004 }
1005
/* Performs the VK_ATTACHMENT_LOAD_OP_CLEAR clears recorded for the current
 * subpass, covering the whole render area and all framebuffer layers, then
 * marks each attachment's pending aspects as done.
 */
void
anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_state *cmd_state = &cmd_buffer->state;

   if (!subpass_needs_clear(cmd_buffer))
      return;

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
                    BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);

   VkClearRect clear_rect = {
      .rect = cmd_buffer->state.render_area,
      .baseArrayLayer = 0,
      .layerCount = cmd_buffer->state.framebuffer->layers,
   };

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      const uint32_t a = cmd_state->subpass->color_attachments[i];

      if (!cmd_state->attachments[a].pending_clear_aspects)
         continue;

      assert(cmd_state->attachments[a].pending_clear_aspects ==
             VK_IMAGE_ASPECT_COLOR_BIT);

      VkClearAttachment clear_att = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .colorAttachment = i, /* Use attachment index relative to subpass */
         .clearValue = cmd_state->attachments[a].clear_value,
      };

      clear_color_attachment(cmd_buffer, &batch, &clear_att, 1, &clear_rect);

      /* Clear done; don't clear this attachment again. */
      cmd_state->attachments[a].pending_clear_aspects = 0;
   }

   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;

   if (ds != VK_ATTACHMENT_UNUSED &&
       cmd_state->attachments[ds].pending_clear_aspects) {

      VkClearAttachment clear_att = {
         .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
         .clearValue = cmd_state->attachments[ds].clear_value,
      };

      clear_depth_stencil_attachment(cmd_buffer, &batch,
                                     &clear_att, 1, &clear_rect);

      cmd_state->attachments[ds].pending_clear_aspects = 0;
   }

   blorp_batch_finish(&batch);
}
1065
/* Resolve one (level, layer) of a multisampled source image into a
 * single-sampled destination, handling each aspect in aspect_mask with a
 * separate blorp blit.
 *
 * Both images must be 2D; the source must be multisampled and the
 * destination single-sampled (asserted below).
 */
static void
resolve_image(struct blorp_batch *batch,
              const struct anv_image *src_image,
              uint32_t src_level, uint32_t src_layer,
              const struct anv_image *dst_image,
              uint32_t dst_level, uint32_t dst_layer,
              VkImageAspectFlags aspect_mask,
              uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y,
              uint32_t width, uint32_t height)
{
   assert(src_image->type == VK_IMAGE_TYPE_2D);
   assert(src_image->samples > 1);
   assert(dst_image->type == VK_IMAGE_TYPE_2D);
   assert(dst_image->samples == 1);

   /* Walk the set bits of aspect_mask and resolve each aspect on its own. */
   uint32_t a;
   for_each_bit(a, aspect_mask) {
      VkImageAspectFlagBits aspect = 1 << a;

      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(src_image, aspect, &src_surf);
      get_blorp_surf_for_anv_image(dst_image, aspect, &dst_surf);

      /* ISL_FORMAT_UNSUPPORTED tells blorp to use each surface's own
       * format rather than reinterpreting it.
       */
      blorp_blit(batch,
                 &src_surf, src_level, src_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_layer,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 0x2600 /* GL_NEAREST */, false, false);
   }
}
1099
1100 void anv_CmdResolveImage(
1101 VkCommandBuffer commandBuffer,
1102 VkImage srcImage,
1103 VkImageLayout srcImageLayout,
1104 VkImage dstImage,
1105 VkImageLayout dstImageLayout,
1106 uint32_t regionCount,
1107 const VkImageResolve* pRegions)
1108 {
1109 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1110 ANV_FROM_HANDLE(anv_image, src_image, srcImage);
1111 ANV_FROM_HANDLE(anv_image, dst_image, dstImage);
1112
1113 struct blorp_batch batch;
1114 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1115
1116 for (uint32_t r = 0; r < regionCount; r++) {
1117 assert(pRegions[r].srcSubresource.aspectMask ==
1118 pRegions[r].dstSubresource.aspectMask);
1119 assert(pRegions[r].srcSubresource.layerCount ==
1120 pRegions[r].dstSubresource.layerCount);
1121
1122 const uint32_t layer_count = pRegions[r].dstSubresource.layerCount;
1123
1124 for (uint32_t layer = 0; layer < layer_count; layer++) {
1125 resolve_image(&batch,
1126 src_image, pRegions[r].srcSubresource.mipLevel,
1127 pRegions[r].srcSubresource.baseArrayLayer + layer,
1128 dst_image, pRegions[r].dstSubresource.mipLevel,
1129 pRegions[r].dstSubresource.baseArrayLayer + layer,
1130 pRegions[r].dstSubresource.aspectMask,
1131 pRegions[r].srcOffset.x, pRegions[r].srcOffset.y,
1132 pRegions[r].dstOffset.x, pRegions[r].dstOffset.y,
1133 pRegions[r].extent.width, pRegions[r].extent.height);
1134 }
1135 }
1136
1137 blorp_batch_finish(&batch);
1138 }
1139
1140 void
1141 anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
1142 {
1143 struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
1144 struct anv_subpass *subpass = cmd_buffer->state.subpass;
1145
1146 /* FINISHME(perf): Skip clears for resolve attachments.
1147 *
1148 * From the Vulkan 1.0 spec:
1149 *
1150 * If the first use of an attachment in a render pass is as a resolve
1151 * attachment, then the loadOp is effectively ignored as the resolve is
1152 * guaranteed to overwrite all pixels in the render area.
1153 */
1154
1155 if (!subpass->has_resolve)
1156 return;
1157
1158 struct blorp_batch batch;
1159 blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
1160
1161 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1162 uint32_t src_att = subpass->color_attachments[i];
1163 uint32_t dst_att = subpass->resolve_attachments[i];
1164
1165 if (dst_att == VK_ATTACHMENT_UNUSED)
1166 continue;
1167
1168 struct anv_image_view *src_iview = fb->attachments[src_att];
1169 struct anv_image_view *dst_iview = fb->attachments[dst_att];
1170
1171 const VkRect2D render_area = cmd_buffer->state.render_area;
1172
1173 assert(src_iview->aspect_mask == dst_iview->aspect_mask);
1174 resolve_image(&batch, src_iview->image,
1175 src_iview->isl.base_level, src_iview->isl.base_array_layer,
1176 dst_iview->image,
1177 dst_iview->isl.base_level, dst_iview->isl.base_array_layer,
1178 src_iview->aspect_mask,
1179 render_area.offset.x, render_area.offset.y,
1180 render_area.offset.x, render_area.offset.y,
1181 render_area.extent.width, render_area.extent.height);
1182 }
1183
1184 blorp_batch_finish(&batch);
1185 }