src/intel/vulkan/anv_meta_blit2d.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_meta.h"
25 #include "nir/nir_builder.h"
26
27 enum blit2d_src_type {
28 /* We can make a "normal" image view of this source and just texture
29 * from it like you would in any other shader.
30 */
31 BLIT2D_SRC_TYPE_NORMAL,
32
33 /* The source is W-tiled and we need to detile manually in the shader.
34 * This will work on any platform but is needed for all W-tiled sources
35 * prior to Broadwell.
36 */
37 BLIT2D_SRC_TYPE_W_DETILE,
38
39 BLIT2D_NUM_SRC_TYPES,
40 };
41
42 enum blit2d_dst_type {
43 /* We can bind this destination as a "normal" render target and render
44 * to it just like you would anywhere else.
45 */
46 BLIT2D_DST_TYPE_NORMAL,
47
48 /* The destination is W-tiled and we need to do the tiling manually in
49 * the shader. This is required for all W-tiled destinations.
50 *
51 * Sky Lake adds a feature for providing explicit stencil values in the
52 * shader but mesa doesn't support that yet so neither do we.
53 */
54 BLIT2D_DST_TYPE_W_TILE,
55
56 /* The destination has a 3-channel RGB format.  Since we can't render to
57  * non-power-of-two formats, we bind it as a single-channel (red) surface
58  * three times as wide and select the correct RGB component for each red
59  * pixel in the shader. */
60 BLIT2D_DST_TYPE_RGB,
61
62 BLIT2D_NUM_DST_TYPES,
63 };
64
65 static VkFormat
66 vk_format_for_size(int bs)
67 {
68 /* The choice of UNORM and UINT formats is very intentional here. Most of
69 * the time, we want to use a UINT format to avoid any rounding error in
70 * the blit. For stencil blits, R8_UINT is required by the hardware.
71 * (It's the only format allowed in conjunction with W-tiling.) Also we
72 * intentionally use the 4-channel formats whenever we can. This is so
73 * that, when we do a RGB <-> RGBX copy, the two formats will line up even
74 * though one of them is 3/4 the size of the other. The choice of UNORM
75 * vs. UINT is also very intentional because Haswell doesn't handle 8 or
76 * 16-bit RGB UINT formats at all so we have to use UNORM there.
77 * Fortunately, the only time we should ever use two different formats in
78 * the table below is for RGB -> RGBA blits and so we will never have any
79 * UNORM/UINT mismatch.
80 */
81 switch (bs) {
82 case 1: return VK_FORMAT_R8_UINT;
83 case 2: return VK_FORMAT_R8G8_UINT;
84 case 3: return VK_FORMAT_R8G8B8_UNORM;
85 case 4: return VK_FORMAT_R8G8B8A8_UNORM;
86 case 6: return VK_FORMAT_R16G16B16_UNORM;
87 case 8: return VK_FORMAT_R16G16B16A16_UNORM;
88 case 12: return VK_FORMAT_R32G32B32_UINT;
89 case 16: return VK_FORMAT_R32G32B32A32_UINT;
90 default:
91 unreachable("Invalid format block size");
92 }
93 }
94
95 /* Returns the single-component (red) format that matches one component of
96  * the RGB format vk_format_for_size() returns for the given block size.
97  */
98 static VkFormat
99 vk_single_component_format_for_rgb_size(int bs)
100 {
101 switch (bs) {
102 case 3: return VK_FORMAT_R8_UNORM;
103 case 6: return VK_FORMAT_R16_UNORM;
104 case 12: return VK_FORMAT_R32_UINT;
105 default:
106 unreachable("Invalid format block size");
107 }
108 }
109
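/* Wrap the BO described by surf in a VkImage of the given format and size,
 * then create a 2D image view of it.  The image is pointed directly at
 * surf->bo at surf->base_offset + offset instead of being bound to a
 * VkDeviceMemory object.
 */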
110 static void
111 create_iview(struct anv_cmd_buffer *cmd_buffer,
112 struct anv_meta_blit2d_surf *surf,
113 uint64_t offset,
114 VkImageUsageFlags usage,
115 uint32_t width,
116 uint32_t height,
117 VkFormat format,
118 VkImage *img,
119 struct anv_image_view *iview)
120 {
121 const VkImageCreateInfo image_info = {
122 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
123 .imageType = VK_IMAGE_TYPE_2D,
124 /* W-tiled images must be stencil-formatted. */
125 .format = format,
126 .extent = {
127 .width = width,
128 .height = height,
129 .depth = 1,
130 },
131 .mipLevels = 1,
132 .arrayLayers = 1,
133 .samples = 1,
134 .tiling = surf->tiling == ISL_TILING_LINEAR ?
135 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
136 .usage = usage,
137 };
138
139 /* Create the VkImage that is bound to the surface's memory. */
140 anv_image_create(anv_device_to_handle(cmd_buffer->device),
141 &(struct anv_image_create_info) {
142 .vk_info = &image_info,
143 .isl_tiling_flags = 1 << surf->tiling,
144 .stride = surf->pitch,
145 }, &cmd_buffer->pool->alloc, img);
146
147 /* We could use a vk call to bind memory, but that would require
148 * creating a dummy memory object etc. so there's really no point.
149 */
150 anv_image_from_handle(*img)->bo = surf->bo;
151 anv_image_from_handle(*img)->offset = surf->base_offset + offset;
152
153 anv_image_view_init(iview, cmd_buffer->device,
154 &(VkImageViewCreateInfo) {
155 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
156 .image = *img,
157 .viewType = VK_IMAGE_VIEW_TYPE_2D,
158 .format = image_info.format,
159 .subresourceRange = {
160 .aspectMask = anv_image_from_handle(*img)->aspects,
161 .baseMipLevel = 0,
162 .levelCount = 1,
163 .baseArrayLayer = 0,
164 .layerCount = 1
165 },
166 }, cmd_buffer, usage);
167 }
168
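/* Temporaries created by blit2d_bind_src() for a single rect and cleaned up
 * by blit2d_unbind_src().  image/iview are used by the normal source path,
 * buffer/bview by the W-detile path; the descriptor pool and set are used by
 * both.
 */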
169 struct blit2d_src_temps {
170 VkImage image;
171 struct anv_image_view iview;
172
173 struct anv_buffer buffer;
174 struct anv_buffer_view bview;
175
176 VkDescriptorPool desc_pool;
177 VkDescriptorSet set;
178 };
179
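/* Bind the source for one rect.  For BLIT2D_SRC_TYPE_NORMAL we create an
 * image view of the source and bind it as a sampled image; for
 * BLIT2D_SRC_TYPE_W_DETILE we expose the raw W-tiled bytes as an R8_UINT
 * texel buffer and let the fragment shader do the detiling.  In both cases
 * rect->src_x/src_y are rewritten relative to the base offset returned by
 * isl_tiling_get_intratile_offset_el().
 */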
180 static void
181 blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer,
182 struct anv_meta_blit2d_surf *src,
183 enum blit2d_src_type src_type,
184 struct anv_meta_blit2d_rect *rect,
185 struct blit2d_src_temps *tmp)
186 {
187 struct anv_device *device = cmd_buffer->device;
188 VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
189
190 if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
191 uint32_t offset = 0;
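/* Split the source position into a byte offset plus a new (x, y) so the
 * temporary image we create below only needs to cover the rect, not the
 * whole source surface.
 */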
192 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
193 src->tiling, src->bs, src->pitch,
194 rect->src_x, rect->src_y,
195 &offset, &rect->src_x, &rect->src_y);
196
197 VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT;
198
199 /* W-tiled images must be stencil-formatted. Outside of meta,
200 * a stencil image has this usage bit set. Adding it here
201 * ensures the ISL surface is created correctly.
202 */
203 if (src->tiling == ISL_TILING_W)
204 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
205
206 create_iview(cmd_buffer, src, offset, usage,
207 rect->src_x + rect->width, rect->src_y + rect->height,
208 src->tiling == ISL_TILING_W ?
209 VK_FORMAT_S8_UINT : vk_format_for_size(src->bs),
210 &tmp->image, &tmp->iview);
211
212 anv_CreateDescriptorPool(vk_device,
213 &(const VkDescriptorPoolCreateInfo) {
214 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
215 .pNext = NULL,
216 .flags = 0,
217 .maxSets = 1,
218 .poolSizeCount = 1,
219 .pPoolSizes = (VkDescriptorPoolSize[]) {
220 {
221 .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
222 .descriptorCount = 1
223 },
224 }
225 }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
226
227 anv_AllocateDescriptorSets(vk_device,
228 &(VkDescriptorSetAllocateInfo) {
229 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
230 .descriptorPool = tmp->desc_pool,
231 .descriptorSetCount = 1,
232 .pSetLayouts = &device->meta_state.blit2d.img_ds_layout
233 }, &tmp->set);
234
235 anv_UpdateDescriptorSets(vk_device,
236 1, /* writeCount */
237 (VkWriteDescriptorSet[]) {
238 {
239 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
240 .dstSet = tmp->set,
241 .dstBinding = 0,
242 .dstArrayElement = 0,
243 .descriptorCount = 1,
244 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
245 .pImageInfo = (VkDescriptorImageInfo[]) {
246 {
247 .sampler = NULL,
248 .imageView = anv_image_view_to_handle(&tmp->iview),
249 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
250 },
251 }
252 }
253 }, 0, NULL);
254
255 anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
256 VK_PIPELINE_BIND_POINT_GRAPHICS,
257 device->meta_state.blit2d.img_p_layout, 0, 1,
258 &tmp->set, 0, NULL);
259 } else {
260 assert(src_type == BLIT2D_SRC_TYPE_W_DETILE);
261 assert(src->tiling == ISL_TILING_W);
262 assert(src->bs == 1);
263
264 uint32_t tile_offset = 0;
265 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
266 ISL_TILING_W, 1, src->pitch,
267 rect->src_x, rect->src_y,
268 &tile_offset,
269 &rect->src_x, &rect->src_y);
270
271 tmp->buffer = (struct anv_buffer) {
272 .device = device,
273 .size = align_u32(rect->src_y + rect->height, 64) * src->pitch,
274 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
275 .bo = src->bo,
276 .offset = src->base_offset + tile_offset,
277 };
278
279 anv_buffer_view_init(&tmp->bview, device,
280 &(VkBufferViewCreateInfo) {
281 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
282 .buffer = anv_buffer_to_handle(&tmp->buffer),
283 .format = VK_FORMAT_R8_UINT,
284 .offset = 0,
285 .range = VK_WHOLE_SIZE,
286 }, cmd_buffer);
287
288 anv_CreateDescriptorPool(vk_device,
289 &(const VkDescriptorPoolCreateInfo) {
290 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
291 .pNext = NULL,
292 .flags = 0,
293 .maxSets = 1,
294 .poolSizeCount = 1,
295 .pPoolSizes = (VkDescriptorPoolSize[]) {
296 {
297 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
298 .descriptorCount = 1
299 },
300 }
301 }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
302
303 anv_AllocateDescriptorSets(vk_device,
304 &(VkDescriptorSetAllocateInfo) {
305 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
306 .descriptorPool = tmp->desc_pool,
307 .descriptorSetCount = 1,
308 .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout
309 }, &tmp->set);
310
311 anv_UpdateDescriptorSets(vk_device,
312 1, /* writeCount */
313 (VkWriteDescriptorSet[]) {
314 {
315 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
316 .dstSet = tmp->set,
317 .dstBinding = 0,
318 .dstArrayElement = 0,
319 .descriptorCount = 1,
320 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
321 .pTexelBufferView = (VkBufferView[]) {
322 anv_buffer_view_to_handle(&tmp->bview),
323 },
324 }
325 }, 0, NULL);
326
327 anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
328 VK_PIPELINE_BIND_POINT_GRAPHICS,
329 device->meta_state.blit2d.buf_p_layout, 0, 1,
330 &tmp->set, 0, NULL);
331 }
332 }
333
334 static void
335 blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer,
336 enum blit2d_src_type src_type,
337 struct blit2d_src_temps *tmp)
338 {
339 anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device),
340 tmp->desc_pool, &cmd_buffer->pool->alloc);
341 if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
342 anv_DestroyImage(anv_device_to_handle(cmd_buffer->device),
343 tmp->image, &cmd_buffer->pool->alloc);
344 }
345 }
346
347 struct blit2d_dst_temps {
348 VkImage image;
349 struct anv_image_view iview;
350 VkFramebuffer fb;
351 };
352
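/* Create an image view and framebuffer for the destination surface at the
 * given byte offset, covering (0, 0) .. (width, height).
 */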
353 static void
354 blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer,
355 struct anv_meta_blit2d_surf *dst,
356 uint64_t offset,
357 uint32_t width,
358 uint32_t height,
359 VkFormat format,
360 struct blit2d_dst_temps *tmp)
361 {
362 create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
363 width, height, format, &tmp->image, &tmp->iview);
364
365 anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
366 &(VkFramebufferCreateInfo) {
367 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
368 .attachmentCount = 1,
369 .pAttachments = (VkImageView[]) {
370 anv_image_view_to_handle(&tmp->iview),
371 },
372 .width = width,
373 .height = height,
374 .layers = 1
375 }, &cmd_buffer->pool->alloc, &tmp->fb);
376 }
377
378 static void
379 blit2d_unbind_dst(struct anv_cmd_buffer *cmd_buffer,
380 struct blit2d_dst_temps *tmp)
381 {
382 VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
383 anv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc);
384 anv_DestroyImage(vk_device, tmp->image, &cmd_buffer->pool->alloc);
385 }
386
387 void
388 anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
389 struct anv_meta_saved_state *save)
390 {
391 anv_meta_restore(save, cmd_buffer);
392 }
393
394 void
395 anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
396 struct anv_meta_saved_state *save)
397 {
398 anv_meta_save(save, cmd_buffer, 0);
399 }
400
401 static void
402 bind_pipeline(struct anv_cmd_buffer *cmd_buffer,
403 enum blit2d_src_type src_type,
404 enum blit2d_dst_type dst_type)
405 {
406 VkPipeline pipeline =
407 cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type];
408
409 if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
410 anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
411 VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
412 }
413 }
414
415 static void
416 anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer,
417 struct anv_meta_blit2d_surf *src,
418 enum blit2d_src_type src_type,
419 struct anv_meta_blit2d_surf *dst,
420 unsigned num_rects,
421 struct anv_meta_blit2d_rect *rects)
422 {
423 struct anv_device *device = cmd_buffer->device;
424
425 for (unsigned r = 0; r < num_rects; ++r) {
426 struct blit2d_src_temps src_temps;
427 blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
428
429 uint32_t offset = 0;
430 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
431 dst->tiling, dst->bs, dst->pitch,
432 rects[r].dst_x, rects[r].dst_y,
433 &offset,
434 &rects[r].dst_x, &rects[r].dst_y);
435
436 struct blit2d_dst_temps dst_temps;
437 blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width,
438 rects[r].dst_y + rects[r].height,
439 vk_format_for_size(dst->bs), &dst_temps);
440
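/* Binding 0 holds a single zeroed VUE header (per-instance); binding 1 holds
 * the three rect-list vertices.  Each vertex is a destination position plus
 * a vec3 texture coordinate whose third component is the source pitch, which
 * the W-detile fetch path needs.
 */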
441 struct blit_vb_data {
442 float pos[2];
443 float tex_coord[3];
444 } *vb_data;
445
446 unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
447
448 struct anv_state vb_state =
449 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
450 memset(vb_state.map, 0, sizeof(struct anv_vue_header));
451 vb_data = vb_state.map + sizeof(struct anv_vue_header);
452
453 vb_data[0] = (struct blit_vb_data) {
454 .pos = {
455 rects[r].dst_x + rects[r].width,
456 rects[r].dst_y + rects[r].height,
457 },
458 .tex_coord = {
459 rects[r].src_x + rects[r].width,
460 rects[r].src_y + rects[r].height,
461 src->pitch,
462 },
463 };
464
465 vb_data[1] = (struct blit_vb_data) {
466 .pos = {
467 rects[r].dst_x,
468 rects[r].dst_y + rects[r].height,
469 },
470 .tex_coord = {
471 rects[r].src_x,
472 rects[r].src_y + rects[r].height,
473 src->pitch,
474 },
475 };
476
477 vb_data[2] = (struct blit_vb_data) {
478 .pos = {
479 rects[r].dst_x,
480 rects[r].dst_y,
481 },
482 .tex_coord = {
483 rects[r].src_x,
484 rects[r].src_y,
485 src->pitch,
486 },
487 };
488
489 if (!device->info.has_llc)
490 anv_state_clflush(vb_state);
491
492 struct anv_buffer vertex_buffer = {
493 .device = device,
494 .size = vb_size,
495 .bo = &device->dynamic_state_block_pool.bo,
496 .offset = vb_state.offset,
497 };
498
499 anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
500 (VkBuffer[]) {
501 anv_buffer_to_handle(&vertex_buffer),
502 anv_buffer_to_handle(&vertex_buffer)
503 },
504 (VkDeviceSize[]) {
505 0,
506 sizeof(struct anv_vue_header),
507 });
508
509 ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
510 &(VkRenderPassBeginInfo) {
511 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
512 .renderPass = device->meta_state.blit2d.render_pass,
513 .framebuffer = dst_temps.fb,
514 .renderArea = {
515 .offset = { rects[r].dst_x, rects[r].dst_y, },
516 .extent = { rects[r].width, rects[r].height },
517 },
518 .clearValueCount = 0,
519 .pClearValues = NULL,
520 }, VK_SUBPASS_CONTENTS_INLINE);
521
522 bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL);
523
524 ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
525
526 ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
527
528 /* At the point where we emit the draw call, all data from the
529 * descriptor sets, etc. has been used. We are free to delete it.
530 */
531 blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
532 blit2d_unbind_dst(cmd_buffer, &dst_temps);
533 }
534 }
535
536 static void
537 anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
538 struct anv_meta_blit2d_surf *src,
539 enum blit2d_src_type src_type,
540 struct anv_meta_blit2d_surf *dst,
541 unsigned num_rects,
542 struct anv_meta_blit2d_rect *rects)
543 {
544 struct anv_device *device = cmd_buffer->device;
545
546 for (unsigned r = 0; r < num_rects; ++r) {
547 struct blit2d_src_temps src_temps;
548 blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
549
550 assert(dst->bs == 1);
551 uint32_t offset;
552 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
553 ISL_TILING_W, 1, dst->pitch,
554 rects[r].dst_x, rects[r].dst_y,
555 &offset,
556 &rects[r].dst_x, &rects[r].dst_y);
557
558 /* The original coordinates were in terms of an actual W-tiled offset
559 * but we are binding this image as Y-tiled. We need to adjust our
560 * rectangle accordingly.
561 */
562 uint32_t xmin_Y, xmax_Y, ymin_Y, ymax_Y;
563 xmin_Y = (rects[r].dst_x / 8) * 16;
564 xmax_Y = DIV_ROUND_UP(rects[r].dst_x + rects[r].width, 8) * 16;
565 ymin_Y = (rects[r].dst_y / 4) * 2;
566 ymax_Y = DIV_ROUND_UP(rects[r].dst_y + rects[r].height, 4) * 2;
567
568 struct anv_meta_blit2d_surf dst_Y = {
569 .bo = dst->bo,
570 .tiling = ISL_TILING_Y0,
571 .base_offset = dst->base_offset,
572 .bs = 1,
573 .pitch = dst->pitch,
574 };
575
576 struct blit2d_dst_temps dst_temps;
577 blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y,
578 VK_FORMAT_R8_UINT, &dst_temps);
579
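/* Per-instance data for the W-tiled destination path: the dest-to-source
 * offset and source pitch feed the texel fetch, and bounds is the real
 * destination rectangle that the fragment shader discards against.  These
 * reach the fragment shader as the flat varyings v_tex_off and v_bounds.
 */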
580 struct blit_vb_header {
581 struct anv_vue_header vue;
582 int32_t tex_offset[2];
583 uint32_t tex_pitch;
584 uint32_t bounds[4];
585 } *vb_header;
586
587 struct blit_vb_data {
588 float pos[2];
589 } *vb_data;
590
591 unsigned vb_size = sizeof(*vb_header) + 3 * sizeof(*vb_data);
592
593 struct anv_state vb_state =
594 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
595 vb_header = vb_state.map;
596
597 *vb_header = (struct blit_vb_header) {
598 .tex_offset = {
599 rects[r].src_x - rects[r].dst_x,
600 rects[r].src_y - rects[r].dst_y,
601 },
602 .tex_pitch = src->pitch,
603 .bounds = {
604 rects[r].dst_x,
605 rects[r].dst_y,
606 rects[r].dst_x + rects[r].width,
607 rects[r].dst_y + rects[r].height,
608 },
609 };
610
611 vb_data = (void *)(vb_header + 1);
612
613 vb_data[0] = (struct blit_vb_data) {
614 .pos = {
615 xmax_Y,
616 ymax_Y,
617 },
618 };
619
620 vb_data[1] = (struct blit_vb_data) {
621 .pos = {
622 xmin_Y,
623 ymax_Y,
624 },
625 };
626
627 vb_data[2] = (struct blit_vb_data) {
628 .pos = {
629 xmin_Y,
630 ymin_Y,
631 },
632 };
633
634 if (!device->info.has_llc)
635 anv_state_clflush(vb_state);
636
637 struct anv_buffer vertex_buffer = {
638 .device = device,
639 .size = vb_size,
640 .bo = &device->dynamic_state_block_pool.bo,
641 .offset = vb_state.offset,
642 };
643
644 anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
645 (VkBuffer[]) {
646 anv_buffer_to_handle(&vertex_buffer),
647 anv_buffer_to_handle(&vertex_buffer)
648 },
649 (VkDeviceSize[]) {
650 0,
651 (void *)vb_data - vb_state.map,
652 });
653
654 ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
655 &(VkRenderPassBeginInfo) {
656 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
657 .renderPass = device->meta_state.blit2d.render_pass,
658 .framebuffer = dst_temps.fb,
659 .renderArea = {
660 .offset = { xmin_Y, ymin_Y, },
661 .extent = { xmax_Y - xmin_Y, ymax_Y - ymin_Y },
662 },
663 .clearValueCount = 0,
664 .pClearValues = NULL,
665 }, VK_SUBPASS_CONTENTS_INLINE);
666
667 bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_W_TILE);
668
669 ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
670
671 ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
672
673 /* At the point where we emit the draw call, all data from the
674 * descriptor sets, etc. has been used. We are free to delete it.
675 */
676 blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
677 blit2d_unbind_dst(cmd_buffer, &dst_temps);
678 }
679 }
680
681 static void
682 anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer,
683 struct anv_meta_blit2d_surf *src,
684 enum blit2d_src_type src_type,
685 struct anv_meta_blit2d_surf *dst,
686 unsigned num_rects,
687 struct anv_meta_blit2d_rect *rects)
688 {
689 struct anv_device *device = cmd_buffer->device;
690
691 for (unsigned r = 0; r < num_rects; ++r) {
692 struct blit2d_src_temps src_temps;
693 blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
694
695 assert(dst->bs % 3 == 0);
696 assert(dst->tiling == ISL_TILING_LINEAR);
697
698 uint32_t offset;
699 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
700 dst->tiling, 1, dst->pitch,
701 rects[r].dst_x, rects[r].dst_y,
702 &offset,
703 &rects[r].dst_x, &rects[r].dst_y);
704
705 /* A red surface three times as wide as the actual RGB destination */
706 struct anv_meta_blit2d_surf dst_R = {
707 .bo = dst->bo,
708 .tiling = dst->tiling,
709 .base_offset = dst->base_offset,
710 .bs = dst->bs / 3,
711 .pitch = dst->pitch,
712 };
713
714 struct blit2d_dst_temps dst_temps;
715 blit2d_bind_dst(cmd_buffer, &dst_R, offset,
716 (rects[r].dst_x + rects[r].width) * 3,
717 rects[r].dst_y + rects[r].height,
718 vk_single_component_format_for_rgb_size(dst->bs),
719 &dst_temps);
720
721 struct blit_vb_data {
722 float pos[2];
723 float tex_coord[3];
724 } *vb_data;
725
726 unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
727
728 struct anv_state vb_state =
729 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
730 memset(vb_state.map, 0, sizeof(struct anv_vue_header));
731 vb_data = vb_state.map + sizeof(struct anv_vue_header);
732
733 vb_data[0] = (struct blit_vb_data) {
734 .pos = {
735 (rects[r].dst_x + rects[r].width) * 3,
736 rects[r].dst_y + rects[r].height,
737 },
738 .tex_coord = {
739 rects[r].src_x + rects[r].width,
740 rects[r].src_y + rects[r].height,
741 src->pitch,
742 },
743 };
744
745 vb_data[1] = (struct blit_vb_data) {
746 .pos = {
747 rects[r].dst_x * 3,
748 rects[r].dst_y + rects[r].height,
749 },
750 .tex_coord = {
751 rects[r].src_x,
752 rects[r].src_y + rects[r].height,
753 src->pitch,
754 },
755 };
756
757 vb_data[2] = (struct blit_vb_data) {
758 .pos = {
759 rects[r].dst_x * 3,
760 rects[r].dst_y,
761 },
762 .tex_coord = {
763 rects[r].src_x,
764 rects[r].src_y,
765 src->pitch,
766 },
767 };
768
769 if (!device->info.has_llc)
770 anv_state_clflush(vb_state);
771
772 struct anv_buffer vertex_buffer = {
773 .device = device,
774 .size = vb_size,
775 .bo = &device->dynamic_state_block_pool.bo,
776 .offset = vb_state.offset,
777 };
778
779 anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
780 (VkBuffer[]) {
781 anv_buffer_to_handle(&vertex_buffer),
782 anv_buffer_to_handle(&vertex_buffer)
783 },
784 (VkDeviceSize[]) {
785 0,
786 sizeof(struct anv_vue_header),
787 });
788
789 ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
790 &(VkRenderPassBeginInfo) {
791 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
792 .renderPass = device->meta_state.blit2d.render_pass,
793 .framebuffer = dst_temps.fb,
794 .renderArea = {
795 .offset = { rects[r].dst_x, rects[r].dst_y, },
796 .extent = { rects[r].width, rects[r].height },
797 },
798 .clearValueCount = 0,
799 .pClearValues = NULL,
800 }, VK_SUBPASS_CONTENTS_INLINE);
801
802 bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB);
803
804 ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
805
806 ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
807
808 /* At the point where we emit the draw call, all data from the
809 * descriptor sets, etc. has been used. We are free to delete it.
810 */
811 blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
812 blit2d_unbind_dst(cmd_buffer, &dst_temps);
813 }
814 }
815
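/* Copy a list of rects between two (possibly tiled) surfaces.  W-tiled
 * sources on gen7 and earlier go through the shader detiling path; the
 * destination is dispatched to one of the three strategies above (W-tiled,
 * RGB, or normal) based on its tiling and block size.
 */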
816 void
817 anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
818 struct anv_meta_blit2d_surf *src,
819 struct anv_meta_blit2d_surf *dst,
820 unsigned num_rects,
821 struct anv_meta_blit2d_rect *rects)
822 {
823 enum blit2d_src_type src_type;
824 if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) {
825 src_type = BLIT2D_SRC_TYPE_W_DETILE;
826 } else {
827 src_type = BLIT2D_SRC_TYPE_NORMAL;
828 }
829
830 if (dst->tiling == ISL_TILING_W) {
831 anv_meta_blit2d_w_tiled_dst(cmd_buffer, src, src_type, dst,
832 num_rects, rects);
833 return;
834 } else if (dst->bs % 3 == 0) {
835 anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst,
836 num_rects, rects);
837 return;
838 } else {
839 assert(util_is_power_of_two(dst->bs));
840 anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst,
841 num_rects, rects);
842 }
843 }
844
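/* The meta blit path doesn't use a real vertex shader (the pipelines are
 * created with disable_vs and use_rectlist); this pass-through shader exists
 * only so the compiler sees our inputs and doesn't dead-code them.
 */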
845 static nir_shader *
846 build_nir_vertex_shader(void)
847 {
848 const struct glsl_type *vec4 = glsl_vec4_type();
849 nir_builder b;
850
851 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
852 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
853
854 nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
855 vec4, "a_pos");
856 pos_in->data.location = VERT_ATTRIB_GENERIC0;
857 nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
858 vec4, "gl_Position");
859 pos_out->data.location = VARYING_SLOT_POS;
860 nir_copy_var(&b, pos_out, pos_in);
861
862 nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
863 vec4, "a_tex_pos");
864 tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
865 nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
866 vec4, "v_tex_pos");
867 tex_pos_out->data.location = VARYING_SLOT_VAR0;
868 tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
869 nir_copy_var(&b, tex_pos_out, tex_pos_in);
870
871 nir_variable *other_in = nir_variable_create(b.shader, nir_var_shader_in,
872 vec4, "a_other");
873 other_in->data.location = VERT_ATTRIB_GENERIC2;
874 nir_variable *other_out = nir_variable_create(b.shader, nir_var_shader_out,
875 vec4, "v_other");
876 other_out->data.location = VARYING_SLOT_VAR1;
877 other_out->data.interpolation = INTERP_MODE_FLAT;
878 nir_copy_var(&b, other_out, other_in);
879
880 return b.shader;
881 }
882
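/* Emits the texel fetch for a given source type: takes the integer texel
 * position and the source pitch and returns the fetched color as a vec4.
 */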
883 typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
884 struct anv_device *,
885 nir_ssa_def *, nir_ssa_def *);
886
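/* OR num_bits bits of src, starting at bit src_offset, into dst starting at
 * bit dst_offset.  The callers below build their offsets up from zero, so
 * the destination bits are still clear and a plain OR is sufficient.
 */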
887 static nir_ssa_def *
888 nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset,
889 nir_ssa_def *src, unsigned src_offset, unsigned num_bits)
890 {
891 unsigned src_mask = (~0u >> (32 - num_bits)) << src_offset;
892 nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask));
893
894 nir_ssa_def *shifted;
895 if (dst_offset > src_offset) {
896 shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset));
897 } else if (dst_offset < src_offset) {
898 shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset));
899 } else {
900 assert(dst_offset == src_offset);
901 shifted = masked;
902 }
903
904 return nir_ior(b, dst, shifted);
905 }
906
907 static nir_ssa_def *
908 build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device,
909 nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
910 {
911 nir_ssa_def *x = nir_channel(b, tex_pos, 0);
912 nir_ssa_def *y = nir_channel(b, tex_pos, 1);
913
914 /* First, compute the block-aligned offset */
915 nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6));
916 nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6));
917 /* W tiles have physical size of 128x32 and logical size of 64x64, hence
918 * the multiplication by 32 (instead of 64). */
919 nir_ssa_def *offset =
920 nir_iadd(b, nir_imul(b, y_major,
921 nir_imul(b, tex_pitch, nir_imm_int(b, 32))),
922 nir_imul(b, x_major, nir_imm_int(b, 4096)));
923
924 /* Compute the bottom 12 bits of the offset */
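/* Within a 4KB W tile, the byte address is the bit-interleave
 *
 *    offset[11:0] = x5 x4 x3 | y5 y4 y3 y2 | x2 y1 x1 y0 x0
 *
 * which is exactly what the nir_copy_bits() calls below assemble.
 */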
925 offset = nir_copy_bits(b, offset, 0, x, 0, 1);
926 offset = nir_copy_bits(b, offset, 1, y, 0, 1);
927 offset = nir_copy_bits(b, offset, 2, x, 1, 1);
928 offset = nir_copy_bits(b, offset, 3, y, 1, 1);
929 offset = nir_copy_bits(b, offset, 4, x, 2, 1);
930 offset = nir_copy_bits(b, offset, 5, y, 2, 4);
931 offset = nir_copy_bits(b, offset, 9, x, 3, 3);
932
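/* When bit-6 address swizzling is in effect (isl_dev.has_bit6_swizzling),
 * bit 6 of the byte address is XORed with bit 9.  Mirror that here so the
 * linear texel-buffer fetch lands on the correct byte.
 */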
933 if (device->isl_dev.has_bit6_swizzling) {
934 offset = nir_ixor(b, offset,
935 nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)),
936 nir_imm_int(b, 3)));
937 }
938
939 const struct glsl_type *sampler_type =
940 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
941 nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
942 sampler_type, "s_tex");
943 sampler->data.descriptor_set = 0;
944 sampler->data.binding = 0;
945
946 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
947 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
948 tex->op = nir_texop_txf;
949 tex->src[0].src_type = nir_tex_src_coord;
950 tex->src[0].src = nir_src_for_ssa(offset);
951 tex->dest_type = nir_type_float; /* TODO */
952 tex->is_array = false;
953 tex->coord_components = 1;
954 tex->texture = nir_deref_var_create(tex, sampler);
955 tex->sampler = NULL;
956
957 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
958 nir_builder_instr_insert(b, &tex->instr);
959
960 return &tex->dest.ssa;
961 }
962
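/* Normal source path: texelFetch from a 2D sampled image at LOD 0.  The
 * pitch argument is ignored here; it only matters for the W-detile path.
 */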
963 static nir_ssa_def *
964 build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device,
965 nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
966 {
967 const struct glsl_type *sampler_type =
968 glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
969 nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
970 sampler_type, "s_tex");
971 sampler->data.descriptor_set = 0;
972 sampler->data.binding = 0;
973
974 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
975 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
976 tex->op = nir_texop_txf;
977 tex->src[0].src_type = nir_tex_src_coord;
978 tex->src[0].src = nir_src_for_ssa(tex_pos);
979 tex->src[1].src_type = nir_tex_src_lod;
980 tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
981 tex->dest_type = nir_type_float; /* TODO */
982 tex->is_array = false;
983 tex->coord_components = 2;
984 tex->texture = nir_deref_var_create(tex, sampler);
985 tex->sampler = NULL;
986
987 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
988 nir_builder_instr_insert(b, &tex->instr);
989
990 return &tex->dest.ssa;
991 }
992
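/* Vertex input layout for the normal and RGB destination paths: binding 0 is
 * the per-instance VUE header, binding 1 the per-vertex position plus vec3
 * texture coordinate written by the *_dst functions above.
 */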
993 static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
994 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
995 .vertexBindingDescriptionCount = 2,
996 .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
997 {
998 .binding = 0,
999 .stride = 0,
1000 .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
1001 },
1002 {
1003 .binding = 1,
1004 .stride = 5 * sizeof(float),
1005 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
1006 },
1007 },
1008 .vertexAttributeDescriptionCount = 3,
1009 .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
1010 {
1011 /* VUE Header */
1012 .location = 0,
1013 .binding = 0,
1014 .format = VK_FORMAT_R32G32B32A32_UINT,
1015 .offset = 0
1016 },
1017 {
1018 /* Position */
1019 .location = 1,
1020 .binding = 1,
1021 .format = VK_FORMAT_R32G32_SFLOAT,
1022 .offset = 0
1023 },
1024 {
1025 /* Texture Coordinate */
1026 .location = 2,
1027 .binding = 1,
1028 .format = VK_FORMAT_R32G32B32_SFLOAT,
1029 .offset = 8
1030 },
1031 },
1032 };
1033
1034 static nir_shader *
1035 build_nir_copy_fragment_shader(struct anv_device *device,
1036 texel_fetch_build_func txf_func)
1037 {
1038 const struct glsl_type *vec4 = glsl_vec4_type();
1039 const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
1040 nir_builder b;
1041
1042 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
1043 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
1044
1045 nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
1046 vec3, "v_tex_pos");
1047 tex_pos_in->data.location = VARYING_SLOT_VAR0;
1048
1049 nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
1050 vec4, "f_color");
1051 color_out->data.location = FRAG_RESULT_DATA0;
1052
1053 nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
1054 unsigned swiz[4] = { 0, 1 };
1055 nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
1056 nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
1057
1058 nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
1059 nir_store_var(&b, color_out, color, 0xf);
1060
1061 return b.shader;
1062 }
1063
1064 /* RGB copies have the same interface as normal copies */
1065 #define rgb_vi_create_info normal_vi_create_info
1066
1067 static nir_shader *
1068 build_nir_rgb_fragment_shader(struct anv_device *device,
1069 texel_fetch_build_func txf_func)
1070 {
1071 const struct glsl_type *vec4 = glsl_vec4_type();
1072 const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
1073 nir_builder b;
1074
1075 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
1076 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
1077
1078 nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
1079 vec3, "v_tex_pos");
1080 tex_pos_in->data.location = VARYING_SLOT_VAR0;
1081
1082 nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
1083 vec4, "f_color");
1084 color_out->data.location = FRAG_RESULT_DATA0;
1085
1086 /* We need gl_FragCoord so we know our position */
1087 nir_variable *frag_coord_in = nir_variable_create(b.shader,
1088 nir_var_shader_in,
1089 vec4, "gl_FragCoord");
1090 frag_coord_in->data.location = VARYING_SLOT_POS;
1091 frag_coord_in->data.origin_upper_left = true;
1092
1093 nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
1094 unsigned swiz[4] = { 0, 1 };
1095 nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
1096 nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
1097
1098 nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
1099
1100 /* We figure out which component we are by the x component of FragCoord */
1101 nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
1102 nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0),
1103 nir_imm_int(&b, 3));
1104
1105 /* Select the given channel from the texelFetch result */
1106 nir_ssa_def *color_channel =
1107 nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)),
1108 nir_channel(&b, color, 0),
1109 nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)),
1110 nir_channel(&b, color, 1),
1111 nir_channel(&b, color, 2)));
1112
1113 nir_ssa_def *u = nir_ssa_undef(&b, 1, 32);
1114 nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1);
1115
1116 return b.shader;
1117 }
1118
1119 static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = {
1120 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
1121 .vertexBindingDescriptionCount = 2,
1122 .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
1123 {
1124 .binding = 0,
1125 .stride = 0,
1126 .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
1127 },
1128 {
1129 .binding = 1,
1130 .stride = 2 * sizeof(float),
1131 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
1132 },
1133 },
1134 .vertexAttributeDescriptionCount = 4,
1135 .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
1136 {
1137 /* VUE Header */
1138 .location = 0,
1139 .binding = 0,
1140 .format = VK_FORMAT_R32G32B32A32_UINT,
1141 .offset = 0
1142 },
1143 {
1144 /* Position */
1145 .location = 1,
1146 .binding = 1,
1147 .format = VK_FORMAT_R32G32_SFLOAT,
1148 .offset = 0
1149 },
1150 {
1151 /* Texture Offset */
1152 .location = 2,
1153 .binding = 0,
1154 .format = VK_FORMAT_R32G32B32_UINT,
1155 .offset = 16
1156 },
1157 {
1158 /* Destination bounds */
1159 .location = 3,
1160 .binding = 0,
1161 .format = VK_FORMAT_R32G32B32A32_UINT,
1162 .offset = 28
1163 },
1164 },
1165 };
1166
1167 static nir_shader *
1168 build_nir_w_tiled_fragment_shader(struct anv_device *device,
1169 texel_fetch_build_func txf_func)
1170 {
1171 const struct glsl_type *vec4 = glsl_vec4_type();
1172 const struct glsl_type *ivec3 = glsl_vector_type(GLSL_TYPE_INT, 3);
1173 const struct glsl_type *uvec4 = glsl_vector_type(GLSL_TYPE_UINT, 4);
1174 nir_builder b;
1175
1176 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
1177 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
1178
1179 /* We need gl_FragCoord so we know our Y-tiled position */
1180 nir_variable *frag_coord_in = nir_variable_create(b.shader,
1181 nir_var_shader_in,
1182 vec4, "gl_FragCoord");
1183 frag_coord_in->data.location = VARYING_SLOT_POS;
1184 frag_coord_in->data.origin_upper_left = true;
1185
1186 /* In location 0 we have an ivec3 that has the offset from dest to
1187 * source in the first two components and the stride in the third.
1188 */
1189 nir_variable *tex_off_in = nir_variable_create(b.shader, nir_var_shader_in,
1190 ivec3, "v_tex_off");
1191 tex_off_in->data.location = VARYING_SLOT_VAR0;
1192 tex_off_in->data.interpolation = INTERP_MODE_FLAT;
1193
1194 /* In location 1 we have a uvec4 that gives us the bounds of the
1195 * destination. We need to discard if we get outside this boundary.
1196 */
1197 nir_variable *bounds_in = nir_variable_create(b.shader, nir_var_shader_in,
1198 uvec4, "v_bounds");
1199 bounds_in->data.location = VARYING_SLOT_VAR1;
1200 bounds_in->data.interpolation = INTERP_MODE_FLAT;
1201
1202 nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
1203 vec4, "f_color");
1204 color_out->data.location = FRAG_RESULT_DATA0;
1205
1206 nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
1207 nir_ssa_def *x_Y = nir_channel(&b, frag_coord_int, 0);
1208 nir_ssa_def *y_Y = nir_channel(&b, frag_coord_int, 1);
1209
1210 /* Compute the W-tiled position from the Y-tiled position */
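/* The render target is bound as Y-tiled R8, so gl_FragCoord gives us a
 * Y-tiled (x_Y, y_Y).  The byte we are writing has a fixed W-tiled
 * coordinate; recover it by re-interleaving the low address bits (roughly
 * x_W = x_Y / 2 and y_W = y_Y * 2 in the tile-aligned high bits).
 */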
1211 nir_ssa_def *x_W = nir_iand(&b, x_Y, nir_imm_int(&b, 0xffffff80));
1212 x_W = nir_ushr(&b, x_W, nir_imm_int(&b, 1));
1213 x_W = nir_copy_bits(&b, x_W, 0, x_Y, 0, 1);
1214 x_W = nir_copy_bits(&b, x_W, 1, x_Y, 2, 1);
1215 x_W = nir_copy_bits(&b, x_W, 2, y_Y, 0, 1);
1216 x_W = nir_copy_bits(&b, x_W, 3, x_Y, 4, 3);
1217
1218 nir_ssa_def *y_W = nir_iand(&b, y_Y, nir_imm_int(&b, 0xffffffe0));
1219 y_W = nir_ishl(&b, y_W, nir_imm_int(&b, 1));
1220 y_W = nir_copy_bits(&b, y_W, 0, x_Y, 1, 1);
1221 y_W = nir_copy_bits(&b, y_W, 1, x_Y, 3, 1);
1222 y_W = nir_copy_bits(&b, y_W, 2, y_Y, 1, 4);
1223
1224 /* Figure out if we are out-of-bounds and discard */
1225 nir_ssa_def *bounds = nir_load_var(&b, bounds_in);
1226 nir_ssa_def *oob =
1227 nir_ior(&b, nir_ult(&b, x_W, nir_channel(&b, bounds, 0)),
1228 nir_ior(&b, nir_ult(&b, y_W, nir_channel(&b, bounds, 1)),
1229 nir_ior(&b, nir_uge(&b, x_W, nir_channel(&b, bounds, 2)),
1230 nir_uge(&b, y_W, nir_channel(&b, bounds, 3)))));
1231
1232 nir_intrinsic_instr *discard =
1233 nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
1234 discard->src[0] = nir_src_for_ssa(oob);
1235 nir_builder_instr_insert(&b, &discard->instr);
1236
1237 nir_ssa_def *tex_off = nir_channels(&b, nir_load_var(&b, tex_off_in), 0x3);
1238 nir_ssa_def *tex_pos = nir_iadd(&b, nir_vec2(&b, x_W, y_W), tex_off);
1239 nir_ssa_def *tex_pitch = nir_channel(&b, nir_load_var(&b, tex_off_in), 2);
1240
1241 nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
1242 nir_store_var(&b, color_out, color, 0xf);
1243
1244 return b.shader;
1245 }
1246
1247 void
1248 anv_device_finish_meta_blit2d_state(struct anv_device *device)
1249 {
1250 if (device->meta_state.blit2d.render_pass) {
1251 anv_DestroyRenderPass(anv_device_to_handle(device),
1252 device->meta_state.blit2d.render_pass,
1253 &device->meta_state.alloc);
1254 }
1255
1256 if (device->meta_state.blit2d.img_p_layout) {
1257 anv_DestroyPipelineLayout(anv_device_to_handle(device),
1258 device->meta_state.blit2d.img_p_layout,
1259 &device->meta_state.alloc);
1260 }
1261
1262 if (device->meta_state.blit2d.img_ds_layout) {
1263 anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
1264 device->meta_state.blit2d.img_ds_layout,
1265 &device->meta_state.alloc);
1266 }
1267
1268 if (device->meta_state.blit2d.buf_p_layout) {
1269 anv_DestroyPipelineLayout(anv_device_to_handle(device),
1270 device->meta_state.blit2d.buf_p_layout,
1271 &device->meta_state.alloc);
1272 }
1273
1274 if (device->meta_state.blit2d.buf_ds_layout) {
1275 anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
1276 device->meta_state.blit2d.buf_ds_layout,
1277 &device->meta_state.alloc);
1278 }
1279
1280 for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
1281 for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
1282 if (device->meta_state.blit2d.pipelines[src][dst]) {
1283 anv_DestroyPipeline(anv_device_to_handle(device),
1284 device->meta_state.blit2d.pipelines[src][dst],
1285 &device->meta_state.alloc);
1286 }
1287 }
1288 }
1289 }
1290
1291 static VkResult
1292 blit2d_init_pipeline(struct anv_device *device,
1293 enum blit2d_src_type src_type,
1294 enum blit2d_dst_type dst_type)
1295 {
1296 VkResult result;
1297
1298 texel_fetch_build_func src_func;
1299 switch (src_type) {
1300 case BLIT2D_SRC_TYPE_NORMAL:
1301 src_func = build_nir_texel_fetch;
1302 break;
1303 case BLIT2D_SRC_TYPE_W_DETILE:
1304 src_func = build_nir_w_tiled_fetch;
1305 break;
1306 default:
1307 unreachable("Invalid blit2d source type");
1308 }
1309
1310 const VkPipelineVertexInputStateCreateInfo *vi_create_info;
1311 struct anv_shader_module fs = { .nir = NULL };
1312 switch (dst_type) {
1313 case BLIT2D_DST_TYPE_NORMAL:
1314 fs.nir = build_nir_copy_fragment_shader(device, src_func);
1315 vi_create_info = &normal_vi_create_info;
1316 break;
1317 case BLIT2D_DST_TYPE_W_TILE:
1318 fs.nir = build_nir_w_tiled_fragment_shader(device, src_func);
1319 vi_create_info = &w_tiled_vi_create_info;
1320 break;
1321 case BLIT2D_DST_TYPE_RGB:
1322 /* RGB destinations and W-detiling don't mix */
1323 if (src_type != BLIT2D_SRC_TYPE_NORMAL)
1324 return VK_SUCCESS;
1325
1326 fs.nir = build_nir_rgb_fragment_shader(device, src_func);
1327 vi_create_info = &rgb_vi_create_info;
1328 break;
1329 default:
1330 return VK_SUCCESS;
1331 }
1332
1333 /* We don't use a vertex shader for blitting, but instead build and pass
1334 * the VUEs directly to the rasterization backend. However, we do need
1335 * to provide GLSL source for the vertex shader so that the compiler
1336 * does not dead-code our inputs.
1337 */
1338 struct anv_shader_module vs = {
1339 .nir = build_nir_vertex_shader(),
1340 };
1341
1342 VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
1343 {
1344 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1345 .stage = VK_SHADER_STAGE_VERTEX_BIT,
1346 .module = anv_shader_module_to_handle(&vs),
1347 .pName = "main",
1348 .pSpecializationInfo = NULL
1349 }, {
1350 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1351 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
1352 .module = anv_shader_module_to_handle(&fs),
1353 .pName = "main",
1354 .pSpecializationInfo = NULL
1355 },
1356 };
1357
1358 const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
1359 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
1360 .stageCount = ARRAY_SIZE(pipeline_shader_stages),
1361 .pStages = pipeline_shader_stages,
1362 .pVertexInputState = vi_create_info,
1363 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
1364 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
1365 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
1366 .primitiveRestartEnable = false,
1367 },
1368 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
1369 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
1370 .viewportCount = 1,
1371 .scissorCount = 1,
1372 },
1373 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
1374 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
1375 .rasterizerDiscardEnable = false,
1376 .polygonMode = VK_POLYGON_MODE_FILL,
1377 .cullMode = VK_CULL_MODE_NONE,
1378 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
1379 },
1380 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
1381 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
1382 .rasterizationSamples = 1,
1383 .sampleShadingEnable = false,
1384 .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
1385 },
1386 .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
1387 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1388 .attachmentCount = 1,
1389 .pAttachments = (VkPipelineColorBlendAttachmentState []) {
1390 { .colorWriteMask =
1391 VK_COLOR_COMPONENT_A_BIT |
1392 VK_COLOR_COMPONENT_R_BIT |
1393 VK_COLOR_COMPONENT_G_BIT |
1394 VK_COLOR_COMPONENT_B_BIT },
1395 }
1396 },
1397 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
1398 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
1399 .dynamicStateCount = 9,
1400 .pDynamicStates = (VkDynamicState[]) {
1401 VK_DYNAMIC_STATE_VIEWPORT,
1402 VK_DYNAMIC_STATE_SCISSOR,
1403 VK_DYNAMIC_STATE_LINE_WIDTH,
1404 VK_DYNAMIC_STATE_DEPTH_BIAS,
1405 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
1406 VK_DYNAMIC_STATE_DEPTH_BOUNDS,
1407 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
1408 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
1409 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
1410 },
1411 },
1412 .flags = 0,
1413 .layout = device->meta_state.blit2d.img_p_layout,
1414 .renderPass = device->meta_state.blit2d.render_pass,
1415 .subpass = 0,
1416 };
1417
1418 const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
1419 .color_attachment_count = -1,
1420 .use_repclear = false,
1421 .disable_vs = true,
1422 .use_rectlist = true
1423 };
1424
1425 result = anv_graphics_pipeline_create(anv_device_to_handle(device),
1426 VK_NULL_HANDLE,
1427 &vk_pipeline_info, &anv_pipeline_info,
1428 &device->meta_state.alloc,
1429 &device->meta_state.blit2d.pipelines[src_type][dst_type]);
1430
1431 ralloc_free(vs.nir);
1432 ralloc_free(fs.nir);
1433
1434 return result;
1435 }
1436
1437 VkResult
1438 anv_device_init_meta_blit2d_state(struct anv_device *device)
1439 {
1440 VkResult result;
1441
1442 zero(device->meta_state.blit2d);
1443
1444 result = anv_CreateRenderPass(anv_device_to_handle(device),
1445 &(VkRenderPassCreateInfo) {
1446 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1447 .attachmentCount = 1,
1448 .pAttachments = &(VkAttachmentDescription) {
1449 .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
1450 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1451 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1452 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
1453 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
1454 },
1455 .subpassCount = 1,
1456 .pSubpasses = &(VkSubpassDescription) {
1457 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1458 .inputAttachmentCount = 0,
1459 .colorAttachmentCount = 1,
1460 .pColorAttachments = &(VkAttachmentReference) {
1461 .attachment = 0,
1462 .layout = VK_IMAGE_LAYOUT_GENERAL,
1463 },
1464 .pResolveAttachments = NULL,
1465 .pDepthStencilAttachment = &(VkAttachmentReference) {
1466 .attachment = VK_ATTACHMENT_UNUSED,
1467 .layout = VK_IMAGE_LAYOUT_GENERAL,
1468 },
1469 .preserveAttachmentCount = 1,
1470 .pPreserveAttachments = (uint32_t[]) { 0 },
1471 },
1472 .dependencyCount = 0,
1473 }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
1474 if (result != VK_SUCCESS)
1475 goto fail;
1476
1477 result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
1478 &(VkDescriptorSetLayoutCreateInfo) {
1479 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1480 .bindingCount = 1,
1481 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1482 {
1483 .binding = 0,
1484 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1485 .descriptorCount = 1,
1486 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
1487 .pImmutableSamplers = NULL
1488 },
1489 }
1490 }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout);
1491 if (result != VK_SUCCESS)
1492 goto fail;
1493
1494 result = anv_CreatePipelineLayout(anv_device_to_handle(device),
1495 &(VkPipelineLayoutCreateInfo) {
1496 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1497 .setLayoutCount = 1,
1498 .pSetLayouts = &device->meta_state.blit2d.img_ds_layout,
1499 },
1500 &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout);
1501 if (result != VK_SUCCESS)
1502 goto fail;
1503
1504 result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
1505 &(VkDescriptorSetLayoutCreateInfo) {
1506 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1507 .bindingCount = 1,
1508 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1509 {
1510 .binding = 0,
1511 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1512 .descriptorCount = 1,
1513 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
1514 .pImmutableSamplers = NULL
1515 },
1516 }
1517 }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout);
1518 if (result != VK_SUCCESS)
1519 goto fail;
1520
1521 result = anv_CreatePipelineLayout(anv_device_to_handle(device),
1522 &(VkPipelineLayoutCreateInfo) {
1523 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1524 .setLayoutCount = 1,
1525 .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout,
1526 },
1527 &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout);
1528 if (result != VK_SUCCESS)
1529 goto fail;
1530
1531 for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
1532 for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
1533 result = blit2d_init_pipeline(device, src, dst);
1534 if (result != VK_SUCCESS)
1535 goto fail;
1536 }
1537 }
1538
1539 return VK_SUCCESS;
1540
1541 fail:
1542 anv_device_finish_meta_blit2d_state(device);
1543 return result;
1544 }