anv/meta2d: Add support for blitting from W-tiled sources on gen7
[mesa.git] / src / intel / vulkan / anv_meta_blit2d.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_meta.h"
25 #include "nir/nir_builder.h"
26
/* How the fragment shader reads the source surface of a 2D blit. */
enum blit2d_src_type {
   /* An ordinary image view can be created for the source, so the shader
    * simply fetches texels from it like any other shader would.
    */
   BLIT2D_SRC_TYPE_NORMAL = 0,

   /* The source is W-tiled and the shader performs the detiling by hand.
    * This path works on every platform, but it is required for all
    * W-tiled sources prior to Broadwell.
    */
   BLIT2D_SRC_TYPE_W_DETILE,

   /* Count of source types; used as a loop bound over the values above. */
   BLIT2D_NUM_SRC_TYPES,
};
41
/* How the fragment shader output reaches the destination surface. */
enum blit2d_dst_type {
   /* The destination is bound as a regular render target and written
    * through the usual render-target path.
    */
   BLIT2D_DST_TYPE_NORMAL = 0,

   /* The destination is W-tiled, so the shader has to do the tiling
    * itself. Every W-tiled destination needs this path.
    *
    * Sky Lake adds a feature for providing explicit stencil values in the
    * shader, but mesa doesn't support that yet so neither do we.
    */
   BLIT2D_DST_TYPE_W_TILE,

   /* The destination uses a 3-channel RGB format. We can't render to
    * non-power-of-two textures, so it is bound as a red texture and the
    * shader picks the right component for each red pixel.
    */
   BLIT2D_DST_TYPE_RGB,

   /* Count of destination types; used as a loop bound over the values
    * above.
    */
   BLIT2D_NUM_DST_TYPES,
};
64
65 static VkFormat
66 vk_format_for_size(int bs)
67 {
68 /* The choice of UNORM and UINT formats is very intentional here. Most of
69 * the time, we want to use a UINT format to avoid any rounding error in
70 * the blit. For stencil blits, R8_UINT is required by the hardware.
71 * (It's the only format allowed in conjunction with W-tiling.) Also we
72 * intentionally use the 4-channel formats whenever we can. This is so
73 * that, when we do a RGB <-> RGBX copy, the two formats will line up even
74 * though one of them is 3/4 the size of the other. The choice of UNORM
75 * vs. UINT is also very intentional because Haswell doesn't handle 8 or
76 * 16-bit RGB UINT formats at all so we have to use UNORM there.
77 * Fortunately, the only time we should ever use two different formats in
78 * the table below is for RGB -> RGBA blits and so we will never have any
79 * UNORM/UINT mismatch.
80 */
81 switch (bs) {
82 case 1: return VK_FORMAT_R8_UINT;
83 case 2: return VK_FORMAT_R8G8_UINT;
84 case 3: return VK_FORMAT_R8G8B8_UNORM;
85 case 4: return VK_FORMAT_R8G8B8A8_UNORM;
86 case 6: return VK_FORMAT_R16G16B16_UNORM;
87 case 8: return VK_FORMAT_R16G16B16A16_UNORM;
88 case 12: return VK_FORMAT_R32G32B32_UINT;
89 case 16: return VK_FORMAT_R32G32B32A32_UINT;
90 default:
91 unreachable("Invalid format block size");
92 }
93 }
94
95 static void
96 create_iview(struct anv_cmd_buffer *cmd_buffer,
97 struct anv_meta_blit2d_surf *surf,
98 struct anv_meta_blit2d_rect *rect,
99 VkImageUsageFlags usage,
100 VkImage *img,
101 struct anv_image_view *iview)
102 {
103 struct isl_tile_info tile_info;
104 isl_tiling_get_info(&cmd_buffer->device->isl_dev,
105 surf->tiling, surf->bs, &tile_info);
106 const unsigned tile_width_px = tile_info.width > surf->bs ?
107 tile_info.width / surf->bs : 1;
108 uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ?
109 &rect->src_y : &rect->dst_y;
110 uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ?
111 &rect->src_x : &rect->dst_x;
112
113 /* Define the shared state among all created image views */
114 const VkImageCreateInfo image_info = {
115 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
116 .imageType = VK_IMAGE_TYPE_2D,
117 .format = vk_format_for_size(surf->bs),
118 .extent = {
119 .width = rect->width + (*rect_x) % tile_width_px,
120 .height = rect->height + (*rect_y) % tile_info.height,
121 .depth = 1,
122 },
123 .mipLevels = 1,
124 .arrayLayers = 1,
125 .samples = 1,
126 .tiling = surf->tiling == ISL_TILING_LINEAR ?
127 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
128 .usage = usage,
129 };
130
131 /* Create the VkImage that is bound to the surface's memory. */
132 anv_image_create(anv_device_to_handle(cmd_buffer->device),
133 &(struct anv_image_create_info) {
134 .vk_info = &image_info,
135 .isl_tiling_flags = 1 << surf->tiling,
136 .stride = surf->pitch,
137 }, &cmd_buffer->pool->alloc, img);
138
139 /* We could use a vk call to bind memory, but that would require
140 * creating a dummy memory object etc. so there's really no point.
141 */
142 anv_image_from_handle(*img)->bo = surf->bo;
143 anv_image_from_handle(*img)->offset = surf->base_offset;
144
145 /* Create a VkImageView that starts at the tile aligned offset closest
146 * to the provided x/y offset into the surface.
147 */
148 struct isl_surf *isl_surf = &anv_image_from_handle(*img)->color_surface.isl;
149
150 uint32_t img_o = 0;
151 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
152 isl_surf->tiling, surf->bs,
153 isl_surf->row_pitch,
154 *rect_x * surf->bs, *rect_y,
155 &img_o, rect_x, rect_y);
156
157 anv_image_view_init(iview, cmd_buffer->device,
158 &(VkImageViewCreateInfo) {
159 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
160 .image = *img,
161 .viewType = VK_IMAGE_VIEW_TYPE_2D,
162 .format = image_info.format,
163 .subresourceRange = {
164 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
165 .baseMipLevel = 0,
166 .levelCount = 1,
167 .baseArrayLayer = 0,
168 .layerCount = 1
169 },
170 }, cmd_buffer, img_o, usage);
171 }
172
173 struct blit2d_src_temps {
174 VkImage image;
175 struct anv_image_view iview;
176
177 struct anv_buffer buffer;
178 struct anv_buffer_view bview;
179
180 VkDescriptorPool desc_pool;
181 VkDescriptorSet set;
182 };
183
184 static void
185 blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer,
186 struct anv_meta_blit2d_surf *src,
187 enum blit2d_src_type src_type,
188 struct anv_meta_blit2d_rect *rect,
189 struct blit2d_src_temps *tmp)
190 {
191 struct anv_device *device = cmd_buffer->device;
192 VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
193
194 if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
195 create_iview(cmd_buffer, src, rect, VK_IMAGE_USAGE_SAMPLED_BIT,
196 &tmp->image, &tmp->iview);
197
198 anv_CreateDescriptorPool(vk_device,
199 &(const VkDescriptorPoolCreateInfo) {
200 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
201 .pNext = NULL,
202 .flags = 0,
203 .maxSets = 1,
204 .poolSizeCount = 1,
205 .pPoolSizes = (VkDescriptorPoolSize[]) {
206 {
207 .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
208 .descriptorCount = 1
209 },
210 }
211 }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
212
213 anv_AllocateDescriptorSets(vk_device,
214 &(VkDescriptorSetAllocateInfo) {
215 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
216 .descriptorPool = tmp->desc_pool,
217 .descriptorSetCount = 1,
218 .pSetLayouts = &device->meta_state.blit2d.img_ds_layout
219 }, &tmp->set);
220
221 anv_UpdateDescriptorSets(vk_device,
222 1, /* writeCount */
223 (VkWriteDescriptorSet[]) {
224 {
225 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
226 .dstSet = tmp->set,
227 .dstBinding = 0,
228 .dstArrayElement = 0,
229 .descriptorCount = 1,
230 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
231 .pImageInfo = (VkDescriptorImageInfo[]) {
232 {
233 .sampler = NULL,
234 .imageView = anv_image_view_to_handle(&tmp->iview),
235 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
236 },
237 }
238 }
239 }, 0, NULL);
240
241 anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
242 VK_PIPELINE_BIND_POINT_GRAPHICS,
243 device->meta_state.blit2d.img_p_layout, 0, 1,
244 &tmp->set, 0, NULL);
245 } else {
246 assert(src_type == BLIT2D_SRC_TYPE_W_DETILE);
247 assert(src->tiling == ISL_TILING_W);
248 assert(src->bs == 1);
249
250 uint32_t tile_offset = 0;
251 isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
252 ISL_TILING_W, 1, src->pitch,
253 rect->src_x, rect->src_y,
254 &tile_offset,
255 &rect->src_x, &rect->src_y);
256
257 tmp->buffer = (struct anv_buffer) {
258 .device = device,
259 .size = align_u32(rect->src_y + rect->height, 64) * src->pitch,
260 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
261 .bo = src->bo,
262 .offset = src->base_offset + tile_offset,
263 };
264
265 anv_buffer_view_init(&tmp->bview, device,
266 &(VkBufferViewCreateInfo) {
267 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
268 .buffer = anv_buffer_to_handle(&tmp->buffer),
269 .format = VK_FORMAT_R8_UINT,
270 .offset = 0,
271 .range = VK_WHOLE_SIZE,
272 }, cmd_buffer);
273
274 anv_CreateDescriptorPool(vk_device,
275 &(const VkDescriptorPoolCreateInfo) {
276 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
277 .pNext = NULL,
278 .flags = 0,
279 .maxSets = 1,
280 .poolSizeCount = 1,
281 .pPoolSizes = (VkDescriptorPoolSize[]) {
282 {
283 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
284 .descriptorCount = 1
285 },
286 }
287 }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
288
289 anv_AllocateDescriptorSets(vk_device,
290 &(VkDescriptorSetAllocateInfo) {
291 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
292 .descriptorPool = tmp->desc_pool,
293 .descriptorSetCount = 1,
294 .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout
295 }, &tmp->set);
296
297 anv_UpdateDescriptorSets(vk_device,
298 1, /* writeCount */
299 (VkWriteDescriptorSet[]) {
300 {
301 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
302 .dstSet = tmp->set,
303 .dstBinding = 0,
304 .dstArrayElement = 0,
305 .descriptorCount = 1,
306 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
307 .pTexelBufferView = (VkBufferView[]) {
308 anv_buffer_view_to_handle(&tmp->bview),
309 },
310 }
311 }, 0, NULL);
312
313 anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
314 VK_PIPELINE_BIND_POINT_GRAPHICS,
315 device->meta_state.blit2d.buf_p_layout, 0, 1,
316 &tmp->set, 0, NULL);
317 }
318 }
319
320 static void
321 blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer,
322 enum blit2d_src_type src_type,
323 struct blit2d_src_temps *tmp)
324 {
325 anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device),
326 tmp->desc_pool, &cmd_buffer->pool->alloc);
327 if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
328 anv_DestroyImage(anv_device_to_handle(cmd_buffer->device),
329 tmp->image, &cmd_buffer->pool->alloc);
330 }
331 }
332
/* Finish a sequence of blit2d operations by restoring the command buffer
 * state captured in `save`.
 */
void
anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_meta_saved_state *save)
{
   anv_meta_restore(save, cmd_buffer);
}
339
340 void
341 anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
342 struct anv_meta_saved_state *save)
343 {
344 anv_meta_save(save, cmd_buffer,
345 (1 << VK_DYNAMIC_STATE_VIEWPORT));
346 }
347
348 static void
349 bind_pipeline(struct anv_cmd_buffer *cmd_buffer,
350 enum blit2d_src_type src_type,
351 enum blit2d_dst_type dst_type)
352 {
353 VkPipeline pipeline =
354 cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type];
355
356 if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
357 anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
358 VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
359 }
360 }
361
362 static void
363 anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer,
364 struct anv_meta_blit2d_surf *src,
365 enum blit2d_src_type src_type,
366 struct anv_meta_blit2d_surf *dst,
367 unsigned num_rects,
368 struct anv_meta_blit2d_rect *rects)
369 {
370 struct anv_device *device = cmd_buffer->device;
371 VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
372 VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
373
374 for (unsigned r = 0; r < num_rects; ++r) {
375 struct blit2d_src_temps src_temps;
376 blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
377
378 VkImage dst_img;
379 struct anv_image_view dst_iview;
380 create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview);
381
382 struct blit_vb_data {
383 float pos[2];
384 float tex_coord[3];
385 } *vb_data;
386
387 unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
388
389 struct anv_state vb_state =
390 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
391 memset(vb_state.map, 0, sizeof(struct anv_vue_header));
392 vb_data = vb_state.map + sizeof(struct anv_vue_header);
393
394 vb_data[0] = (struct blit_vb_data) {
395 .pos = {
396 rects[r].dst_x + rects[r].width,
397 rects[r].dst_y + rects[r].height,
398 },
399 .tex_coord = {
400 rects[r].src_x + rects[r].width,
401 rects[r].src_y + rects[r].height,
402 src->pitch,
403 },
404 };
405
406 vb_data[1] = (struct blit_vb_data) {
407 .pos = {
408 rects[r].dst_x,
409 rects[r].dst_y + rects[r].height,
410 },
411 .tex_coord = {
412 rects[r].src_x,
413 rects[r].src_y + rects[r].height,
414 src->pitch,
415 },
416 };
417
418 vb_data[2] = (struct blit_vb_data) {
419 .pos = {
420 rects[r].dst_x,
421 rects[r].dst_y,
422 },
423 .tex_coord = {
424 rects[r].src_x,
425 rects[r].src_y,
426 src->pitch,
427 },
428 };
429
430 anv_state_clflush(vb_state);
431
432 struct anv_buffer vertex_buffer = {
433 .device = device,
434 .size = vb_size,
435 .bo = &device->dynamic_state_block_pool.bo,
436 .offset = vb_state.offset,
437 };
438
439 anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
440 (VkBuffer[]) {
441 anv_buffer_to_handle(&vertex_buffer),
442 anv_buffer_to_handle(&vertex_buffer)
443 },
444 (VkDeviceSize[]) {
445 0,
446 sizeof(struct anv_vue_header),
447 });
448
449 VkFramebuffer fb;
450 anv_CreateFramebuffer(vk_device,
451 &(VkFramebufferCreateInfo) {
452 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
453 .attachmentCount = 1,
454 .pAttachments = (VkImageView[]) {
455 anv_image_view_to_handle(&dst_iview),
456 },
457 .width = dst_iview.extent.width,
458 .height = dst_iview.extent.height,
459 .layers = 1
460 }, &cmd_buffer->pool->alloc, &fb);
461
462 ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
463 &(VkRenderPassBeginInfo) {
464 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
465 .renderPass = device->meta_state.blit2d.render_pass,
466 .framebuffer = fb,
467 .renderArea = {
468 .offset = { rects[r].dst_x, rects[r].dst_y, },
469 .extent = { rects[r].width, rects[r].height },
470 },
471 .clearValueCount = 0,
472 .pClearValues = NULL,
473 }, VK_SUBPASS_CONTENTS_INLINE);
474
475 bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL);
476
477 anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
478 &(VkViewport) {
479 .x = 0.0f,
480 .y = 0.0f,
481 .width = dst_iview.extent.width,
482 .height = dst_iview.extent.height,
483 .minDepth = 0.0f,
484 .maxDepth = 1.0f,
485 });
486
487 ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
488
489 ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
490
491 /* At the point where we emit the draw call, all data from the
492 * descriptor sets, etc. has been used. We are free to delete it.
493 */
494 blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
495 anv_DestroyFramebuffer(vk_device, fb, &cmd_buffer->pool->alloc);
496 anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc);
497 }
498 }
499
500 void
501 anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
502 struct anv_meta_blit2d_surf *src,
503 struct anv_meta_blit2d_surf *dst,
504 unsigned num_rects,
505 struct anv_meta_blit2d_rect *rects)
506 {
507 enum blit2d_src_type src_type;
508 if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) {
509 src_type = BLIT2D_SRC_TYPE_W_DETILE;
510 } else {
511 src_type = BLIT2D_SRC_TYPE_NORMAL;
512 }
513
514 if (dst->tiling == ISL_TILING_W) {
515 assert(dst->bs == 1);
516 anv_finishme("Blitting to w-tiled destinations not yet supported");
517 return;
518 } else if (dst->bs % 3 == 0) {
519 anv_finishme("Blitting to RGB destinations not yet supported");
520 return;
521 } else {
522 assert(util_is_power_of_two(dst->bs));
523 anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst,
524 num_rects, rects);
525 }
526 }
527
528 static nir_shader *
529 build_nir_vertex_shader(void)
530 {
531 const struct glsl_type *vec4 = glsl_vec4_type();
532 nir_builder b;
533
534 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
535 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
536
537 nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
538 vec4, "a_pos");
539 pos_in->data.location = VERT_ATTRIB_GENERIC0;
540 nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
541 vec4, "gl_Position");
542 pos_out->data.location = VARYING_SLOT_POS;
543 nir_copy_var(&b, pos_out, pos_in);
544
545 nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
546 vec4, "a_tex_pos");
547 tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
548 nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
549 vec4, "v_tex_pos");
550 tex_pos_out->data.location = VARYING_SLOT_VAR0;
551 tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
552 nir_copy_var(&b, tex_pos_out, tex_pos_in);
553
554 return b.shader;
555 }
556
557 typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
558 struct anv_device *,
559 nir_ssa_def *, nir_ssa_def *);
560
561 static nir_ssa_def *
562 nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset,
563 nir_ssa_def *src, unsigned src_offset, unsigned num_bits)
564 {
565 unsigned src_mask = (~1u >> (32 - num_bits)) << src_offset;
566 nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask));
567
568 nir_ssa_def *shifted;
569 if (dst_offset > src_offset) {
570 shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset));
571 } else if (dst_offset < src_offset) {
572 shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset));
573 } else {
574 assert(dst_offset == src_offset);
575 shifted = masked;
576 }
577
578 return nir_ior(b, dst, shifted);
579 }
580
581 static nir_ssa_def *
582 build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device,
583 nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
584 {
585 nir_ssa_def *x = nir_channel(b, tex_pos, 0);
586 nir_ssa_def *y = nir_channel(b, tex_pos, 1);
587
588 /* First, compute the block-aligned offset */
589 nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6));
590 nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6));
591 nir_ssa_def *offset =
592 nir_iadd(b, nir_imul(b, y_major,
593 nir_imul(b, tex_pitch, nir_imm_int(b, 64))),
594 nir_imul(b, x_major, nir_imm_int(b, 4096)));
595
596 /* Compute the bottom 12 bits of the offset */
597 offset = nir_copy_bits(b, offset, 0, x, 0, 1);
598 offset = nir_copy_bits(b, offset, 1, y, 0, 1);
599 offset = nir_copy_bits(b, offset, 2, x, 1, 1);
600 offset = nir_copy_bits(b, offset, 3, y, 1, 1);
601 offset = nir_copy_bits(b, offset, 4, x, 2, 1);
602 offset = nir_copy_bits(b, offset, 5, y, 2, 4);
603 offset = nir_copy_bits(b, offset, 9, x, 3, 3);
604
605 if (device->isl_dev.has_bit6_swizzling) {
606 offset = nir_ixor(b, offset,
607 nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)),
608 nir_imm_int(b, 3)));
609 }
610
611 const struct glsl_type *sampler_type =
612 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
613 nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
614 sampler_type, "s_tex");
615 sampler->data.descriptor_set = 0;
616 sampler->data.binding = 0;
617
618 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
619 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
620 tex->op = nir_texop_txf;
621 tex->src[0].src_type = nir_tex_src_coord;
622 tex->src[0].src = nir_src_for_ssa(offset);
623 tex->dest_type = nir_type_float; /* TODO */
624 tex->is_array = false;
625 tex->coord_components = 1;
626 tex->texture = nir_deref_var_create(tex, sampler);
627 tex->sampler = NULL;
628
629 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
630 nir_builder_instr_insert(b, &tex->instr);
631
632 return &tex->dest.ssa;
633 }
634
635 static nir_ssa_def *
636 build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device,
637 nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
638 {
639 const struct glsl_type *sampler_type =
640 glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
641 nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
642 sampler_type, "s_tex");
643 sampler->data.descriptor_set = 0;
644 sampler->data.binding = 0;
645
646 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
647 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
648 tex->op = nir_texop_txf;
649 tex->src[0].src_type = nir_tex_src_coord;
650 tex->src[0].src = nir_src_for_ssa(tex_pos);
651 tex->src[1].src_type = nir_tex_src_lod;
652 tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
653 tex->dest_type = nir_type_float; /* TODO */
654 tex->is_array = false;
655 tex->coord_components = 2;
656 tex->texture = nir_deref_var_create(tex, sampler);
657 tex->sampler = NULL;
658
659 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
660 nir_builder_instr_insert(b, &tex->instr);
661
662 return &tex->dest.ssa;
663 }
664
665 static nir_shader *
666 build_nir_copy_fragment_shader(struct anv_device *device,
667 texel_fetch_build_func txf_func)
668 {
669 const struct glsl_type *vec4 = glsl_vec4_type();
670 const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
671 nir_builder b;
672
673 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
674 b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
675
676 nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
677 vec3, "v_tex_pos");
678 tex_pos_in->data.location = VARYING_SLOT_VAR0;
679
680 nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
681 vec4, "f_color");
682 color_out->data.location = FRAG_RESULT_DATA0;
683
684 nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
685 unsigned swiz[4] = { 0, 1 };
686 nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
687 nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
688
689 nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
690 nir_store_var(&b, color_out, color, 0xf);
691
692 return b.shader;
693 }
694
695 void
696 anv_device_finish_meta_blit2d_state(struct anv_device *device)
697 {
698 if (device->meta_state.blit2d.render_pass) {
699 anv_DestroyRenderPass(anv_device_to_handle(device),
700 device->meta_state.blit2d.render_pass,
701 &device->meta_state.alloc);
702 }
703
704 if (device->meta_state.blit2d.img_p_layout) {
705 anv_DestroyPipelineLayout(anv_device_to_handle(device),
706 device->meta_state.blit2d.img_p_layout,
707 &device->meta_state.alloc);
708 }
709
710 if (device->meta_state.blit2d.img_ds_layout) {
711 anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
712 device->meta_state.blit2d.img_ds_layout,
713 &device->meta_state.alloc);
714 }
715
716 if (device->meta_state.blit2d.buf_p_layout) {
717 anv_DestroyPipelineLayout(anv_device_to_handle(device),
718 device->meta_state.blit2d.buf_p_layout,
719 &device->meta_state.alloc);
720 }
721
722 if (device->meta_state.blit2d.buf_ds_layout) {
723 anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
724 device->meta_state.blit2d.buf_ds_layout,
725 &device->meta_state.alloc);
726 }
727
728 for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
729 for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
730 if (device->meta_state.blit2d.pipelines[src][dst]) {
731 anv_DestroyPipeline(anv_device_to_handle(device),
732 device->meta_state.blit2d.pipelines[src][dst],
733 &device->meta_state.alloc);
734 }
735 }
736 }
737 }
738
739 static VkResult
740 blit2d_init_pipeline(struct anv_device *device,
741 enum blit2d_src_type src_type,
742 enum blit2d_dst_type dst_type)
743 {
744 VkResult result;
745
746 texel_fetch_build_func src_func;
747 switch (src_type) {
748 case BLIT2D_SRC_TYPE_NORMAL:
749 src_func = build_nir_texel_fetch;
750 break;
751 case BLIT2D_SRC_TYPE_W_DETILE:
752 src_func = build_nir_w_tiled_fetch;
753 break;
754 default:
755 unreachable("Invalid blit2d source type");
756 }
757
758 struct anv_shader_module fs = { .nir = NULL };
759 switch (dst_type) {
760 case BLIT2D_DST_TYPE_NORMAL:
761 fs.nir = build_nir_copy_fragment_shader(device, src_func);
762 break;
763 case BLIT2D_DST_TYPE_W_TILE:
764 case BLIT2D_DST_TYPE_RGB:
765 /* Not yet supported */
766 default:
767 return VK_SUCCESS;
768 }
769
770 /* We don't use a vertex shader for blitting, but instead build and pass
771 * the VUEs directly to the rasterization backend. However, we do need
772 * to provide GLSL source for the vertex shader so that the compiler
773 * does not dead-code our inputs.
774 */
775 struct anv_shader_module vs = {
776 .nir = build_nir_vertex_shader(),
777 };
778
779 VkPipelineVertexInputStateCreateInfo vi_create_info = {
780 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
781 .vertexBindingDescriptionCount = 2,
782 .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
783 {
784 .binding = 0,
785 .stride = 0,
786 .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
787 },
788 {
789 .binding = 1,
790 .stride = 5 * sizeof(float),
791 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
792 },
793 },
794 .vertexAttributeDescriptionCount = 3,
795 .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
796 {
797 /* VUE Header */
798 .location = 0,
799 .binding = 0,
800 .format = VK_FORMAT_R32G32B32A32_UINT,
801 .offset = 0
802 },
803 {
804 /* Position */
805 .location = 1,
806 .binding = 1,
807 .format = VK_FORMAT_R32G32_SFLOAT,
808 .offset = 0
809 },
810 {
811 /* Texture Coordinate */
812 .location = 2,
813 .binding = 1,
814 .format = VK_FORMAT_R32G32B32_SFLOAT,
815 .offset = 8
816 }
817 }
818 };
819
820 VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
821 {
822 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
823 .stage = VK_SHADER_STAGE_VERTEX_BIT,
824 .module = anv_shader_module_to_handle(&vs),
825 .pName = "main",
826 .pSpecializationInfo = NULL
827 }, {
828 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
829 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
830 .module = anv_shader_module_to_handle(&fs),
831 .pName = "main",
832 .pSpecializationInfo = NULL
833 },
834 };
835
836 const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
837 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
838 .stageCount = ARRAY_SIZE(pipeline_shader_stages),
839 .pStages = pipeline_shader_stages,
840 .pVertexInputState = &vi_create_info,
841 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
842 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
843 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
844 .primitiveRestartEnable = false,
845 },
846 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
847 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
848 .viewportCount = 1,
849 .scissorCount = 1,
850 },
851 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
852 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
853 .rasterizerDiscardEnable = false,
854 .polygonMode = VK_POLYGON_MODE_FILL,
855 .cullMode = VK_CULL_MODE_NONE,
856 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
857 },
858 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
859 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
860 .rasterizationSamples = 1,
861 .sampleShadingEnable = false,
862 .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
863 },
864 .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
865 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
866 .attachmentCount = 1,
867 .pAttachments = (VkPipelineColorBlendAttachmentState []) {
868 { .colorWriteMask =
869 VK_COLOR_COMPONENT_A_BIT |
870 VK_COLOR_COMPONENT_R_BIT |
871 VK_COLOR_COMPONENT_G_BIT |
872 VK_COLOR_COMPONENT_B_BIT },
873 }
874 },
875 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
876 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
877 .dynamicStateCount = 9,
878 .pDynamicStates = (VkDynamicState[]) {
879 VK_DYNAMIC_STATE_VIEWPORT,
880 VK_DYNAMIC_STATE_SCISSOR,
881 VK_DYNAMIC_STATE_LINE_WIDTH,
882 VK_DYNAMIC_STATE_DEPTH_BIAS,
883 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
884 VK_DYNAMIC_STATE_DEPTH_BOUNDS,
885 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
886 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
887 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
888 },
889 },
890 .flags = 0,
891 .layout = device->meta_state.blit2d.img_p_layout,
892 .renderPass = device->meta_state.blit2d.render_pass,
893 .subpass = 0,
894 };
895
896 const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
897 .color_attachment_count = -1,
898 .use_repclear = false,
899 .disable_viewport = true,
900 .disable_scissor = true,
901 .disable_vs = true,
902 .use_rectlist = true
903 };
904
905 result = anv_graphics_pipeline_create(anv_device_to_handle(device),
906 VK_NULL_HANDLE,
907 &vk_pipeline_info, &anv_pipeline_info,
908 &device->meta_state.alloc,
909 &device->meta_state.blit2d.pipelines[src_type][dst_type]);
910
911 ralloc_free(vs.nir);
912 ralloc_free(fs.nir);
913
914 return result;
915 }
916
917 VkResult
918 anv_device_init_meta_blit2d_state(struct anv_device *device)
919 {
920 VkResult result;
921
922 zero(device->meta_state.blit2d);
923
924 result = anv_CreateRenderPass(anv_device_to_handle(device),
925 &(VkRenderPassCreateInfo) {
926 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
927 .attachmentCount = 1,
928 .pAttachments = &(VkAttachmentDescription) {
929 .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
930 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
931 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
932 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
933 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
934 },
935 .subpassCount = 1,
936 .pSubpasses = &(VkSubpassDescription) {
937 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
938 .inputAttachmentCount = 0,
939 .colorAttachmentCount = 1,
940 .pColorAttachments = &(VkAttachmentReference) {
941 .attachment = 0,
942 .layout = VK_IMAGE_LAYOUT_GENERAL,
943 },
944 .pResolveAttachments = NULL,
945 .pDepthStencilAttachment = &(VkAttachmentReference) {
946 .attachment = VK_ATTACHMENT_UNUSED,
947 .layout = VK_IMAGE_LAYOUT_GENERAL,
948 },
949 .preserveAttachmentCount = 1,
950 .pPreserveAttachments = (uint32_t[]) { 0 },
951 },
952 .dependencyCount = 0,
953 }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
954 if (result != VK_SUCCESS)
955 goto fail;
956
957 result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
958 &(VkDescriptorSetLayoutCreateInfo) {
959 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
960 .bindingCount = 1,
961 .pBindings = (VkDescriptorSetLayoutBinding[]) {
962 {
963 .binding = 0,
964 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
965 .descriptorCount = 1,
966 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
967 .pImmutableSamplers = NULL
968 },
969 }
970 }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout);
971 if (result != VK_SUCCESS)
972 goto fail;
973
974 result = anv_CreatePipelineLayout(anv_device_to_handle(device),
975 &(VkPipelineLayoutCreateInfo) {
976 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
977 .setLayoutCount = 1,
978 .pSetLayouts = &device->meta_state.blit2d.img_ds_layout,
979 },
980 &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout);
981 if (result != VK_SUCCESS)
982 goto fail;
983
984 result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
985 &(VkDescriptorSetLayoutCreateInfo) {
986 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
987 .bindingCount = 1,
988 .pBindings = (VkDescriptorSetLayoutBinding[]) {
989 {
990 .binding = 0,
991 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
992 .descriptorCount = 1,
993 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
994 .pImmutableSamplers = NULL
995 },
996 }
997 }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout);
998 if (result != VK_SUCCESS)
999 goto fail;
1000
1001 result = anv_CreatePipelineLayout(anv_device_to_handle(device),
1002 &(VkPipelineLayoutCreateInfo) {
1003 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1004 .setLayoutCount = 1,
1005 .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout,
1006 },
1007 &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout);
1008 if (result != VK_SUCCESS)
1009 goto fail;
1010
1011 for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
1012 for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
1013 result = blit2d_init_pipeline(device, src, dst);
1014 if (result != VK_SUCCESS)
1015 goto fail;
1016 }
1017 }
1018
1019 return VK_SUCCESS;
1020
1021 fail:
1022 anv_device_finish_meta_blit2d_state(device);
1023 return result;
1024 }