src/intel/vulkan/anv_nir_lower_multiview.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "util/debug.h"

/**
 * This file implements the lowering required for VK_KHR_multiview.
 *
 * When possible, Primitive Replication is used and the shader is modified to
 * make gl_Position an array and fill it with values for each view.
 *
 * Otherwise we implement multiview using instanced rendering. The number of
 * instances in each draw call is multiplied by the number of views in the
 * subpass. Then, in the shader, we divide gl_InstanceID by the number of
 * views to recover the application's instance ID, and remap
 * gl_InstanceID % view_count to the actual ViewIndex.
 */
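
/* As an illustration, with view_mask = 0b0110 (views 1 and 2) and a draw of N
 * instances, the instance count becomes 2 * N and each gl_InstanceID
 * decomposes as:
 *
 *    real instance  = gl_InstanceID / 2  ->  0, 0, 1, 1, ...
 *    compacted view = gl_InstanceID % 2  ->  0, 1, 0, 1, ...
 *    actual ViewIndex (after remapping)  ->  1, 2, 1, 2, ...
 */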

struct lower_multiview_state {
   nir_builder builder;

   uint32_t view_mask;

   nir_ssa_def *instance_id;
   nir_ssa_def *view_index;
};

static nir_ssa_def *
build_instance_id(struct lower_multiview_state *state)
{
   assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);

   if (state->instance_id == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      /* We use instancing for implementing multiview. The actual instance id
       * is given by dividing instance_id by the number of views in this
       * subpass.
       */
      state->instance_id =
         nir_idiv(b, nir_load_instance_id(b),
                  nir_imm_int(b, util_bitcount(state->view_mask)));
   }

   return state->instance_id;
}

static nir_ssa_def *
build_view_index(struct lower_multiview_state *state)
{
   if (state->view_index == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      assert(state->view_mask != 0);
      if (util_bitcount(state->view_mask) == 1) {
         /* Set the view index directly. */
         state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
      } else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
         /* We only support 16 views */
         assert((state->view_mask & 0xffff0000) == 0);

         /* We use instancing for implementing multiview. The compacted view
          * id is given by instance_id % view_count. We then have to convert
          * that to an actual view id.
          */
         nir_ssa_def *compacted =
            nir_umod(b, nir_load_instance_id(b),
                     nir_imm_int(b, util_bitcount(state->view_mask)));

         if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
            /* If we have a full view mask, then compacted is what we want */
            state->view_index = compacted;
         } else {
            /* Now we define a map from compacted view index to the actual
             * view index that's based on the view_mask. The map is given by
             * 16 nibbles, each of which is a value from 0 to 15.
             */
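            /* For example, view_mask = 0b1010 (views 1 and 3) gives
             * remap = 0x31: compacted index 0 reads nibble 0 and yields view
             * 1, while compacted index 1 reads nibble 1 and yields view 3.
             */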
            uint64_t remap = 0;
            uint32_t bit, i = 0;
            for_each_bit(bit, state->view_mask) {
               assert(bit < 16);
               remap |= (uint64_t)bit << (i++ * 4);
            }

            nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));

            /* One of these days, when we have int64 everywhere, this will be
             * easier.
             */
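            /* The code below selects a nibble from the 64-bit remap using
             * only 32-bit operations. When remap fits in 32 bits the low
             * dword is enough; otherwise both halves are shifted and bcsel
             * picks the low one for shift < 32 and the high one (shifted by
             * shift - 32) for larger shifts.
             */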
            nir_ssa_def *shifted;
            if (remap <= UINT32_MAX) {
               shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
            } else {
               nir_ssa_def *shifted_low =
                  nir_ushr(b, nir_imm_int(b, remap), shift);
               nir_ssa_def *shifted_high =
                  nir_ushr(b, nir_imm_int(b, remap >> 32),
                           nir_isub(b, shift, nir_imm_int(b, 32)));
               shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
                                   shifted_low, shifted_high);
            }
            state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
         }
      } else {
         const struct glsl_type *type = glsl_int_type();
         if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
             b->shader->info.stage == MESA_SHADER_GEOMETRY)
            type = glsl_array_type(type, 1, 0);

         nir_variable *idx_var =
            nir_variable_create(b->shader, nir_var_shader_in,
                                type, "view index");
         idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
            idx_var->data.interpolation = INTERP_MODE_FLAT;

         nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
         if (glsl_type_is_array(type))
            deref = nir_build_deref_array_imm(b, deref, 0);

         state->view_index = nir_load_deref(b, deref);
      }
   }

   return state->view_index;
}

/* Primitive Replication allows a shader to write different positions for each
 * view in the same execution. If only the position depends on the view, then
 * it is possible to use the feature instead of instancing to implement
 * multiview.
 */
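/* As an illustration (the names below are made up), a vertex shader that in
 * GLSL terms did
 *
 *    gl_Position = mvp[gl_ViewIndex] * in_pos;
 *
 * is conceptually rewritten into
 *
 *    for (uint i = 0; i < view_count; i++)
 *       gl_Position[i] = mvp[view_index[i]] * in_pos;
 *
 * with gl_Position promoted to a per-view array output.
 */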
static bool
lower_multiview_with_primitive_replication(nir_shader *shader,
                                            struct anv_graphics_pipeline *pipeline)
{
   if (shader->info.stage == MESA_SHADER_FRAGMENT)
      return false;

   assert(shader->info.stage == MESA_SHADER_VERTEX);

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   assert(view_count > 1 && view_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   /* Update position to refer to an array. */
   nir_variable *pos_var = NULL;
   nir_foreach_variable(var, &shader->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         assert(var->type == glsl_vec4_type());
         var->type = glsl_array_type(glsl_vec4_type(), view_count, 0);
         var->data.per_view = true;
         pos_var = var;
         break;
      }
   }

   assert(pos_var);

   nir_cf_list body;
   nir_cf_list_extract(&body, &entrypoint->body);

   nir_builder b;
   nir_builder_init(&b, entrypoint);
   b.cursor = nir_after_cf_list(&entrypoint->body);

   /* Fill the Layer ID with zero. Replication will use that as the base to
    * apply the RTAI (Render Target Array Index) offsets.
    */
   nir_variable *layer_id_out =
      nir_variable_create(shader, nir_var_shader_out,
                          glsl_int_type(), "layer ID");
   layer_id_out->data.location = VARYING_SLOT_LAYER;
   nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);

   /* The loop index will go from 0 to view_count - 1. */
   nir_variable *loop_index_var =
      nir_local_variable_create(entrypoint, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(&b, loop_index_var);
   nir_store_deref(&b, loop_index_deref, nir_imm_int(&b, 0), 1);

   /* Array of the view index values that are active in the loop. Note that
    * the loop index only matches the view index if there are no gaps in the
    * view_mask.
    */
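   /* For example, view_mask = 0b1010 gives view_index[] = { 1, 3 }, so loop
    * index 0 renders view 1 and loop index 1 renders view 3.
    */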
   nir_variable *view_index_var = nir_local_variable_create(
      entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0), "view_index");
   nir_deref_instr *view_index_deref = nir_build_deref_var(&b, view_index_var);
   {
      int array_position = 0;
      uint32_t view_index;
      for_each_bit(view_index, view_mask) {
         nir_store_deref(&b, nir_build_deref_array_imm(&b, view_index_deref, array_position),
                         nir_imm_int(&b, view_index), 1);
         array_position++;
      }
   }

   /* Create the equivalent of
    *
    *    while (true):
    *       if (loop_index >= view_count):
    *          break
    *
    *       view_index = active_indices[loop_index]
    *       pos_deref = &pos[loop_index]
    *
    *       # Placeholder for the body to be reinserted.
    *
    *       loop_index += 1
    *
    * Later both `view_index` and `pos_deref` will be used to rewrite the
    * original shader body.
    */

   nir_loop *loop = nir_push_loop(&b);

   nir_ssa_def *loop_index = nir_load_deref(&b, loop_index_deref);
   nir_ssa_def *cmp = nir_ige(&b, loop_index, nir_imm_int(&b, view_count));
   nir_if *loop_check = nir_push_if(&b, cmp);
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, loop_check);

   nir_ssa_def *view_index =
      nir_load_deref(&b, nir_build_deref_array(&b, view_index_deref, loop_index));
   nir_deref_instr *pos_deref =
      nir_build_deref_array(&b, nir_build_deref_var(&b, pos_var), loop_index);

   nir_store_deref(&b, loop_index_deref, nir_iadd_imm(&b, loop_index, 1), 1);
   nir_pop_loop(&b, loop);

   /* Reinsert the body. */
   b.cursor = nir_after_instr(&pos_deref->instr);
   nir_cf_reinsert(&body, b.cursor);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_view_index: {
            assert(intrin->dest.is_ssa);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(view_index));
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
            if (var == pos_var) {
               nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);

               nir_instr_rewrite_src(instr, &intrin->src[0],
                                     nir_src_for_ssa(&pos_deref->dest.ssa));

               /* Remove old deref since it has the wrong type. */
               nir_deref_instr_remove_if_unused(old_deref);
            }
            break;
         }

         case nir_intrinsic_load_deref:
            if (nir_intrinsic_get_var(intrin, 0) == pos_var) {
               unreachable("Should have lowered I/O to temporaries "
                           "so no load_deref on position output is expected.");
            }
            break;

         case nir_intrinsic_copy_deref:
            unreachable("Should have lowered copy_derefs at this point");
            break;

         default:
            /* Do nothing. */
            break;
         }
      }
   }

   nir_metadata_preserve(entrypoint, nir_metadata_none);
   return true;
}

bool
anv_nir_lower_multiview(nir_shader *shader,
                        struct anv_graphics_pipeline *pipeline)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   uint32_t view_mask = pipeline->subpass->view_mask;

   /* If multiview isn't enabled, we have nothing to do. */
   if (view_mask == 0)
      return false;

   if (pipeline->use_primitive_replication)
      return lower_multiview_with_primitive_replication(shader, pipeline);

   struct lower_multiview_state state = {
      .view_mask = view_mask,
   };

   /* This pass assumes a single entrypoint */
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_builder_init(&state.builder, entrypoint);

   bool progress = false;
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

         if (load->intrinsic != nir_intrinsic_load_instance_id &&
             load->intrinsic != nir_intrinsic_load_view_index)
            continue;

         assert(load->dest.is_ssa);

         nir_ssa_def *value;
         if (load->intrinsic == nir_intrinsic_load_instance_id) {
            value = build_instance_id(&state);
         } else {
            assert(load->intrinsic == nir_intrinsic_load_view_index);
            value = build_view_index(&state);
         }

         nir_ssa_def_rewrite_uses(&load->dest.ssa, nir_src_for_ssa(value));

         nir_instr_remove(&load->instr);
         progress = true;
      }
   }

   /* The view index is available in all stages but the instance id is only
    * available in the VS. If it's not a fragment shader, we need to pass
    * the view index on to the next stage.
    */
   if (shader->info.stage != MESA_SHADER_FRAGMENT) {
      nir_ssa_def *view_index = build_view_index(&state);

      nir_builder *b = &state.builder;

      assert(view_index->parent_instr->block == nir_start_block(entrypoint));
      b->cursor = nir_after_instr(view_index->parent_instr);

      /* Unless there is only one possible view index (that would be set
       * directly), pass it to the next stage.
       */
      if (util_bitcount(state.view_mask) != 1) {
         nir_variable *view_index_out =
            nir_variable_create(shader, nir_var_shader_out,
                                glsl_int_type(), "view index");
         view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
         nir_store_var(b, view_index_out, view_index, 0x1);
      }

      nir_variable *layer_id_out =
         nir_variable_create(shader, nir_var_shader_out,
                             glsl_int_type(), "layer ID");
      layer_id_out->data.location = VARYING_SLOT_LAYER;
      nir_store_var(b, layer_id_out, view_index, 0x1);

      progress = true;
   }

   if (progress) {
      nir_metadata_preserve(entrypoint, nir_metadata_block_index |
                                        nir_metadata_dominance);
   }

   return progress;
}

static bool
shader_writes_to_memory(nir_shader *shader)
{
   /* With multiview, we would need to ensure that memory writes happen either
    * once or once per view. Since the combination of multiview and memory
    * writes is not expected, we just skip this optimization in that case.
    */

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_store_ssbo:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_store_shared:
         case nir_intrinsic_shared_atomic_add:
         case nir_intrinsic_shared_atomic_imin:
         case nir_intrinsic_shared_atomic_umin:
         case nir_intrinsic_shared_atomic_imax:
         case nir_intrinsic_shared_atomic_umax:
         case nir_intrinsic_shared_atomic_and:
         case nir_intrinsic_shared_atomic_or:
         case nir_intrinsic_shared_atomic_xor:
         case nir_intrinsic_shared_atomic_exchange:
         case nir_intrinsic_shared_atomic_comp_swap:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
            return true;

         default:
            /* Keep walking. */
            break;
         }
      }
   }

   return false;
}

static bool
shader_uses_view_index(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_load_view_index)
            return true;
      }
   }

   return false;
}

static bool
shader_only_position_uses_view_index(nir_shader *shader)
{
   nir_shader *shader_no_position = nir_shader_clone(NULL, shader);
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader_no_position);

   /* Remove the position stores from the cloned shader. */
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
         if (store->intrinsic != nir_intrinsic_store_deref)
            continue;

         nir_variable *var = nir_intrinsic_get_var(store, 0);
         if (var->data.location != VARYING_SLOT_POS)
            continue;

         nir_instr_remove(&store->instr);
      }
   }

   /* Clean up the shader so unused load_view_index intrinsics are removed. */
   bool progress;
   do {
      progress = false;
      progress |= nir_opt_dead_cf(shader_no_position);

      /* Peephole select will drop if-blocks whose then and else branches are
       * empty, which removes the use of the SSA value in the condition.
       */
      progress |= nir_opt_peephole_select(shader_no_position, 0, false, false);

      progress |= nir_opt_dce(shader_no_position);
   } while (progress);

   bool uses_view_index = shader_uses_view_index(shader_no_position);

   ralloc_free(shader_no_position);
   return !uses_view_index;
}

bool
anv_check_for_primitive_replication(nir_shader **shaders,
                                    struct anv_graphics_pipeline *pipeline)
{
   assert(pipeline->base.device->info.gen >= 12);

   static int primitive_replication_max_views = -1;
   if (primitive_replication_max_views < 0) {
      /* TODO: Figure out why we are not getting the same benefits for more
       * than 2 views. For now use Primitive Replication just for the 2-view
       * case by default.
       */
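      /* The default can be overridden at run time; for example, launching an
       * application as
       *
       *    ANV_PRIMITIVE_REPLICATION_MAX_VIEWS=4 ./app
       *
       * raises the limit to 4 views (still clamped to
       * MAX_VIEWS_FOR_PRIMITIVE_REPLICATION below).
       */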
      const unsigned default_max_views = 2;

      primitive_replication_max_views =
         MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
              env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
                                  default_max_views));
   }

   /* TODO: We should be able to support replication at 'geometry' stages
    * later than Vertex. In that case only the last stage can refer to
    * gl_ViewIndex.
    */
   if (pipeline->active_stages != (VK_SHADER_STAGE_VERTEX_BIT |
                                   VK_SHADER_STAGE_FRAGMENT_BIT)) {
      return false;
   }

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   if (view_count == 1 || view_count > primitive_replication_max_views)
      return false;

   bool vs_writes_position = false;
   nir_foreach_variable(var, &shaders[MESA_SHADER_VERTEX]->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         vs_writes_position = true;
         break;
      }
   }

   /* Don't bother handling this edge case with Primitive Replication. */
   if (!vs_writes_position)
      return false;

   return !shader_uses_view_index(shaders[MESA_SHADER_FRAGMENT]) &&
          !shader_writes_to_memory(shaders[MESA_SHADER_VERTEX]) &&
          shader_only_position_uses_view_index(shaders[MESA_SHADER_VERTEX]);
}