/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "util/debug.h"

/*
 * This file implements the lowering required for VK_KHR_multiview.
 *
 * When possible, Primitive Replication is used and the shader is modified to
 * make gl_Position an array and fill it with values for each view.
 *
 * Otherwise we implement multiview using instanced rendering. The number of
 * instances in each draw call is multiplied by the number of views in the
 * subpass. Then, in the shader, we divide gl_InstanceId by the number of
 * views and use gl_InstanceId % view_count to compute the actual ViewIndex.
 */
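
/* Illustrative example (not part of the original source): with a subpass view
 * mask of 0b0101 (views 0 and 2) and an application draw of 4 instances, the
 * driver emits 4 * 2 = 8 hardware instances. Hardware instance 5 then maps to
 * application gl_InstanceId 5 / 2 = 2 and compacted view 5 % 2 = 1, which
 * build_view_index() remaps to the actual ViewIndex 2.
 */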

struct lower_multiview_state {
   nir_builder builder;

   uint32_t view_mask;

   nir_ssa_def *instance_id;
   nir_ssa_def *view_index;
};

static nir_ssa_def *
build_instance_id(struct lower_multiview_state *state)
{
   assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);

   if (state->instance_id == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      /* We use instancing for implementing multiview. The actual instance id
       * is given by dividing instance_id by the number of views in this
       * subpass.
       */
      state->instance_id =
         nir_idiv(b, nir_load_instance_id(b),
                  nir_imm_int(b, util_bitcount(state->view_mask)));
   }

   return state->instance_id;
}

static nir_ssa_def *
build_view_index(struct lower_multiview_state *state)
{
   if (state->view_index == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      assert(state->view_mask != 0);
      if (util_bitcount(state->view_mask) == 1) {
         /* Set the view index directly. */
         state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
      } else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
         /* We only support 16 viewports */
         assert((state->view_mask & 0xffff0000) == 0);

         /* We use instancing for implementing multiview. The compacted view
          * id is given by instance_id % view_count. We then have to convert
          * that to an actual view id.
          */
         nir_ssa_def *compacted =
            nir_umod(b, nir_load_instance_id(b),
                     nir_imm_int(b, util_bitcount(state->view_mask)));

         if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
            /* If we have a full view mask, then compacted is what we want */
            state->view_index = compacted;
         } else {
            /* Now we define a map from compacted view index to the actual
             * view index that's based on the view_mask. The map is given by
             * 16 nibbles, each of which is a value from 0 to 15.
             */
            uint64_t remap = 0;
            uint32_t i = 0;
            for_each_bit(bit, state->view_mask) {
               assert(bit < 16);
               remap |= (uint64_t)bit << (i++ * 4);
            }
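
            /* Worked example (illustrative, not in the original source): a
             * view_mask of 0b1101 selects views 0, 2 and 3, so the loop above
             * builds remap = 0x320 (nibble 0 = 0, nibble 1 = 2, nibble 2 = 3).
             * A compacted index of 1 is then remapped to
             * (0x320 >> (1 * 4)) & 0xf = 2, the actual view index.
             */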

            nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));

            /* One of these days, when we have int64 everywhere, this will be
             * easier.
             */
            nir_ssa_def *shifted;
            if (remap <= UINT32_MAX) {
               shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
            } else {
               nir_ssa_def *shifted_low =
                  nir_ushr(b, nir_imm_int(b, remap), shift);
               nir_ssa_def *shifted_high =
                  nir_ushr(b, nir_imm_int(b, remap >> 32),
                           nir_isub(b, shift, nir_imm_int(b, 32)));
               shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
                                   shifted_low, shifted_high);
            }
            state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
         }
      } else {
         const struct glsl_type *type = glsl_int_type();
         if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
             b->shader->info.stage == MESA_SHADER_GEOMETRY)
            type = glsl_array_type(type, 1, 0);

         nir_variable *idx_var =
            nir_variable_create(b->shader, nir_var_shader_in,
                                type, "view index");
         idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
            idx_var->data.interpolation = INTERP_MODE_FLAT;

         nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
         if (glsl_type_is_array(type))
            deref = nir_build_deref_array_imm(b, deref, 0);

         state->view_index = nir_load_deref(b, deref);
      }
   }

   return state->view_index;
}

/* Primitive Replication allows a shader to write different positions for each
 * view in the same execution. If only the position depends on the view, then
 * it is possible to use the feature instead of instancing to implement
 * multiview.
 */
static bool
lower_multiview_with_primitive_replication(nir_shader *shader,
                                           struct anv_graphics_pipeline *pipeline)
{
   if (shader->info.stage == MESA_SHADER_FRAGMENT)
      return false;

   assert(shader->info.stage == MESA_SHADER_VERTEX);

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   assert(view_count > 1 && view_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   /* Update position to refer to an array. */
   nir_variable *pos_var = NULL;
   nir_foreach_variable(var, &shader->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         assert(var->type == glsl_vec4_type());
         var->type = glsl_array_type(glsl_vec4_type(), view_count, 0);
         var->data.per_view = true;
         pos_var = var;
         break;
      }
   }

   assert(pos_var);

   nir_cf_list body;
   nir_cf_list_extract(&body, &entrypoint->body);

   nir_builder b;
   nir_builder_init(&b, entrypoint);
   b.cursor = nir_after_cf_list(&entrypoint->body);

   /* Fill Layer ID with zero. Replication will use that as base to apply the
    * per-view layer values.
    */
   nir_variable *layer_id_out =
      nir_variable_create(shader, nir_var_shader_out,
                          glsl_int_type(), "layer ID");
   layer_id_out->data.location = VARYING_SLOT_LAYER;
   nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);

   /* Loop Index will go from 0 to view_count. */
   nir_variable *loop_index_var =
      nir_local_variable_create(entrypoint, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(&b, loop_index_var);
   nir_store_deref(&b, loop_index_deref, nir_imm_int(&b, 0), 1);

   /* Array of view index values that are active in the loop. Note that the
    * loop index only matches the view index if there are no gaps in the
    * view_mask.
    */
   nir_variable *view_index_var = nir_local_variable_create(
      entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0), "view_index");
   nir_deref_instr *view_index_deref = nir_build_deref_var(&b, view_index_var);

   int array_position = 0;
   for_each_bit(view_index, view_mask) {
      nir_store_deref(&b, nir_build_deref_array_imm(&b, view_index_deref, array_position),
                      nir_imm_int(&b, view_index), 1);
      array_position++;
   }
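
   /* Illustrative note (not in the original source): with view_mask = 0b1010
    * the array above becomes view_index[] = {1, 3}, so loop iteration 0 below
    * writes gl_Position[0] for view 1 and iteration 1 writes gl_Position[1]
    * for view 3.
    */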

   /* Create the equivalent of
    *
    *    while (true):
    *       if (loop_index >= view_count):
    *          break
    *
    *       view_index = active_indices[loop_index]
    *       pos_deref = &pos[loop_index]
    *
    *       # Placeholder for the body to be reinserted.
    *
    *       loop_index += 1
    *
    * Later both `view_index` and `pos_deref` will be used to rewrite the
    * original shader body.
    */

   nir_loop *loop = nir_push_loop(&b);

   nir_ssa_def *loop_index = nir_load_deref(&b, loop_index_deref);
   nir_ssa_def *cmp = nir_ige(&b, loop_index, nir_imm_int(&b, view_count));
   nir_if *loop_check = nir_push_if(&b, cmp);
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, loop_check);

   nir_ssa_def *view_index =
      nir_load_deref(&b, nir_build_deref_array(&b, view_index_deref, loop_index));
   nir_deref_instr *pos_deref =
      nir_build_deref_array(&b, nir_build_deref_var(&b, pos_var), loop_index);

   nir_store_deref(&b, loop_index_deref, nir_iadd_imm(&b, loop_index, 1), 1);
   nir_pop_loop(&b, loop);

   /* Reinsert the body. */
   b.cursor = nir_after_instr(&pos_deref->instr);
   nir_cf_reinsert(&body, b.cursor);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_view_index: {
            assert(intrin->dest.is_ssa);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(view_index));
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
            if (var == pos_var) {
               nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);

               nir_instr_rewrite_src(instr, &intrin->src[0],
                                     nir_src_for_ssa(&pos_deref->dest.ssa));

               /* Remove old deref since it has the wrong type. */
               nir_deref_instr_remove_if_unused(old_deref);
            }
            break;
         }

         case nir_intrinsic_load_deref:
            if (nir_intrinsic_get_var(intrin, 0) == pos_var) {
               unreachable("Should have lowered I/O to temporaries "
                           "so no load_deref on position output is expected.");
            }
            break;

         case nir_intrinsic_copy_deref:
            unreachable("Should have lowered copy_derefs at this point");

         default:
            /* Nothing to do. */
            break;
         }
      }
   }

   nir_metadata_preserve(entrypoint, nir_metadata_none);

   return true;
}

bool
anv_nir_lower_multiview(nir_shader *shader,
                        struct anv_graphics_pipeline *pipeline)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   uint32_t view_mask = pipeline->subpass->view_mask;

   /* If multiview isn't enabled, we have nothing to do. */
   if (view_mask == 0)
      return false;

   if (pipeline->use_primitive_replication)
      return lower_multiview_with_primitive_replication(shader, pipeline);

   struct lower_multiview_state state = {
      .view_mask = view_mask,
   };

   /* This pass assumes a single entrypoint */
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_builder_init(&state.builder, entrypoint);

   bool progress = false;
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

         if (load->intrinsic != nir_intrinsic_load_instance_id &&
             load->intrinsic != nir_intrinsic_load_view_index)
            continue;

         assert(load->dest.is_ssa);

         nir_ssa_def *value;
         if (load->intrinsic == nir_intrinsic_load_instance_id) {
            value = build_instance_id(&state);
         } else {
            assert(load->intrinsic == nir_intrinsic_load_view_index);
            value = build_view_index(&state);
         }

         nir_ssa_def_rewrite_uses(&load->dest.ssa, nir_src_for_ssa(value));

         nir_instr_remove(&load->instr);
         progress = true;
      }
   }

   /* The view index is available in all stages but the instance id is only
    * available in the VS. If it's not a fragment shader, we need to pass
    * the view index on to the next stage.
    */
   if (shader->info.stage != MESA_SHADER_FRAGMENT) {
      nir_ssa_def *view_index = build_view_index(&state);

      nir_builder *b = &state.builder;

      assert(view_index->parent_instr->block == nir_start_block(entrypoint));
      b->cursor = nir_after_instr(view_index->parent_instr);

      /* Unless there is only one possible view index (that would be set
       * directly), pass it to the next stage. */
      if (util_bitcount(state.view_mask) != 1) {
         nir_variable *view_index_out =
            nir_variable_create(shader, nir_var_shader_out,
                                glsl_int_type(), "view index");
         view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
         nir_store_var(b, view_index_out, view_index, 0x1);
      }

      nir_variable *layer_id_out =
         nir_variable_create(shader, nir_var_shader_out,
                             glsl_int_type(), "layer ID");
      layer_id_out->data.location = VARYING_SLOT_LAYER;
      nir_store_var(b, layer_id_out, view_index, 0x1);

      progress = true;
   }

   nir_metadata_preserve(entrypoint, nir_metadata_block_index |
                                     nir_metadata_dominance);

   return progress;
}

static bool
shader_writes_to_memory(nir_shader *shader)
{
   /* With multiview, we would need to ensure that memory writes happen either
    * once or once per view. Since the combination of multiview and memory
    * writes is not expected, we'll just skip this optimization in this case.
    */

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_store_ssbo:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_store_shared:
         case nir_intrinsic_shared_atomic_add:
         case nir_intrinsic_shared_atomic_imin:
         case nir_intrinsic_shared_atomic_umin:
         case nir_intrinsic_shared_atomic_imax:
         case nir_intrinsic_shared_atomic_umax:
         case nir_intrinsic_shared_atomic_and:
         case nir_intrinsic_shared_atomic_or:
         case nir_intrinsic_shared_atomic_xor:
         case nir_intrinsic_shared_atomic_exchange:
         case nir_intrinsic_shared_atomic_comp_swap:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
            return true;

         default:
            /* Keep walking. */
            break;
         }
      }
   }

   return false;
}

static bool
shader_uses_view_index(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_load_view_index)
            return true;
      }
   }

   return false;
}

static bool
shader_only_position_uses_view_index(nir_shader *shader)
{
   nir_shader *shader_no_position = nir_shader_clone(NULL, shader);
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader_no_position);

   /* Remove the position store from the cloned shader. */
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
         if (store->intrinsic != nir_intrinsic_store_deref)
            continue;

         nir_variable *var = nir_intrinsic_get_var(store, 0);
         if (var->data.location != VARYING_SLOT_POS)
            continue;

         nir_instr_remove(&store->instr);
      }
   }

   /* Clean up shader so unused load_view_index intrinsics are removed. */
   bool progress;
   do {
      progress = false;
      progress |= nir_opt_dead_cf(shader_no_position);

      /* Peephole select will drop if-blocks that have then and else empty,
       * which will remove the usage of an SSA in the condition.
       */
      progress |= nir_opt_peephole_select(shader_no_position, 0, false, false);

      progress |= nir_opt_dce(shader_no_position);
   } while (progress);

   bool uses_view_index = shader_uses_view_index(shader_no_position);

   ralloc_free(shader_no_position);
   return !uses_view_index;
}

bool
anv_check_for_primitive_replication(nir_shader **shaders,
                                    struct anv_graphics_pipeline *pipeline)
{
   assert(pipeline->base.device->info.gen >= 12);

   static int primitive_replication_max_views = -1;
   if (primitive_replication_max_views < 0) {
      /* TODO: Figure out why we are not getting the same benefits for larger
       * than 2 views. For now use Primitive Replication just for the 2-view
       * case by default.
       */
      const unsigned default_max_views = 2;

      primitive_replication_max_views =
         MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
              env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
                                  default_max_views));
   }

   /* TODO: We should be able to support replication at 'geometry' stages
    * later than Vertex. In that case only the last stage can refer to
    * gl_ViewIndex.
    */
   if (pipeline->active_stages != (VK_SHADER_STAGE_VERTEX_BIT |
                                   VK_SHADER_STAGE_FRAGMENT_BIT)) {
      return false;
   }

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   if (view_count == 1 || view_count > primitive_replication_max_views)
      return false;

   bool vs_writes_position = false;
   nir_foreach_variable(var, &shaders[MESA_SHADER_VERTEX]->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         vs_writes_position = true;
         break;
      }
   }

   /* Don't bother handling this edge case with Primitive Replication. */
   if (!vs_writes_position)
      return false;

   return !shader_uses_view_index(shaders[MESA_SHADER_FRAGMENT]) &&
          !shader_writes_to_memory(shaders[MESA_SHADER_VERTEX]) &&
          shader_only_position_uses_view_index(shaders[MESA_SHADER_VERTEX]);
}