nir: simplify phi handling in divergence analysis
[mesa.git] / src / compiler / nir / nir_divergence_analysis.c
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
/* Per-traversal analysis state, copied for each nested control-flow
 * construct so that each if-leg / loop body sees its own CF flags. */
struct divergence_state {
   const nir_divergence_options options;
   const gl_shader_stage stage;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;
};
55
56 static bool
57 visit_cf_list(struct exec_list *list, struct divergence_state *state);
58
59 static bool
60 visit_alu(nir_alu_instr *instr)
61 {
62 if (instr->dest.dest.ssa.divergent)
63 return false;
64
65 unsigned num_src = nir_op_infos[instr->op].num_inputs;
66
67 for (unsigned i = 0; i < num_src; i++) {
68 if (instr->src[i].src.ssa->divergent) {
69 instr->dest.dest.ssa.divergent = true;
70 return true;
71 }
72 }
73
74 return false;
75 }
76
/* Computes the divergence of an intrinsic's destination SSA def.
 * Divergence depends on the intrinsic class: some are uniform by
 * construction (e.g. ballots, votes, group-level loads), some depend on
 * the shader stage and hardware capabilities (options), some follow
 * their sources, and the rest are always divergent.
 * Returns true if the destination was newly marked divergent. */
static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
   /* intrinsics without a destination cannot produce a divergent value */
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   /* already divergent: nothing can change, avoid re-reporting progress */
   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = state->options;
   gl_shader_stage stage = state->stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_alpha_ref_float:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      /* FS inputs are uniform when the whole subgroup processes one primitive,
       * TES inputs when the whole subgroup processes one patch */
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      /* uniform in TCS; in TES only with a single patch per subgroup */
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant
    */
   case nir_intrinsic_reduce:
      /* cluster_size == 0 means the whole subgroup: always uniform */
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      /* fallthrough */
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      /* min/max/and/or are idempotent: uniform input keeps the result
       * uniform regardless of cluster size or scan position */
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_get_buffer_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_deref_buffer_array_length:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      /* uniform data OR a uniform index yields a uniform result */
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_elect:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      /* release builds: be conservative about unknown intrinsics */
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
447
448 static bool
449 visit_tex(nir_tex_instr *instr)
450 {
451 if (instr->dest.ssa.divergent)
452 return false;
453
454 bool is_divergent = false;
455
456 for (unsigned i = 0; i < instr->num_srcs; i++) {
457 switch (instr->src[i].src_type) {
458 case nir_tex_src_sampler_deref:
459 case nir_tex_src_sampler_handle:
460 case nir_tex_src_sampler_offset:
461 is_divergent |= instr->src[i].src.ssa->divergent &&
462 instr->sampler_non_uniform;
463 break;
464 case nir_tex_src_texture_deref:
465 case nir_tex_src_texture_handle:
466 case nir_tex_src_texture_offset:
467 is_divergent |= instr->src[i].src.ssa->divergent &&
468 instr->texture_non_uniform;
469 break;
470 default:
471 is_divergent |= instr->src[i].src.ssa->divergent;
472 break;
473 }
474 }
475
476 instr->dest.ssa.divergent = is_divergent;
477 return is_divergent;
478 }
479
480 static bool
481 visit_load_const(nir_load_const_instr *instr)
482 {
483 return false;
484 }
485
486 static bool
487 visit_ssa_undef(nir_ssa_undef_instr *instr)
488 {
489 return false;
490 }
491
492 static bool
493 nir_variable_mode_is_uniform(nir_variable_mode mode) {
494 switch (mode) {
495 case nir_var_uniform:
496 case nir_var_mem_ubo:
497 case nir_var_mem_ssbo:
498 case nir_var_mem_shared:
499 case nir_var_mem_global:
500 return true;
501 default:
502 return false;
503 }
504 }
505
506 static bool
507 nir_variable_is_uniform(nir_variable *var, struct divergence_state *state)
508 {
509 if (nir_variable_mode_is_uniform(var->data.mode))
510 return true;
511
512 if (state->stage == MESA_SHADER_FRAGMENT &&
513 (state->options & nir_divergence_single_prim_per_subgroup) &&
514 var->data.mode == nir_var_shader_in &&
515 var->data.interpolation == INTERP_MODE_FLAT)
516 return true;
517
518 if (state->stage == MESA_SHADER_TESS_CTRL &&
519 (state->options & nir_divergence_single_patch_per_tcs_subgroup) &&
520 var->data.mode == nir_var_shader_out && var->data.patch)
521 return true;
522
523 if (state->stage == MESA_SHADER_TESS_EVAL &&
524 (state->options & nir_divergence_single_patch_per_tes_subgroup) &&
525 var->data.mode == nir_var_shader_in && var->data.patch)
526 return true;
527
528 return false;
529 }
530
531 static bool
532 visit_deref(nir_deref_instr *deref, struct divergence_state *state)
533 {
534 if (deref->dest.ssa.divergent)
535 return false;
536
537 bool is_divergent = false;
538 switch (deref->deref_type) {
539 case nir_deref_type_var:
540 is_divergent = !nir_variable_is_uniform(deref->var, state);
541 break;
542 case nir_deref_type_array:
543 case nir_deref_type_ptr_as_array:
544 is_divergent = deref->arr.index.ssa->divergent;
545 /* fallthrough */
546 case nir_deref_type_struct:
547 case nir_deref_type_array_wildcard:
548 is_divergent |= deref->parent.ssa->divergent;
549 break;
550 case nir_deref_type_cast:
551 is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
552 deref->parent.ssa->divergent;
553 break;
554 }
555
556 deref->dest.ssa.divergent = is_divergent;
557 return is_divergent;
558 }
559
560 static bool
561 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
562 {
563 switch (jump->type) {
564 case nir_jump_continue:
565 if (state->divergent_loop_continue)
566 return false;
567 if (state->divergent_loop_cf)
568 state->divergent_loop_continue = true;
569 return state->divergent_loop_continue;
570 case nir_jump_break:
571 if (state->divergent_loop_break)
572 return false;
573 if (state->divergent_loop_cf)
574 state->divergent_loop_break = true;
575 return state->divergent_loop_break;
576 case nir_jump_return:
577 unreachable("NIR divergence analysis: Unsupported return instruction.");
578 }
579 return false;
580 }
581
582 static bool
583 visit_block(nir_block *block, struct divergence_state *state)
584 {
585 bool has_changed = false;
586
587 nir_foreach_instr(instr, block) {
588 switch (instr->type) {
589 case nir_instr_type_alu:
590 has_changed |= visit_alu(nir_instr_as_alu(instr));
591 break;
592 case nir_instr_type_intrinsic:
593 has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr), state);
594 break;
595 case nir_instr_type_tex:
596 has_changed |= visit_tex(nir_instr_as_tex(instr));
597 break;
598 case nir_instr_type_load_const:
599 has_changed |= visit_load_const(nir_instr_as_load_const(instr));
600 break;
601 case nir_instr_type_ssa_undef:
602 has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
603 break;
604 case nir_instr_type_deref:
605 has_changed |= visit_deref(nir_instr_as_deref(instr), state);
606 break;
607 case nir_instr_type_jump:
608 has_changed |= visit_jump(nir_instr_as_jump(instr), state);
609 break;
610 /* phis are handled when processing the branches */
611 case nir_instr_type_phi:
612 break;
613 case nir_instr_type_call:
614 case nir_instr_type_parallel_copy:
615 unreachable("NIR divergence analysis: Unsupported instruction type.");
616 }
617 }
618
619 return has_changed;
620 }
621
622 /* There are 3 types of phi instructions:
623 * (1) gamma: represent the joining point of different paths
624 * created by an “if-then-else” branch.
625 * The resulting value is divergent if the branch condition
626 * or any of the source values is divergent. */
627 static bool
628 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
629 {
630 if (phi->dest.ssa.divergent)
631 return false;
632
633 unsigned defined_srcs = 0;
634 nir_foreach_phi_src(src, phi) {
635 /* if any source value is divergent, the resulting value is divergent */
636 if (src->src.ssa->divergent) {
637 phi->dest.ssa.divergent = true;
638 return true;
639 }
640 if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
641 defined_srcs++;
642 }
643 }
644
645 /* if the condition is divergent and two sources defined, the definition is divergent */
646 if (defined_srcs > 1 && if_cond_divergent) {
647 phi->dest.ssa.divergent = true;
648 return true;
649 }
650
651 return false;
652 }
653
654 /* There are 3 types of phi instructions:
655 * (2) mu: which only exist at loop headers,
656 * merge initial and loop-carried values.
657 * The resulting value is divergent if any source value
658 * is divergent or a divergent loop continue condition
659 * is associated with a different ssa-def. */
660 static bool
661 visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
662 {
663 if (phi->dest.ssa.divergent)
664 return false;
665
666 nir_ssa_def* same = NULL;
667 nir_foreach_phi_src(src, phi) {
668 /* if any source value is divergent, the resulting value is divergent */
669 if (src->src.ssa->divergent) {
670 phi->dest.ssa.divergent = true;
671 return true;
672 }
673 /* if this loop is uniform, we're done here */
674 if (!divergent_continue)
675 continue;
676 /* skip the loop preheader */
677 if (src->pred == preheader)
678 continue;
679 /* skip undef values */
680 if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
681 continue;
682
683 /* check if all loop-carried values are from the same ssa-def */
684 if (!same)
685 same = src->src.ssa;
686 else if (same != src->src.ssa) {
687 phi->dest.ssa.divergent = true;
688 return true;
689 }
690 }
691
692 return false;
693 }
694
695 /* There are 3 types of phi instructions:
696 * (3) eta: represent values that leave a loop.
697 * The resulting value is divergent if the source value is divergent
698 * or any loop exit condition is divergent for a value which is
699 * not loop-invariant.
700 * (note: there should be no phi for loop-invariant variables.) */
701 static bool
702 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
703 {
704 if (phi->dest.ssa.divergent)
705 return false;
706
707 if (divergent_break) {
708 phi->dest.ssa.divergent = true;
709 return true;
710 }
711
712 /* if any source value is divergent, the resulting value is divergent */
713 nir_foreach_phi_src(src, phi) {
714 if (src->src.ssa->divergent) {
715 phi->dest.ssa.divergent = true;
716 return true;
717 }
718 }
719
720 return false;
721 }
722
723 static bool
724 visit_if(nir_if *if_stmt, struct divergence_state *state)
725 {
726 bool progress = false;
727
728 struct divergence_state then_state = *state;
729 then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
730 progress |= visit_cf_list(&if_stmt->then_list, &then_state);
731
732 struct divergence_state else_state = *state;
733 else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
734 progress |= visit_cf_list(&if_stmt->else_list, &else_state);
735
736 /* handle phis after the IF */
737 nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
738 if (instr->type != nir_instr_type_phi)
739 break;
740
741 progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
742 if_stmt->condition.ssa->divergent);
743 }
744
745 /* join loop divergence information from both branch legs */
746 state->divergent_loop_continue |= then_state.divergent_loop_continue ||
747 else_state.divergent_loop_continue;
748 state->divergent_loop_break |= then_state.divergent_loop_break ||
749 else_state.divergent_loop_break;
750
751 /* A divergent continue makes succeeding loop CF divergent:
752 * not all loop-active invocations participate in the remaining loop-body
753 * which means that a following break might be taken by some invocations, only */
754 state->divergent_loop_cf |= state->divergent_loop_continue;
755
756 return progress;
757 }
758
/* Analyzes a loop to a fixed point: loop-carried values can become
 * divergent only after the body has been visited, so the header phis are
 * revisited and the body re-processed until nothing changes anymore. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: assume a uniform continue condition
    * (divergent_continue == false) until the body proves otherwise */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      progress |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                        loop_preheader, false);

   }

   /* setup loop state: the loop body starts with converged control flow */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* reset per-iteration CF divergence before re-walking the body */
      loop_state.divergent_loop_cf = false;
   } while (repeat);

   /* handle phis after the loop: a divergent break makes all eta phis divergent */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   return progress;
}
812
813 static bool
814 visit_cf_list(struct exec_list *list, struct divergence_state *state)
815 {
816 bool has_changed = false;
817
818 foreach_list_typed(nir_cf_node, node, node, list) {
819 switch (node->type) {
820 case nir_cf_node_block:
821 has_changed |= visit_block(nir_cf_node_as_block(node), state);
822 break;
823 case nir_cf_node_if:
824 has_changed |= visit_if(nir_cf_node_as_if(node), state);
825 break;
826 case nir_cf_node_loop:
827 has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
828 break;
829 case nir_cf_node_function:
830 unreachable("NIR divergence analysis: Unsupported cf_node type.");
831 }
832 }
833
834 return has_changed;
835 }
836
/* nir_foreach_ssa_def callback: resets the divergence flag so the analysis
 * starts from the optimistic "everything uniform" assumption. */
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;  /* keep iterating */
}
843
844 void
845 nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
846 {
847 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
848
849 /* Set all SSA defs to non-divergent to start off */
850 nir_foreach_block(block, impl) {
851 nir_foreach_instr(instr, block)
852 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
853 }
854
855 struct divergence_state state = {
856 .options = options,
857 .stage = shader->info.stage,
858 .divergent_loop_cf = false,
859 .divergent_loop_continue = false,
860 .divergent_loop_break = false,
861 };
862
863 visit_cf_list(&impl->body, &state);
864 }
865