nir: rework phi handling in divergence analysis
[mesa.git] / src / compiler / nir / nir_divergence_analysis.c
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
/* Per-run context for the analysis: immutable after construction. */
struct divergence_state {
   const nir_divergence_options options; /* hardware/driver uniformity guarantees */
   const gl_shader_stage stage;          /* stage of the shader being analyzed */
};

/* Forward declaration: visit_loop()/visit_if() and visit_cf_list() recurse
 * into each other through the CF tree. */
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
46
47 static bool
48 visit_alu(nir_alu_instr *instr)
49 {
50 if (instr->dest.dest.ssa.divergent)
51 return false;
52
53 unsigned num_src = nir_op_infos[instr->op].num_inputs;
54
55 for (unsigned i = 0; i < num_src; i++) {
56 if (instr->src[i].src.ssa->divergent) {
57 instr->dest.dest.ssa.divergent = true;
58 return true;
59 }
60 }
61
62 return false;
63 }
64
/* Computes the divergence of an intrinsic's destination.
 *
 * Each intrinsic falls into one of four categories:
 *   - always uniform (cross-lane reductions, per-draw / per-dispatch state),
 *   - uniformity depending on shader stage and the driver-provided
 *     nir_divergence_options guarantees,
 *   - divergence inherited from the source operands,
 *   - always divergent (per-invocation state, atomics, scans).
 *
 * Returns true iff the destination was newly marked divergent (progress).
 */
static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
   /* Intrinsics without a destination cannot contribute divergence. */
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   /* Divergence is monotone; nothing to do once the def is divergent. */
   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = state->options;
   gl_shader_stage stage = state->stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_alpha_ref_float:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      /* NOTE: for non-FS stages without the uniform option, is_divergent
       * stays false (the view index is uniform per draw). */
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant
    */
   case nir_intrinsic_reduce:
      /* cluster_size == 0 means the whole subgroup: always uniform. */
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      /* fallthrough */
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      /* min/max/and/or are idempotent: a uniform input yields a uniform
       * result regardless of cluster boundaries or scan position. */
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_get_buffer_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_deref_buffer_array_length:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      /* Uniform value: every lane reads the same data no matter the index.
       * Uniform index: every lane reads the same (single) lane's value.
       * Only a divergent value combined with a divergent index diverges. */
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_elect:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      /* Release builds conservatively treat unknown intrinsics as
       * divergent; debug builds abort so new intrinsics get classified. */
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
435
436 static bool
437 visit_tex(nir_tex_instr *instr)
438 {
439 if (instr->dest.ssa.divergent)
440 return false;
441
442 bool is_divergent = false;
443
444 for (unsigned i = 0; i < instr->num_srcs; i++) {
445 switch (instr->src[i].src_type) {
446 case nir_tex_src_sampler_deref:
447 case nir_tex_src_sampler_handle:
448 case nir_tex_src_sampler_offset:
449 is_divergent |= instr->src[i].src.ssa->divergent &&
450 instr->sampler_non_uniform;
451 break;
452 case nir_tex_src_texture_deref:
453 case nir_tex_src_texture_handle:
454 case nir_tex_src_texture_offset:
455 is_divergent |= instr->src[i].src.ssa->divergent &&
456 instr->texture_non_uniform;
457 break;
458 default:
459 is_divergent |= instr->src[i].src.ssa->divergent;
460 break;
461 }
462 }
463
464 instr->dest.ssa.divergent = is_divergent;
465 return is_divergent;
466 }
467
468 static bool
469 visit_load_const(nir_load_const_instr *instr)
470 {
471 return false;
472 }
473
474 static bool
475 visit_ssa_undef(nir_ssa_undef_instr *instr)
476 {
477 return false;
478 }
479
480 static bool
481 nir_variable_mode_is_uniform(nir_variable_mode mode) {
482 switch (mode) {
483 case nir_var_uniform:
484 case nir_var_mem_ubo:
485 case nir_var_mem_ssbo:
486 case nir_var_mem_shared:
487 case nir_var_mem_global:
488 return true;
489 default:
490 return false;
491 }
492 }
493
494 static bool
495 nir_variable_is_uniform(nir_variable *var, struct divergence_state *state)
496 {
497 if (nir_variable_mode_is_uniform(var->data.mode))
498 return true;
499
500 if (state->stage == MESA_SHADER_FRAGMENT &&
501 (state->options & nir_divergence_single_prim_per_subgroup) &&
502 var->data.mode == nir_var_shader_in &&
503 var->data.interpolation == INTERP_MODE_FLAT)
504 return true;
505
506 if (state->stage == MESA_SHADER_TESS_CTRL &&
507 (state->options & nir_divergence_single_patch_per_tcs_subgroup) &&
508 var->data.mode == nir_var_shader_out && var->data.patch)
509 return true;
510
511 if (state->stage == MESA_SHADER_TESS_EVAL &&
512 (state->options & nir_divergence_single_patch_per_tes_subgroup) &&
513 var->data.mode == nir_var_shader_in && var->data.patch)
514 return true;
515
516 return false;
517 }
518
519 static bool
520 visit_deref(nir_deref_instr *deref, struct divergence_state *state)
521 {
522 if (deref->dest.ssa.divergent)
523 return false;
524
525 bool is_divergent = false;
526 switch (deref->deref_type) {
527 case nir_deref_type_var:
528 is_divergent = !nir_variable_is_uniform(deref->var, state);
529 break;
530 case nir_deref_type_array:
531 case nir_deref_type_ptr_as_array:
532 is_divergent = deref->arr.index.ssa->divergent;
533 /* fallthrough */
534 case nir_deref_type_struct:
535 case nir_deref_type_array_wildcard:
536 is_divergent |= deref->parent.ssa->divergent;
537 break;
538 case nir_deref_type_cast:
539 is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
540 deref->parent.ssa->divergent;
541 break;
542 }
543
544 deref->dest.ssa.divergent = is_divergent;
545 return is_divergent;
546 }
547
548 static bool
549 visit_block(nir_block *block, struct divergence_state *state)
550 {
551 bool has_changed = false;
552
553 nir_foreach_instr(instr, block) {
554 switch (instr->type) {
555 case nir_instr_type_alu:
556 has_changed |= visit_alu(nir_instr_as_alu(instr));
557 break;
558 case nir_instr_type_intrinsic:
559 has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr), state);
560 break;
561 case nir_instr_type_tex:
562 has_changed |= visit_tex(nir_instr_as_tex(instr));
563 break;
564 case nir_instr_type_load_const:
565 has_changed |= visit_load_const(nir_instr_as_load_const(instr));
566 break;
567 case nir_instr_type_ssa_undef:
568 has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
569 break;
570 case nir_instr_type_deref:
571 has_changed |= visit_deref(nir_instr_as_deref(instr), state);
572 break;
573 /* phis are handled when processing the branches */
574 case nir_instr_type_phi:
575 break;
576 case nir_instr_type_jump:
577 break;
578 case nir_instr_type_call:
579 case nir_instr_type_parallel_copy:
580 unreachable("NIR divergence analysis: Unsupported instruction type.");
581 }
582 }
583
584 return has_changed;
585 }
586
587 /* There are 3 types of phi instructions:
588 * (1) gamma: represent the joining point of different paths
589 * created by an “if-then-else” branch.
590 * The resulting value is divergent if the branch condition
591 * or any of the source values is divergent. */
592 static bool
593 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
594 {
595 if (phi->dest.ssa.divergent)
596 return false;
597
598 unsigned defined_srcs = 0;
599 nir_foreach_phi_src(src, phi) {
600 /* if any source value is divergent, the resulting value is divergent */
601 if (src->src.ssa->divergent) {
602 phi->dest.ssa.divergent = true;
603 return true;
604 }
605 if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
606 defined_srcs++;
607 }
608 }
609
610 /* if the condition is divergent and two sources defined, the definition is divergent */
611 if (defined_srcs > 1 && if_cond_divergent) {
612 phi->dest.ssa.divergent = true;
613 return true;
614 }
615 return false;
616 }
617
/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 * merge initial and loop-carried values.
 * The resulting value is divergent if any source value
 * is divergent or a divergent loop continue condition
 * is associated with a different ssa-def. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_loop *loop)
{
   /* Divergence is monotone; nothing to do once the phi is divergent. */
   if (phi->dest.ssa.divergent)
      return false;

   /* the CF node before the loop is the preheader block's container */
   nir_cf_node *prev = nir_cf_node_prev(&loop->cf_node);
   nir_ssa_def* same = NULL;
   bool all_same = true;

   /* first, check if all loop-carried values are from the same ssa-def */
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* skip the loop preheader */
      if (src->pred == nir_cf_node_as_block(prev))
         continue;
      /* undefs can take any value, so they don't break sameness */
      if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
         continue;
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa)
         all_same = false;
   }

   /* if all loop-carried values are the same, the resulting value is uniform */
   if (all_same)
      return false;

   /* check if the loop-carried values come from different ssa-defs
    * and the corresponding condition is divergent. */
   nir_foreach_phi_src(src, phi) {
      /* skip the loop preheader */
      if (src->pred == nir_cf_node_as_block(prev))
         continue;

      /* skip the unconditional back-edge */
      if (src->pred == nir_loop_last_block(loop))
         continue;

      /* if the value is undef, we don't need to check the condition */
      if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
         continue;

      nir_cf_node *current = src->pred->cf_node.parent;
      /* check recursively the conditions if any is divergent:
       * walk up the CF tree from the predecessor to the loop itself */
      while (current->type != nir_cf_node_loop) {
         assert (current->type == nir_cf_node_if);
         nir_if *if_node = nir_cf_node_as_if(current);
         if (if_node->condition.ssa->divergent) {
            phi->dest.ssa.divergent = true;
            return true;
         }
         current = current->parent;
      }
      /* the walk must terminate at this loop (LCSSA form is assumed) */
      assert(current == &loop->cf_node);
   }

   return false;
}
687
/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 * The resulting value is divergent if the source value is divergent
 * or any loop exit condition is divergent for a value which is
 * not loop-invariant.
 * (note: there should be no phi for loop-invariant variables.) */
static bool
visit_loop_exit_phi(nir_phi_instr *phi, nir_loop *loop)
{
   /* Divergence is monotone; nothing to do once the phi is divergent. */
   if (phi->dest.ssa.divergent)
      return false;

   /* Check if any loop exit condition is divergent:
    * That is any break happens under divergent condition or
    * a break is preceeded by a divergent continue
    */
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }

      nir_cf_node *current = src->pred->cf_node.parent;

      /* check recursively the conditions if any is divergent:
       * walk the if-nesting from the breaking block up to the loop */
      while (current->type != nir_cf_node_loop) {
         assert(current->type == nir_cf_node_if);
         nir_if *if_node = nir_cf_node_as_if(current);
         if (if_node->condition.ssa->divergent) {
            phi->dest.ssa.divergent = true;
            return true;
         }
         current = current->parent;
      }

      /* check if any divergent continue happened before the break:
       * scan the loop body blocks that precede the breaking block */
      nir_foreach_block_in_cf_node(block, &loop->cf_node) {
         if (block == src->pred)
            break;
         if (!nir_block_ends_in_jump(block))
            continue;

         nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
         if (jump->type != nir_jump_continue)
            continue;

         /* determine whether this continue executes under divergent control
          * flow within *this* loop */
         current = block->cf_node.parent;
         bool is_divergent = false;
         while (current != &loop->cf_node) {
            /* the continue belongs to an inner loop */
            if (current->type == nir_cf_node_loop) {
               is_divergent = false;
               break;
            }
            assert(current->type == nir_cf_node_if);
            nir_if *if_node = nir_cf_node_as_if(current);
            is_divergent |= if_node->condition.ssa->divergent;
            current = current->parent;
         }

         if (is_divergent) {
            phi->dest.ssa.divergent = true;
            return true;
         }
      }
   }
   return false;
}
757
758 static bool
759 visit_if(nir_if *if_stmt, struct divergence_state *state)
760 {
761 bool progress = visit_cf_list(&if_stmt->then_list, state) |
762 visit_cf_list(&if_stmt->else_list, state);
763
764 /* handle phis after the IF */
765 nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
766 if (instr->type != nir_instr_type_phi)
767 break;
768 progress |= visit_if_merge_phi(nir_instr_as_phi(instr), if_stmt->condition.ssa->divergent);
769 }
770
771 return progress;
772 }
773
/* Analyzes a loop to a fixed point.
 *
 * Header (mu) phis are seeded first, then the body is re-analyzed until a
 * full pass makes no further progress.  Because divergence flags only ever
 * flip from false to true, the iteration terminates.  Finally the exit
 * (eta) phis in the block after the loop are resolved.
 *
 * Returns true iff anything was newly marked divergent.
 */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;

   /* handle loop header phis first */
   nir_foreach_instr(instr, nir_loop_first_block(loop)) {
      if (instr->type != nir_instr_type_phi)
         break;
      progress |= visit_loop_header_phi(nir_instr_as_phi(instr), loop);
   }

   bool repeat = true;
   while (repeat) {
      /* process loop body */
      repeat = visit_cf_list(&loop->body, state);

      if (repeat) {
         repeat = false;
         /* revisit loop header phis to see if something has changed:
          * new divergence in the body may flow back through the back-edge */
         nir_foreach_instr(instr, nir_loop_first_block(loop)) {
            if (instr->type != nir_instr_type_phi)
               break;
            repeat |= visit_loop_header_phi(nir_instr_as_phi(instr), loop);
         }
         progress = true;
      }
   }

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr), loop);
   }

   return progress;
}
812
813 static bool
814 visit_cf_list(struct exec_list *list, struct divergence_state *state)
815 {
816 bool has_changed = false;
817
818 foreach_list_typed(nir_cf_node, node, node, list) {
819 switch (node->type) {
820 case nir_cf_node_block:
821 has_changed |= visit_block(nir_cf_node_as_block(node), state);
822 break;
823 case nir_cf_node_if:
824 has_changed |= visit_if(nir_cf_node_as_if(node), state);
825 break;
826 case nir_cf_node_loop:
827 has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
828 break;
829 case nir_cf_node_function:
830 unreachable("NIR divergence analysis: Unsupported cf_node type.");
831 }
832 }
833
834 return has_changed;
835 }
836
/* nir_foreach_ssa_def() callback: clears the divergence flag so the
 * analysis starts from an all-uniform state.  Returns true to keep
 * iterating over the remaining defs. */
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}
843
844 void
845 nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
846 {
847 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
848
849 /* Set all SSA defs to non-divergent to start off */
850 nir_foreach_block(block, impl) {
851 nir_foreach_instr(instr, block)
852 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
853 }
854
855 struct divergence_state state = {
856 .options = options,
857 .stage = shader->info.stage,
858 };
859
860 visit_cf_list(&impl->body, &state);
861 }
862