nir: Add a nir_metadata_all enum value
[mesa.git] / src / compiler / nir / nir_divergence_analysis.c
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
struct divergence_state {
   /* Analysis-wide inputs, fixed for the whole shader. */
   const nir_divergence_options options;
   const gl_shader_stage stage;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
58
/* Forward declaration: blocks, ifs and loops recurse back through this. */
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
61
62 static bool
63 visit_alu(nir_alu_instr *instr)
64 {
65 if (instr->dest.dest.ssa.divergent)
66 return false;
67
68 unsigned num_src = nir_op_infos[instr->op].num_inputs;
69
70 for (unsigned i = 0; i < num_src; i++) {
71 if (instr->src[i].src.ssa->divergent) {
72 instr->dest.dest.ssa.divergent = true;
73 return true;
74 }
75 }
76
77 return false;
78 }
79
/* Computes the divergence of an intrinsic's destination and updates
 * instr->dest.ssa.divergent accordingly.
 * Returns true if the destination was newly marked divergent
 * (i.e. the analysis made progress). */
static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
   /* intrinsics without a destination produce no ssa value to classify */
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   /* divergence is monotonic: once divergent, always divergent */
   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = state->options;
   gl_shader_stage stage = state->stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_alpha_ref_float:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      /* fallthrough */
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      /* min/max/and/or are invariant under repeated application, so a
       * uniform source yields a uniform scan/clustered-reduce result */
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_get_buffer_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_deref_buffer_array_length:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      /* shuffle with both a uniform value or a uniform index is uniform */
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_elect:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      /* release builds: conservatively assume divergence for unknown intrinsics */
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
450
451 static bool
452 visit_tex(nir_tex_instr *instr)
453 {
454 if (instr->dest.ssa.divergent)
455 return false;
456
457 bool is_divergent = false;
458
459 for (unsigned i = 0; i < instr->num_srcs; i++) {
460 switch (instr->src[i].src_type) {
461 case nir_tex_src_sampler_deref:
462 case nir_tex_src_sampler_handle:
463 case nir_tex_src_sampler_offset:
464 is_divergent |= instr->src[i].src.ssa->divergent &&
465 instr->sampler_non_uniform;
466 break;
467 case nir_tex_src_texture_deref:
468 case nir_tex_src_texture_handle:
469 case nir_tex_src_texture_offset:
470 is_divergent |= instr->src[i].src.ssa->divergent &&
471 instr->texture_non_uniform;
472 break;
473 default:
474 is_divergent |= instr->src[i].src.ssa->divergent;
475 break;
476 }
477 }
478
479 instr->dest.ssa.divergent = is_divergent;
480 return is_divergent;
481 }
482
483 static bool
484 visit_load_const(nir_load_const_instr *instr)
485 {
486 return false;
487 }
488
489 static bool
490 visit_ssa_undef(nir_ssa_undef_instr *instr)
491 {
492 return false;
493 }
494
495 static bool
496 nir_variable_mode_is_uniform(nir_variable_mode mode) {
497 switch (mode) {
498 case nir_var_uniform:
499 case nir_var_mem_ubo:
500 case nir_var_mem_ssbo:
501 case nir_var_mem_shared:
502 case nir_var_mem_global:
503 return true;
504 default:
505 return false;
506 }
507 }
508
509 static bool
510 nir_variable_is_uniform(nir_variable *var, struct divergence_state *state)
511 {
512 if (nir_variable_mode_is_uniform(var->data.mode))
513 return true;
514
515 if (state->stage == MESA_SHADER_FRAGMENT &&
516 (state->options & nir_divergence_single_prim_per_subgroup) &&
517 var->data.mode == nir_var_shader_in &&
518 var->data.interpolation == INTERP_MODE_FLAT)
519 return true;
520
521 if (state->stage == MESA_SHADER_TESS_CTRL &&
522 (state->options & nir_divergence_single_patch_per_tcs_subgroup) &&
523 var->data.mode == nir_var_shader_out && var->data.patch)
524 return true;
525
526 if (state->stage == MESA_SHADER_TESS_EVAL &&
527 (state->options & nir_divergence_single_patch_per_tes_subgroup) &&
528 var->data.mode == nir_var_shader_in && var->data.patch)
529 return true;
530
531 return false;
532 }
533
534 static bool
535 visit_deref(nir_deref_instr *deref, struct divergence_state *state)
536 {
537 if (deref->dest.ssa.divergent)
538 return false;
539
540 bool is_divergent = false;
541 switch (deref->deref_type) {
542 case nir_deref_type_var:
543 is_divergent = !nir_variable_is_uniform(deref->var, state);
544 break;
545 case nir_deref_type_array:
546 case nir_deref_type_ptr_as_array:
547 is_divergent = deref->arr.index.ssa->divergent;
548 /* fallthrough */
549 case nir_deref_type_struct:
550 case nir_deref_type_array_wildcard:
551 is_divergent |= deref->parent.ssa->divergent;
552 break;
553 case nir_deref_type_cast:
554 is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
555 deref->parent.ssa->divergent;
556 break;
557 }
558
559 deref->dest.ssa.divergent = is_divergent;
560 return is_divergent;
561 }
562
563 static bool
564 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
565 {
566 switch (jump->type) {
567 case nir_jump_continue:
568 if (state->divergent_loop_continue)
569 return false;
570 if (state->divergent_loop_cf)
571 state->divergent_loop_continue = true;
572 return state->divergent_loop_continue;
573 case nir_jump_break:
574 if (state->divergent_loop_break)
575 return false;
576 if (state->divergent_loop_cf)
577 state->divergent_loop_break = true;
578 return state->divergent_loop_break;
579 case nir_jump_return:
580 unreachable("NIR divergence analysis: Unsupported return instruction.");
581 }
582 return false;
583 }
584
585 static bool
586 set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
587 {
588 def->divergent = false;
589 return true;
590 }
591
/* Recomputes divergence for every non-phi instruction of a block.
 * Returns true if any ssa-def was newly marked divergent. */
static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      /* on the first pass, start from the optimistic "uniform" assumption */
      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      switch (instr->type) {
      case nir_instr_type_alu:
         has_changed |= visit_alu(nir_instr_as_alu(instr));
         break;
      case nir_instr_type_intrinsic:
         has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr), state);
         break;
      case nir_instr_type_tex:
         has_changed |= visit_tex(nir_instr_as_tex(instr));
         break;
      case nir_instr_type_load_const:
         has_changed |= visit_load_const(nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_ssa_undef:
         has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_deref:
         has_changed |= visit_deref(nir_instr_as_deref(instr), state);
         break;
      case nir_instr_type_jump:
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
         break;
      case nir_instr_type_phi:
      case nir_instr_type_call:
      case nir_instr_type_parallel_copy:
         unreachable("NIR divergence analysis: Unsupported instruction type.");
      }
   }

   return has_changed;
}
636
637 /* There are 3 types of phi instructions:
638 * (1) gamma: represent the joining point of different paths
639 * created by an “if-then-else” branch.
640 * The resulting value is divergent if the branch condition
641 * or any of the source values is divergent. */
642 static bool
643 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
644 {
645 if (phi->dest.ssa.divergent)
646 return false;
647
648 unsigned defined_srcs = 0;
649 nir_foreach_phi_src(src, phi) {
650 /* if any source value is divergent, the resulting value is divergent */
651 if (src->src.ssa->divergent) {
652 phi->dest.ssa.divergent = true;
653 return true;
654 }
655 if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
656 defined_srcs++;
657 }
658 }
659
660 /* if the condition is divergent and two sources defined, the definition is divergent */
661 if (defined_srcs > 1 && if_cond_divergent) {
662 phi->dest.ssa.divergent = true;
663 return true;
664 }
665
666 return false;
667 }
668
/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def.
 * Returns true if the phi was newly marked divergent. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   /* divergence is monotonic: once divergent, always divergent */
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def* same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         /* a divergent continue merged different loop-carried values */
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
709
710 /* There are 3 types of phi instructions:
711 * (3) eta: represent values that leave a loop.
712 * The resulting value is divergent if the source value is divergent
713 * or any loop exit condition is divergent for a value which is
714 * not loop-invariant.
715 * (note: there should be no phi for loop-invariant variables.) */
716 static bool
717 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
718 {
719 if (phi->dest.ssa.divergent)
720 return false;
721
722 if (divergent_break) {
723 phi->dest.ssa.divergent = true;
724 return true;
725 }
726
727 /* if any source value is divergent, the resulting value is divergent */
728 nir_foreach_phi_src(src, phi) {
729 if (src->src.ssa->divergent) {
730 phi->dest.ssa.divergent = true;
731 return true;
732 }
733 }
734
735 return false;
736 }
737
/* Analyzes an if-statement: both legs are visited with the condition's
 * divergence folded into the loop-CF state, the gamma-phis following the
 * IF are updated, and loop break/continue divergence from the legs is
 * joined back into the caller's state.
 * Returns true if anything was newly marked divergent. */
static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   /* each leg gets its own copy so leg-local CF state doesn't leak across */
   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop-body
    * which means that a following break might be taken by some invocations, only */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}
775
/* Analyzes a loop to a fixpoint: the body is revisited until the
 * loop-header (mu) phis stop changing, then the loop-exit (eta) phis
 * after the loop are updated.
 * Returns true if anything was newly marked divergent. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* seed the phi's divergence from its preheader (initial) value only */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   return progress;
}
841
842 static bool
843 visit_cf_list(struct exec_list *list, struct divergence_state *state)
844 {
845 bool has_changed = false;
846
847 foreach_list_typed(nir_cf_node, node, node, list) {
848 switch (node->type) {
849 case nir_cf_node_block:
850 has_changed |= visit_block(nir_cf_node_as_block(node), state);
851 break;
852 case nir_cf_node_if:
853 has_changed |= visit_if(nir_cf_node_as_if(node), state);
854 break;
855 case nir_cf_node_loop:
856 has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
857 break;
858 case nir_cf_node_function:
859 unreachable("NIR divergence analysis: Unsupported cf_node type.");
860 }
861 }
862
863 return has_changed;
864 }
865
866 void
867 nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
868 {
869 struct divergence_state state = {
870 .options = options,
871 .stage = shader->info.stage,
872 .divergent_loop_cf = false,
873 .divergent_loop_continue = false,
874 .divergent_loop_break = false,
875 .first_visit = true,
876 };
877
878 visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
879 }
880