nir: lower interp_deref_at_vertex to load_input_vertex
[mesa.git] / src / compiler / nir / nir_divergence_analysis.c
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
39 static bool
40 visit_cf_list(bool *divergent, struct exec_list *list,
41 nir_divergence_options options, gl_shader_stage stage);
42
43 static bool
44 visit_alu(bool *divergent, nir_alu_instr *instr)
45 {
46 if (divergent[instr->dest.dest.ssa.index])
47 return false;
48
49 unsigned num_src = nir_op_infos[instr->op].num_inputs;
50
51 for (unsigned i = 0; i < num_src; i++) {
52 if (divergent[instr->src[i].src.ssa->index]) {
53 divergent[instr->dest.dest.ssa.index] = true;
54 return true;
55 }
56 }
57
58 return false;
59 }
60
/* Classify an intrinsic's destination as uniform or divergent.
 *
 * Intrinsics fall into four groups:
 *   - always uniform (subgroup reductions, draw-wide state, ...),
 *   - stage/option dependent (inputs, outputs, primitive id, ...),
 *   - divergent iff any source is divergent (loads, shuffles, ...),
 *   - always divergent (per-invocation ids, atomics with a result, ...).
 *
 * Returns true iff the destination's divergence flag changed.
 */
static bool
visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
                nir_divergence_options options, gl_shader_stage stage)
{
   /* Intrinsics without a destination produce no SSA value to classify. */
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   /* The flag only ever goes uniform -> divergent, so a divergent dest
    * can never change again. */
   if (divergent[instr->dest.ssa.index])
      return false;

   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_work_group_id:
   case nir_intrinsic_load_num_work_groups:
   case nir_intrinsic_load_local_group_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_alpha_ref_float:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_input:
      /* src[0] is the offset; a divergent offset makes the load divergent. */
      is_divergent = divergent[instr->src[0].ssa->index];
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      /* src[0] is the vertex index, src[1] the offset. */
      is_divergent = divergent[instr->src[1].ssa->index];
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
      is_divergent = divergent[instr->src[0].ssa->index];
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = divergent[instr->src[0].ssa->index];
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant
    */
   case nir_intrinsic_reduce:
      /* cluster_size == 0 means "whole subgroup": always uniform. */
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      /* fallthrough */
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = divergent[instr->src[0].ssa->index];
      /* Only idempotent (invariant) ops keep a uniform source uniform. */
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_get_buffer_size:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_deref_buffer_array_length:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (divergent[instr->src[i].ssa->index]) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      /* Uniform if either the value or the lane index is uniform:
       * a uniform index selects one lane's value for everyone, and a
       * uniform value is the same regardless of which lane is read. */
      is_divergent = divergent[instr->src[0].ssa->index] &&
                     divergent[instr->src[1].ssa->index];
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      /* Release builds: conservatively assume divergent. */
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   divergent[instr->dest.ssa.index] = is_divergent;
   return is_divergent;
}
429
430 static bool
431 visit_tex(bool *divergent, nir_tex_instr *instr)
432 {
433 if (divergent[instr->dest.ssa.index])
434 return false;
435
436 bool is_divergent = false;
437
438 for (unsigned i = 0; i < instr->num_srcs; i++) {
439 switch (instr->src[i].src_type) {
440 case nir_tex_src_sampler_deref:
441 case nir_tex_src_sampler_handle:
442 case nir_tex_src_sampler_offset:
443 is_divergent |= divergent[instr->src[i].src.ssa->index] &&
444 instr->sampler_non_uniform;
445 break;
446 case nir_tex_src_texture_deref:
447 case nir_tex_src_texture_handle:
448 case nir_tex_src_texture_offset:
449 is_divergent |= divergent[instr->src[i].src.ssa->index] &&
450 instr->texture_non_uniform;
451 break;
452 default:
453 is_divergent |= divergent[instr->src[i].src.ssa->index];
454 break;
455 }
456 }
457
458 divergent[instr->dest.ssa.index] = is_divergent;
459 return is_divergent;
460 }
461
/* Classify a phi's destination as uniform or divergent.
 * Returns true iff the destination's divergence flag changed.
 * Expects the shader to be in LCSSA form (see file comment).
 */
static bool
visit_phi(bool *divergent, nir_phi_instr *instr)
{
   /* There are 3 types of phi instructions:
    * (1) gamma: represent the joining point of different paths
    *     created by an "if-then-else" branch.
    *     The resulting value is divergent if the branch condition
    *     or any of the source values is divergent.
    *
    * (2) mu: which only exist at loop headers,
    *     merge initial and loop-carried values.
    *     The resulting value is divergent if any source value
    *     is divergent or a divergent loop continue condition
    *     is associated with a different ssa-def.
    *
    * (3) eta: represent values that leave a loop.
    *     The resulting value is divergent if the source value is divergent
    *     or any loop exit condition is divergent for a value which is
    *     not loop-invariant.
    *     (note: there should be no phi for loop-invariant variables.)
    */

   if (divergent[instr->dest.ssa.index])
      return false;

   nir_foreach_phi_src(src, instr) {
      /* if any source value is divergent, the resulting value is divergent */
      if (divergent[src->src.ssa->index]) {
         divergent[instr->dest.ssa.index] = true;
         return true;
      }
   }

   /* The CF node preceding the phi's block tells us which phi type this is. */
   nir_cf_node *prev = nir_cf_node_prev(&instr->instr.block->cf_node);

   if (!prev) {
      /* mu: if no predecessor node exists, the phi must be at a loop header */
      nir_loop *loop = nir_cf_node_as_loop(instr->instr.block->cf_node.parent);
      prev = nir_cf_node_prev(&loop->cf_node);
      nir_ssa_def* same = NULL;
      bool all_same = true;

      /* first, check if all loop-carried values are from the same ssa-def */
      nir_foreach_phi_src(src, instr) {
         if (src->pred == nir_cf_node_as_block(prev))
            continue;
         if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
            continue;
         if (!same)
            same = src->src.ssa;
         else if (same != src->src.ssa)
            all_same = false;
      }

      /* if all loop-carried values are the same, the resulting value is uniform */
      if (all_same)
         return false;

      /* check if the loop-carried values come from different ssa-defs
       * and the corresponding condition is divergent. */
      nir_foreach_phi_src(src, instr) {
         /* skip the loop preheader */
         if (src->pred == nir_cf_node_as_block(prev))
            continue;

         /* skip the unconditional back-edge */
         if (src->pred == nir_loop_last_block(loop))
            continue;

         /* if the value is undef, we don't need to check the condition */
         if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef)
            continue;

         nir_cf_node *current = src->pred->cf_node.parent;
         /* check recursively the conditions if any is divergent */
         while (current->type != nir_cf_node_loop) {
            assert (current->type == nir_cf_node_if);
            nir_if *if_node = nir_cf_node_as_if(current);
            if (divergent[if_node->condition.ssa->index]) {
               divergent[instr->dest.ssa.index] = true;
               return true;
            }
            current = current->parent;
         }
         assert(current == &loop->cf_node);
      }

   } else if (prev->type == nir_cf_node_if) {
      /* if only one of the incoming values is defined, the resulting value is uniform */
      unsigned defined_srcs = 0;
      nir_foreach_phi_src(src, instr) {
         if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
            defined_srcs++;
      }
      if (defined_srcs <= 1)
         return false;

      /* gamma: check if the condition is divergent */
      nir_if *if_node = nir_cf_node_as_if(prev);
      if (divergent[if_node->condition.ssa->index]) {
         divergent[instr->dest.ssa.index] = true;
         return true;
      }

   } else {
      /* eta: the predecessor must be a loop */
      assert(prev->type == nir_cf_node_loop);

      /* Check if any loop exit condition is divergent:
       * That is any break happens under divergent condition or
       * a break is preceeded by a divergent continue
       */
      nir_foreach_phi_src(src, instr) {
         nir_cf_node *current = src->pred->cf_node.parent;

         /* check recursively the conditions if any is divergent */
         while (current->type != nir_cf_node_loop) {
            assert(current->type == nir_cf_node_if);
            nir_if *if_node = nir_cf_node_as_if(current);
            if (divergent[if_node->condition.ssa->index]) {
               divergent[instr->dest.ssa.index] = true;
               return true;
            }
            current = current->parent;
         }
         assert(current == prev);

         /* check if any divergent continue happened before the break */
         nir_foreach_block_in_cf_node(block, prev) {
            if (block == src->pred)
               break;
            if (!nir_block_ends_in_jump(block))
               continue;

            nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
            if (jump->type != nir_jump_continue)
               continue;

            /* Walk up from the continue to the loop, OR-ing in every
             * enclosing if-condition on the way. */
            current = block->cf_node.parent;
            bool is_divergent = false;
            while (current != prev) {
               /* the continue belongs to an inner loop */
               if (current->type == nir_cf_node_loop) {
                  is_divergent = false;
                  break;
               }
               assert(current->type == nir_cf_node_if);
               nir_if *if_node = nir_cf_node_as_if(current);
               is_divergent |= divergent[if_node->condition.ssa->index];
               current = current->parent;
            }

            if (is_divergent) {
               divergent[instr->dest.ssa.index] = true;
               return true;
            }
         }
      }
   }

   return false;
}
624
625 static bool
626 visit_load_const(bool *divergent, nir_load_const_instr *instr)
627 {
628 return false;
629 }
630
631 static bool
632 visit_ssa_undef(bool *divergent, nir_ssa_undef_instr *instr)
633 {
634 return false;
635 }
636
637 static bool
638 nir_variable_mode_is_uniform(nir_variable_mode mode) {
639 switch (mode) {
640 case nir_var_uniform:
641 case nir_var_mem_ubo:
642 case nir_var_mem_ssbo:
643 case nir_var_mem_shared:
644 case nir_var_mem_global:
645 return true;
646 default:
647 return false;
648 }
649 }
650
651 static bool
652 nir_variable_is_uniform(nir_variable *var, nir_divergence_options options,
653 gl_shader_stage stage)
654 {
655 if (nir_variable_mode_is_uniform(var->data.mode))
656 return true;
657
658 if (stage == MESA_SHADER_FRAGMENT &&
659 (options & nir_divergence_single_prim_per_subgroup) &&
660 var->data.mode == nir_var_shader_in &&
661 var->data.interpolation == INTERP_MODE_FLAT)
662 return true;
663
664 if (stage == MESA_SHADER_TESS_CTRL &&
665 (options & nir_divergence_single_patch_per_tcs_subgroup) &&
666 var->data.mode == nir_var_shader_out && var->data.patch)
667 return true;
668
669 if (stage == MESA_SHADER_TESS_EVAL &&
670 (options & nir_divergence_single_patch_per_tes_subgroup) &&
671 var->data.mode == nir_var_shader_in && var->data.patch)
672 return true;
673
674 return false;
675 }
676
677 static bool
678 visit_deref(bool *divergent, nir_deref_instr *deref,
679 nir_divergence_options options, gl_shader_stage stage)
680 {
681 if (divergent[deref->dest.ssa.index])
682 return false;
683
684 bool is_divergent = false;
685 switch (deref->deref_type) {
686 case nir_deref_type_var:
687 is_divergent = !nir_variable_is_uniform(deref->var, options, stage);
688 break;
689 case nir_deref_type_array:
690 case nir_deref_type_ptr_as_array:
691 is_divergent = divergent[deref->arr.index.ssa->index];
692 /* fallthrough */
693 case nir_deref_type_struct:
694 case nir_deref_type_array_wildcard:
695 is_divergent |= divergent[deref->parent.ssa->index];
696 break;
697 case nir_deref_type_cast:
698 is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
699 divergent[deref->parent.ssa->index];
700 break;
701 }
702
703 divergent[deref->dest.ssa.index] = is_divergent;
704 return is_divergent;
705 }
706
/* Run divergence analysis over every instruction in a basic block,
 * dispatching on the instruction type.
 * Returns true if any instruction's divergence flag changed.
 */
static bool
visit_block(bool *divergent, nir_block *block, nir_divergence_options options,
            gl_shader_stage stage)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         has_changed |= visit_alu(divergent, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_intrinsic:
         has_changed |= visit_intrinsic(divergent, nir_instr_as_intrinsic(instr),
                                        options, stage);
         break;
      case nir_instr_type_tex:
         has_changed |= visit_tex(divergent, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         has_changed |= visit_phi(divergent, nir_instr_as_phi(instr));
         break;
      case nir_instr_type_load_const:
         has_changed |= visit_load_const(divergent, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_ssa_undef:
         has_changed |= visit_ssa_undef(divergent, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_deref:
         has_changed |= visit_deref(divergent, nir_instr_as_deref(instr),
                                    options, stage);
         break;
      case nir_instr_type_jump:
         /* Jumps define no SSA value, so there is nothing to classify. */
         break;
      case nir_instr_type_call:
      case nir_instr_type_parallel_copy:
         /* Calls must be inlined and parallel copies lowered before
          * running this pass. */
         unreachable("NIR divergence analysis: Unsupported instruction type.");
      }
   }

   return has_changed;
}
748
749 static bool
750 visit_if(bool *divergent, nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage)
751 {
752 return visit_cf_list(divergent, &if_stmt->then_list, options, stage) |
753 visit_cf_list(divergent, &if_stmt->else_list, options, stage);
754 }
755
756 static bool
757 visit_loop(bool *divergent, nir_loop *loop, nir_divergence_options options, gl_shader_stage stage)
758 {
759 bool has_changed = false;
760 bool repeat = true;
761
762 /* TODO: restructure this and the phi handling more efficiently */
763 while (repeat) {
764 repeat = visit_cf_list(divergent, &loop->body, options, stage);
765 has_changed |= repeat;
766 }
767
768 return has_changed;
769 }
770
/* Walk a control-flow list, dispatching on each CF node type.
 * Returns true if any contained instruction's divergence flag changed.
 */
static bool
visit_cf_list(bool *divergent, struct exec_list *list,
              nir_divergence_options options, gl_shader_stage stage)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(divergent, nir_cf_node_as_block(node),
                                    options, stage);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(divergent, nir_cf_node_as_if(node),
                                 options, stage);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(divergent, nir_cf_node_as_loop(node),
                                   options, stage);
         break;
      case nir_cf_node_function:
         /* Functions never appear nested inside a CF list here; the pass
          * starts directly from the entrypoint's body. */
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}
798
799
800 bool*
801 nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
802 {
803 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
804 bool *t = rzalloc_array(shader, bool, impl->ssa_alloc);
805
806 visit_cf_list(t, &impl->body, options, shader->info.stage);
807
808 return t;
809 }