nir: Add a new subgroups lowering pass
[mesa.git] / src / compiler / nir / nir_lower_subgroups.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 /**
28 * \file nir_lower_subgroups.c
29 */
30
31 static nir_ssa_def *
32 lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
33 {
34 /* This is safe to call on scalar things but it would be silly */
35 assert(intrin->dest.ssa.num_components > 1);
36
37 nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0],
38 intrin->num_components);
39 nir_ssa_def *reads[4];
40
41 for (unsigned i = 0; i < intrin->num_components; i++) {
42 nir_intrinsic_instr *chan_intrin =
43 nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
44 nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
45 1, intrin->dest.ssa.bit_size, NULL);
46 chan_intrin->num_components = 1;
47
48 /* value */
49 chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
50 /* invocation */
51 if (intrin->intrinsic == nir_intrinsic_read_invocation)
52 nir_src_copy(&chan_intrin->src[1], &intrin->src[1], chan_intrin);
53
54 nir_builder_instr_insert(b, &chan_intrin->instr);
55
56 reads[i] = &chan_intrin->dest.ssa;
57 }
58
59 return nir_vec(b, reads, intrin->num_components);
60 }
61
62 static nir_ssa_def *
63 high_subgroup_mask(nir_builder *b,
64 nir_ssa_def *count,
65 uint64_t base_mask)
66 {
67 /* group_mask could probably be calculated more efficiently but we want to
68 * be sure not to shift by 64 if the subgroup size is 64 because the GLSL
69 * shift operator is undefined in that case. In any case if we were worried
70 * about efficency this should probably be done further down because the
71 * subgroup size is likely to be known at compile time.
72 */
73 nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
74 nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
75 nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
76 nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
77 nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
78
79 return nir_iand(b, higher_bits, group_mask);
80 }
81
82 static nir_ssa_def *
83 lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
84 const nir_lower_subgroups_options *options)
85 {
86 switch (intrin->intrinsic) {
87 case nir_intrinsic_vote_any:
88 case nir_intrinsic_vote_all:
89 if (options->lower_vote_trivial)
90 return nir_ssa_for_src(b, intrin->src[0], 1);
91 break;
92
93 case nir_intrinsic_vote_eq:
94 if (options->lower_vote_trivial)
95 return nir_imm_int(b, NIR_TRUE);
96 break;
97
98 case nir_intrinsic_read_invocation:
99 case nir_intrinsic_read_first_invocation:
100 if (options->lower_to_scalar && intrin->num_components > 1)
101 return lower_read_invocation_to_scalar(b, intrin);
102 break;
103
104 case nir_intrinsic_load_subgroup_eq_mask:
105 case nir_intrinsic_load_subgroup_ge_mask:
106 case nir_intrinsic_load_subgroup_gt_mask:
107 case nir_intrinsic_load_subgroup_le_mask:
108 case nir_intrinsic_load_subgroup_lt_mask: {
109 if (!options->lower_subgroup_masks)
110 return NULL;
111
112 nir_ssa_def *count = nir_load_subgroup_invocation(b);
113
114 switch (intrin->intrinsic) {
115 case nir_intrinsic_load_subgroup_eq_mask:
116 return nir_ishl(b, nir_imm_int64(b, 1ull), count);
117 case nir_intrinsic_load_subgroup_ge_mask:
118 return high_subgroup_mask(b, count, ~0ull);
119 case nir_intrinsic_load_subgroup_gt_mask:
120 return high_subgroup_mask(b, count, ~1ull);
121 case nir_intrinsic_load_subgroup_le_mask:
122 return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
123 case nir_intrinsic_load_subgroup_lt_mask:
124 return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
125 default:
126 unreachable("you seriously can't tell this is unreachable?");
127 }
128 break;
129 }
130 default:
131 break;
132 }
133
134 return NULL;
135 }
136
137 static bool
138 lower_subgroups_impl(nir_function_impl *impl,
139 const nir_lower_subgroups_options *options)
140 {
141 nir_builder b;
142 nir_builder_init(&b, impl);
143 bool progress = false;
144
145 nir_foreach_block(block, impl) {
146 nir_foreach_instr_safe(instr, block) {
147 if (instr->type != nir_instr_type_intrinsic)
148 continue;
149
150 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
151 b.cursor = nir_before_instr(instr);
152
153 nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin, options);
154 if (!lower)
155 continue;
156
157 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(lower));
158 nir_instr_remove(instr);
159 progress = true;
160 }
161 }
162
163 return progress;
164 }
165
166 bool
167 nir_lower_subgroups(nir_shader *shader,
168 const nir_lower_subgroups_options *options)
169 {
170 bool progress = false;
171
172 nir_foreach_function(function, shader) {
173 if (!function->impl)
174 continue;
175
176 if (lower_subgroups_impl(function->impl, options)) {
177 progress = true;
178 nir_metadata_preserve(function->impl, nir_metadata_block_index |
179 nir_metadata_dominance);
180 }
181 }
182
183 return progress;
184 }