/*
 * Copyright (c) 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
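
/*
 * Lower the compute-shader system-value intrinsics that the Intel back-end
 * does not provide directly: gl_LocalInvocationIndex and gl_LocalInvocationID
 * are rebuilt from the subgroup ID, the subgroup invocation and the dispatch
 * SIMD width, and gl_NumSubgroups is derived from the workgroup size and the
 * SIMD width.  64-bit loads of the workgroup ID and workgroup size are also
 * narrowed to 32 bits.
 */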

struct lower_intrinsics_state {
   nir_shader *nir;
   nir_function_impl *impl;
   bool progress;
   nir_builder builder;
};

static bool
lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
                                  nir_block *block)
{
   bool progress = false;
   nir_builder *b = &state->builder;
   nir_shader *nir = state->nir;

   /* Reuse calculated values inside the block. */
   nir_ssa_def *local_index = NULL;
   nir_ssa_def *local_id = NULL;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);

      b->cursor = nir_after_instr(&intrinsic->instr);

      nir_ssa_def *sysval;
      switch (intrinsic->intrinsic) {
      case nir_intrinsic_load_local_group_size:
      case nir_intrinsic_load_work_group_id:
         /* Convert this to 32-bit if it's not */
         if (intrinsic->dest.ssa.bit_size == 64) {
            intrinsic->dest.ssa.bit_size = 32;
            sysval = nir_u2u64(b, &intrinsic->dest.ssa);
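            /* Rewrite later uses only: passing the conversion's own
             * instruction as the cut-off point keeps the nir_u2u64 source
             * itself pointing at the original 32-bit destination.
             */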
            nir_ssa_def_rewrite_uses_after(&intrinsic->dest.ssa,
                                           nir_src_for_ssa(sysval),
                                           sysval->parent_instr);
         }
         continue;

      case nir_intrinsic_load_local_invocation_index:
      case nir_intrinsic_load_local_invocation_id: {
         /* First time we are using those, so let's calculate them. */
         if (!local_index) {
            assert(!local_id);
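
            /* Build a flat invocation index from the values the hardware
             * provides: each subgroup covers a SIMD-width worth of
             * consecutive invocations, so the index is
             * subgroup_id * simd_width + subgroup_invocation.
             */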
            nir_ssa_def *subgroup_id = nir_load_subgroup_id(b);

            nir_ssa_def *thread_local_id =
               nir_imul(b, subgroup_id, nir_load_simd_width_intel(b));
            nir_ssa_def *channel = nir_load_subgroup_invocation(b);
            nir_ssa_def *linear = nir_iadd(b, channel, thread_local_id);

            nir_ssa_def *size_x;
            nir_ssa_def *size_y;
            if (state->nir->info.cs.local_size_variable) {
               nir_ssa_def *size_xyz = nir_load_local_group_size(b);
               size_x = nir_channel(b, size_xyz, 0);
               size_y = nir_channel(b, size_xyz, 1);
            } else {
               size_x = nir_imm_int(b, nir->info.cs.local_size[0]);
               size_y = nir_imm_int(b, nir->info.cs.local_size[1]);
            }

            /* The local invocation index and ID must respect the following
             *
             *    gl_LocalInvocationID.x =
             *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;
             *    gl_LocalInvocationID.y =
             *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
             *       gl_WorkGroupSize.y;
             *    gl_LocalInvocationID.z =
             *       (gl_LocalInvocationIndex /
             *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
             *       gl_WorkGroupSize.z;
             *
             * However, the final % gl_WorkGroupSize.z does nothing unless we
             * accidentally end up with a gl_LocalInvocationIndex that is too
             * large so it can safely be omitted.
             */

            if (state->nir->info.cs.derivative_group != DERIVATIVE_GROUP_QUADS) {
               /* If we are not grouping in quads, just set the local
                * invocation index linearly, and calculate the local
                * invocation ID from that.
                */
               local_index = linear;

               nir_ssa_def *id_x, *id_y, *id_z;
               id_x = nir_umod(b, local_index, size_x);
               id_y = nir_umod(b, nir_udiv(b, local_index, size_x), size_y);
               id_z = nir_udiv(b, local_index, nir_imul(b, size_x, size_y));
               local_id = nir_vec3(b, id_x, id_y, id_z);
            } else {
               /* For quads, first we figure out the 2x2 grid the invocation
                * belongs to -- treating extra Z layers as just more rows.
                * Then map that into the local invocation ID (trivial) and the
                * local invocation index.  Skipping Z simplifies the index
                * calculation.
                */

               nir_ssa_def *one = nir_imm_int(b, 1);
               nir_ssa_def *double_size_x = nir_ishl(b, size_x, one);

               /* ID within a pair of rows, where each group of 4 is 2x2 quad. */
               nir_ssa_def *row_pair_id = nir_umod(b, linear, double_size_x);
               nir_ssa_def *y_row_pairs = nir_udiv(b, linear, double_size_x);
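
               /* Within a pair of rows, consecutive groups of four
                * invocations form one 2x2 quad: bit 0 of row_pair_id is the
                * X position inside the quad, bit 1 is the Y row of the quad,
                * and the remaining bits pick the quad along X.  Hence
                * x = (row_pair_id & 1) | ((row_pair_id >> 1) & ~1) and
                * y = 2 * y_row_pairs + ((row_pair_id >> 1) & 1).
                */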
               nir_ssa_def *x =
                  nir_ior(b,
                          nir_iand(b, row_pair_id, one),
                          nir_iand(b, nir_ishr(b, row_pair_id, one),
                                      nir_imm_int(b, 0xfffffffe)));
               nir_ssa_def *y =
                  nir_ior(b,
                          nir_ishl(b, y_row_pairs, one),
                          nir_iand(b, nir_ishr(b, row_pair_id, one), one));

               local_id = nir_vec3(b, x,
                                   nir_umod(b, y, size_y),
                                   nir_udiv(b, y, size_y));
               local_index = nir_iadd(b, x, nir_imul(b, y, size_x));
            }
         }

         assert(local_id);
         assert(local_index);
         if (intrinsic->intrinsic == nir_intrinsic_load_local_invocation_id)
            sysval = local_id;
         else
            sysval = local_index;
         break;
      }

      case nir_intrinsic_load_num_subgroups: {
         nir_ssa_def *size;
         if (state->nir->info.cs.local_size_variable) {
            nir_ssa_def *size_xyz = nir_load_local_group_size(b);
            nir_ssa_def *size_x = nir_channel(b, size_xyz, 0);
            nir_ssa_def *size_y = nir_channel(b, size_xyz, 1);
            nir_ssa_def *size_z = nir_channel(b, size_xyz, 2);
            size = nir_imul(b, nir_imul(b, size_x, size_y), size_z);
         } else {
            size = nir_imm_int(b, nir->info.cs.local_size[0] *
                                  nir->info.cs.local_size[1] *
                                  nir->info.cs.local_size[2]);
         }

         /* Calculate the equivalent of DIV_ROUND_UP. */
         nir_ssa_def *simd_width = nir_load_simd_width_intel(b);
         sysval =
            nir_udiv(b, nir_iadd_imm(b, nir_iadd(b, size, simd_width), -1),
                        simd_width);
         break;
      }

      default:
         continue;
      }

      if (intrinsic->dest.ssa.bit_size == 64)
         sysval = nir_u2u64(b, sysval);

      nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, nir_src_for_ssa(sysval));
      nir_instr_remove(&intrinsic->instr);

      progress = true;
      state->progress = true;
   }

   return progress;
}

static void
lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state)
{
   nir_builder_init(&state->builder, state->impl);

   nir_foreach_block(block, state->impl) {
      lower_cs_intrinsics_convert_block(state, block);
   }

   nir_metadata_preserve(state->impl,
                         nir_metadata_block_index | nir_metadata_dominance);
}

bool
brw_nir_lower_cs_intrinsics(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_COMPUTE ||
          nir->info.stage == MESA_SHADER_KERNEL);

   struct lower_intrinsics_state state = {
      .nir = nir,
   };

   /* Constraints from NV_compute_shader_derivatives. */
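   /* A quad derivative group needs even X and Y sizes so that every 2x2 quad
    * is complete, and a linear group needs the total workgroup size to be a
    * multiple of 4.
    */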
   if (!nir->info.cs.local_size_variable) {
      if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
         assert(nir->info.cs.local_size[0] % 2 == 0);
         assert(nir->info.cs.local_size[1] % 2 == 0);
      } else if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR) {
         ASSERTED unsigned local_workgroup_size =
            nir->info.cs.local_size[0] *
            nir->info.cs.local_size[1] *
            nir->info.cs.local_size[2];
         assert(local_workgroup_size % 4 == 0);
      }
   }

   nir_foreach_function(function, nir) {
      if (function->impl) {
         state.impl = function->impl;
         lower_cs_intrinsics_convert_impl(&state);
      }
   }

   return state.progress;
}