glsl/linker: check for xfb_offset aliasing
[mesa.git] / src / compiler / glsl / gl_nir_lower_buffers.c
1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "compiler/nir/nir.h"
25 #include "compiler/nir/nir_builder.h"
26 #include "gl_nir.h"
27 #include "ir_uniform.h"
28
29 #include "main/compiler.h"
30 #include "main/mtypes.h"
31
32 static nir_ssa_def *
33 get_block_array_index(nir_builder *b, nir_deref_instr *deref,
34 const struct gl_shader_program *shader_program)
35 {
36 unsigned array_elements = 1;
37
38 /* Build a block name such as "block[2][0]" for finding in the list of
39 * blocks later on as well as an optional dynamic index which gets added
40 * to the block index later.
41 */
42 const char *block_name = "";
43 nir_ssa_def *nonconst_index = NULL;
44 while (deref->deref_type == nir_deref_type_array) {
45 nir_deref_instr *parent = nir_deref_instr_parent(deref);
46 assert(parent && glsl_type_is_array(parent->type));
47 unsigned arr_size = glsl_get_length(parent->type);
48
49 if (nir_src_is_const(deref->arr.index)) {
50 unsigned arr_index = nir_src_as_uint(deref->arr.index);
51 arr_index = MIN2(arr_index, arr_size - 1);
52
53 /* We're walking the deref from the tail so prepend the array index */
54 block_name = ralloc_asprintf(b->shader, "[%u]%s", arr_index,
55 block_name);
56 } else {
57 nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
58 arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
59 nir_ssa_def *arr_offset = nir_imul_imm(b, arr_index, array_elements);
60 if (nonconst_index)
61 nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
62 else
63 nonconst_index = arr_offset;
64
65 /* We're walking the deref from the tail so prepend the array index */
66 block_name = ralloc_asprintf(b->shader, "[0]%s", block_name);
67 }
68
69 array_elements *= arr_size;
70 deref = parent;
71 }
72
73 assert(deref->deref_type == nir_deref_type_var);
74 block_name = ralloc_asprintf(b->shader, "%s%s",
75 glsl_get_type_name(deref->var->interface_type),
76 block_name);
77
78 struct gl_linked_shader *linked_shader =
79 shader_program->_LinkedShaders[b->shader->info.stage];
80
81 unsigned num_blocks;
82 struct gl_uniform_block **blocks;
83 if (deref->mode == nir_var_mem_ubo) {
84 num_blocks = linked_shader->Program->info.num_ubos;
85 blocks = linked_shader->Program->sh.UniformBlocks;
86 } else {
87 assert(deref->mode == nir_var_mem_ssbo);
88 num_blocks = linked_shader->Program->info.num_ssbos;
89 blocks = linked_shader->Program->sh.ShaderStorageBlocks;
90 }
91
92 for (unsigned i = 0; i < num_blocks; i++) {
93 if (strcmp(block_name, blocks[i]->Name) == 0) {
94 if (nonconst_index)
95 return nir_iadd_imm(b, nonconst_index, i);
96 else
97 return nir_imm_int(b, i);
98 }
99 }
100
101 unreachable("Failed to find the block by name");
102 }
103
104 static void
105 get_block_index_offset(nir_variable *var,
106 const struct gl_shader_program *shader_program,
107 gl_shader_stage stage,
108 unsigned *index, unsigned *offset)
109 {
110
111 struct gl_linked_shader *linked_shader =
112 shader_program->_LinkedShaders[stage];
113
114 unsigned num_blocks;
115 struct gl_uniform_block **blocks;
116 if (var->data.mode == nir_var_mem_ubo) {
117 num_blocks = linked_shader->Program->info.num_ubos;
118 blocks = linked_shader->Program->sh.UniformBlocks;
119 } else {
120 assert(var->data.mode == nir_var_mem_ssbo);
121 num_blocks = linked_shader->Program->info.num_ssbos;
122 blocks = linked_shader->Program->sh.ShaderStorageBlocks;
123 }
124
125 const char *block_name = glsl_get_type_name(var->interface_type);
126 for (unsigned i = 0; i < num_blocks; i++) {
127 if (strcmp(block_name, blocks[i]->Name) == 0) {
128 *index = i;
129 *offset = blocks[i]->Uniforms[var->data.location].Offset;
130 return;
131 }
132 }
133
134 unreachable("Failed to find the block by name");
135 }
136
137 static bool
138 lower_buffer_interface_derefs_impl(nir_function_impl *impl,
139 const struct gl_shader_program *shader_program)
140 {
141 bool progress = false;
142
143 nir_builder b;
144 nir_builder_init(&b, impl);
145
146 nir_foreach_block(block, impl) {
147 nir_foreach_instr_safe(instr, block) {
148 switch (instr->type) {
149 case nir_instr_type_deref: {
150 nir_deref_instr *deref = nir_instr_as_deref(instr);
151 if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
152 break;
153
154 /* We use nir_address_format_32bit_index_offset */
155 assert(deref->dest.is_ssa);
156 assert(deref->dest.ssa.bit_size == 32);
157 deref->dest.ssa.num_components = 2;
158
159 progress = true;
160
161 b.cursor = nir_before_instr(&deref->instr);
162
163 nir_ssa_def *ptr;
164 if (deref->deref_type == nir_deref_type_var &&
165 !glsl_type_is_interface(glsl_without_array(deref->var->type))) {
166 /* This variable is contained in an interface block rather than
167 * containing one. We need the block index and its offset
168 * inside that block
169 */
170 unsigned index, offset;
171 get_block_index_offset(deref->var, shader_program,
172 b.shader->info.stage,
173 &index, &offset);
174 ptr = nir_imm_ivec2(&b, index, offset);
175 } else if (glsl_type_is_interface(deref->type)) {
176 /* This is the last deref before the block boundary.
177 * Everything after this point is a byte offset and will be
178 * handled by nir_lower_explicit_io().
179 */
180 nir_ssa_def *index = get_block_array_index(&b, deref,
181 shader_program);
182 ptr = nir_vec2(&b, index, nir_imm_int(&b, 0));
183 } else {
184 /* This will get handled by nir_lower_explicit_io(). */
185 break;
186 }
187
188 nir_deref_instr *cast = nir_build_deref_cast(&b, ptr, deref->mode,
189 deref->type, 0);
190 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
191 nir_src_for_ssa(&cast->dest.ssa));
192 nir_deref_instr_remove_if_unused(deref);
193 break;
194 }
195
196 case nir_instr_type_intrinsic: {
197 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
198 switch (intrin->intrinsic) {
199 case nir_intrinsic_load_deref: {
200 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
201 if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
202 break;
203
204 /* UBO and SSBO Booleans are 32-bit integers where any non-zero
205 * value is considered true. NIR Booleans, on the other hand
206 * are 1-bit values until you get to a very late stage of the
207 * compilation process. We need to turn those 1-bit loads into
208 * a 32-bit load wrapped in an i2b to get a proper NIR boolean
209 * from the SSBO.
210 */
211 if (glsl_type_is_boolean(deref->type)) {
212 assert(intrin->dest.is_ssa);
213 b.cursor = nir_after_instr(&intrin->instr);
214 intrin->dest.ssa.bit_size = 32;
215 nir_ssa_def *bval = nir_i2b(&b, &intrin->dest.ssa);
216 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
217 nir_src_for_ssa(bval),
218 bval->parent_instr);
219 progress = true;
220 }
221 break;
222 }
223
224 case nir_intrinsic_store_deref: {
225 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
226 if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo)))
227 break;
228
229 /* SSBO Booleans are 32-bit integers where any non-zero value
230 * is considered true. NIR Booleans, on the other hand are
231 * 1-bit values until you get to a very late stage of the
232 * compilation process. We need to turn those 1-bit stores
233 * into a b2i32 followed by a 32-bit store. Technically the
234 * value we write doesn't have to be 0/1 so once Booleans are
235 * lowered to 32-bit values, we have an unneeded sanitation
236 * step but in practice it doesn't cost much.
237 */
238 if (glsl_type_is_boolean(deref->type)) {
239 assert(intrin->src[1].is_ssa);
240 b.cursor = nir_before_instr(&intrin->instr);
241 nir_ssa_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
242 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
243 nir_src_for_ssa(ival));
244 progress = true;
245 }
246 break;
247 }
248
249 case nir_intrinsic_copy_deref:
250 unreachable("copy_deref should be lowered by now");
251 break;
252
253 default:
254 /* Nothing to do */
255 break;
256 }
257 break;
258 }
259
260 default:
261 break; /* Nothing to do */
262 }
263 }
264 }
265
266 if (progress) {
267 nir_metadata_preserve(impl, nir_metadata_block_index |
268 nir_metadata_dominance);
269 }
270
271 return progress;
272 }
273
274 bool
275 gl_nir_lower_buffers(nir_shader *shader,
276 const struct gl_shader_program *shader_program)
277 {
278 bool progress = false;
279
280 /* First, we lower the derefs to turn block variable and array derefs into
281 * a nir_address_format_32bit_index_offset pointer. From there forward,
282 * we leave the derefs in place and let nir_lower_explicit_io handle them.
283 */
284 nir_foreach_function(function, shader) {
285 if (function->impl &&
286 lower_buffer_interface_derefs_impl(function->impl, shader_program))
287 progress = true;
288 }
289
290 /* If that did something, we validate and then call nir_lower_explicit_io
291 * to finish the process.
292 */
293 if (progress) {
294 nir_validate_shader(shader, "Lowering buffer interface derefs");
295 nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
296 nir_address_format_32bit_index_offset);
297 }
298
299 return progress;
300 }