nir/large_constants: Use ralloc for var_infos
[mesa.git] / src / compiler / nir / nir_opt_large_constants.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
28 struct var_info {
29 bool is_constant;
30 bool found_read;
31
32 /* Block that has all the variable stores. All the blocks with reads
33 * should be dominated by this block.
34 */
35 nir_block *block;
36 };
37
38 static nir_ssa_def *
39 build_constant_load(nir_builder *b, nir_deref_instr *deref,
40 glsl_type_size_align_func size_align)
41 {
42 nir_variable *var = nir_deref_instr_get_variable(deref);
43
44 const unsigned bit_size = glsl_get_bit_size(deref->type);
45 const unsigned num_components = glsl_get_vector_elements(deref->type);
46
47 UNUSED unsigned var_size, var_align;
48 size_align(var->type, &var_size, &var_align);
49 assert(var->data.location % var_align == 0);
50
51 nir_intrinsic_instr *load =
52 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant);
53 load->num_components = num_components;
54 nir_intrinsic_set_base(load, var->data.location);
55 nir_intrinsic_set_range(load, var_size);
56 load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, size_align));
57 nir_ssa_dest_init(&load->instr, &load->dest,
58 num_components, bit_size, NULL);
59 nir_builder_instr_insert(b, &load->instr);
60
61 if (load->dest.ssa.bit_size < 8) {
62 /* Booleans are special-cased to be 32-bit
63 *
64 * Ideally, for drivers that can handle 32-bit booleans, we wouldn't
65 * emit the i2b here. However, at this point, the driver is likely to
66 * still have 1-bit booleans so we need to at least convert bit sizes.
67 * Unfortunately, we don't have a good way to annotate the load as
68 * loading a known boolean value so the optimizer isn't going to be
69 * able to get rid of the conversion. Some day, we may solve that
70 * problem but not today.
71 */
72 assert(glsl_type_is_boolean(deref->type));
73 load->dest.ssa.bit_size = 32;
74 return nir_i2b(b, &load->dest.ssa);
75 } else {
76 return &load->dest.ssa;
77 }
78 }
79
80 static void
81 handle_constant_store(nir_builder *b, nir_intrinsic_instr *store,
82 glsl_type_size_align_func size_align)
83 {
84 nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
85 assert(!nir_deref_instr_has_indirect(deref));
86
87 nir_variable *var = nir_deref_instr_get_variable(deref);
88
89 const unsigned bit_size = glsl_get_bit_size(deref->type);
90 const unsigned num_components = glsl_get_vector_elements(deref->type);
91
92 char *dst = (char *)b->shader->constant_data +
93 var->data.location +
94 nir_deref_instr_get_const_offset(deref, size_align);
95
96 nir_const_value *val = nir_src_as_const_value(store->src[1]);
97 switch (bit_size) {
98 case 1:
99 /* Booleans are special-cased to be 32-bit */
100 for (unsigned i = 0; i < num_components; i++)
101 ((int32_t *)dst)[i] = -(int)val[i].b;
102 break;
103
104 case 8:
105 for (unsigned i = 0; i < num_components; i++)
106 ((uint8_t *)dst)[i] = val[i].u8;
107 break;
108
109 case 16:
110 for (unsigned i = 0; i < num_components; i++)
111 ((uint16_t *)dst)[i] = val[i].u16;
112 break;
113
114 case 32:
115 for (unsigned i = 0; i < num_components; i++)
116 ((uint32_t *)dst)[i] = val[i].u32;
117 break;
118
119 case 64:
120 for (unsigned i = 0; i < num_components; i++)
121 ((uint64_t *)dst)[i] = val[i].u64;
122 break;
123
124 default:
125 unreachable("Invalid bit size");
126 }
127 }
128
129 /** Lower large constant variables to shader constant data
130 *
131 * This pass looks for large (type_size(var->type) > threshold) variables
132 * which are statically constant and moves them into shader constant data.
133 * This is especially useful when large tables are baked into the shader
134 * source code because they can be moved into a UBO by the driver to reduce
135 * register pressure and make indirect access cheaper.
136 */
137 bool
138 nir_opt_large_constants(nir_shader *shader,
139 glsl_type_size_align_func size_align,
140 unsigned threshold)
141 {
142 /* Default to a natural alignment if none is provided */
143 if (size_align == NULL)
144 size_align = glsl_get_natural_size_align_bytes;
145
146 /* This only works with a single entrypoint */
147 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
148
149 /* This pass can only be run once */
150 assert(shader->constant_data == NULL && shader->constant_data_size == 0);
151
152 /* The index parameter is unused for local variables so we'll use it for
153 * indexing into our array of variable metadata.
154 */
155 unsigned num_locals = 0;
156 nir_foreach_variable(var, &impl->locals)
157 var->data.index = num_locals++;
158
159 if (num_locals == 0)
160 return false;
161
162 struct var_info *var_infos = ralloc_array(NULL, struct var_info, num_locals);
163 for (unsigned i = 0; i < num_locals; i++) {
164 var_infos[i] = (struct var_info) {
165 .is_constant = true,
166 .found_read = false,
167 };
168 }
169
170 nir_metadata_require(impl, nir_metadata_dominance);
171
172 /* First, walk through the shader and figure out what variables we can
173 * lower to the constant blob.
174 */
175 nir_foreach_block(block, impl) {
176 nir_foreach_instr(instr, block) {
177 if (instr->type != nir_instr_type_intrinsic)
178 continue;
179
180 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
181
182 bool src_is_const = false;
183 nir_deref_instr *src_deref = NULL, *dst_deref = NULL;
184 switch (intrin->intrinsic) {
185 case nir_intrinsic_store_deref:
186 dst_deref = nir_src_as_deref(intrin->src[0]);
187 src_is_const = nir_src_is_const(intrin->src[1]);
188 break;
189
190 case nir_intrinsic_load_deref:
191 src_deref = nir_src_as_deref(intrin->src[0]);
192 break;
193
194 case nir_intrinsic_copy_deref:
195 /* We always assume the src and therefore the dst are not
196 * constants here. Copy and constant propagation passes should
197 * have taken care of this in most cases anyway.
198 */
199 dst_deref = nir_src_as_deref(intrin->src[0]);
200 src_deref = nir_src_as_deref(intrin->src[1]);
201 src_is_const = false;
202 break;
203
204 default:
205 continue;
206 }
207
208 if (dst_deref && dst_deref->mode == nir_var_function_temp) {
209 nir_variable *var = nir_deref_instr_get_variable(dst_deref);
210 assert(var->data.mode == nir_var_function_temp);
211
212 struct var_info *info = &var_infos[var->data.index];
213 if (!info->is_constant)
214 continue;
215
216 if (!info->block)
217 info->block = block;
218
219 /* We only consider variables constant if they only have constant
220 * stores, all the stores come before any reads, and all stores
221 * come from the same block. We also can't handle indirect stores.
222 */
223 if (!src_is_const || info->found_read || block != info->block ||
224 nir_deref_instr_has_indirect(dst_deref))
225 info->is_constant = false;
226 }
227
228 if (src_deref && src_deref->mode == nir_var_function_temp) {
229 nir_variable *var = nir_deref_instr_get_variable(src_deref);
230 assert(var->data.mode == nir_var_function_temp);
231
232 /* We only consider variables constant if all the reads are
233 * dominated by the block that writes to it.
234 */
235 struct var_info *info = &var_infos[var->data.index];
236 if (!info->is_constant)
237 continue;
238
239 if (!info->block || !nir_block_dominates(info->block, block))
240 info->is_constant = false;
241
242 info->found_read = true;
243 }
244 }
245 }
246
247 shader->constant_data_size = 0;
248 nir_foreach_variable(var, &impl->locals) {
249 struct var_info *info = &var_infos[var->data.index];
250 if (!info->is_constant)
251 continue;
252
253 unsigned var_size, var_align;
254 size_align(var->type, &var_size, &var_align);
255 if (var_size <= threshold || !info->found_read) {
256 /* Don't bother lowering small stuff or data that's never read */
257 info->is_constant = false;
258 continue;
259 }
260
261 var->data.location = ALIGN_POT(shader->constant_data_size, var_align);
262 shader->constant_data_size = var->data.location + var_size;
263 }
264
265 if (shader->constant_data_size == 0) {
266 ralloc_free(var_infos);
267 return false;
268 }
269
270 shader->constant_data = rzalloc_size(shader, shader->constant_data_size);
271
272 nir_builder b;
273 nir_builder_init(&b, impl);
274
275 nir_foreach_block(block, impl) {
276 nir_foreach_instr_safe(instr, block) {
277 if (instr->type != nir_instr_type_intrinsic)
278 continue;
279
280 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
281
282 switch (intrin->intrinsic) {
283 case nir_intrinsic_load_deref: {
284 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
285 if (deref->mode != nir_var_function_temp)
286 continue;
287
288 nir_variable *var = nir_deref_instr_get_variable(deref);
289 struct var_info *info = &var_infos[var->data.index];
290 if (info->is_constant) {
291 b.cursor = nir_after_instr(&intrin->instr);
292 nir_ssa_def *val = build_constant_load(&b, deref, size_align);
293 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
294 nir_src_for_ssa(val));
295 nir_instr_remove(&intrin->instr);
296 nir_deref_instr_remove_if_unused(deref);
297 }
298 break;
299 }
300
301 case nir_intrinsic_store_deref: {
302 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
303 if (deref->mode != nir_var_function_temp)
304 continue;
305
306 nir_variable *var = nir_deref_instr_get_variable(deref);
307 struct var_info *info = &var_infos[var->data.index];
308 if (info->is_constant) {
309 b.cursor = nir_after_instr(&intrin->instr);
310 handle_constant_store(&b, intrin, size_align);
311 nir_instr_remove(&intrin->instr);
312 nir_deref_instr_remove_if_unused(deref);
313 }
314 break;
315 }
316
317 case nir_intrinsic_copy_deref: {
318 nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
319 if (deref->mode != nir_var_function_temp)
320 continue;
321
322 nir_variable *var = nir_deref_instr_get_variable(deref);
323 struct var_info *info = &var_infos[var->data.index];
324 if (info->is_constant) {
325 b.cursor = nir_after_instr(&intrin->instr);
326 nir_ssa_def *val = build_constant_load(&b, deref, size_align);
327 nir_store_deref(&b, nir_src_as_deref(intrin->src[0]), val, ~0);
328 nir_instr_remove(&intrin->instr);
329 nir_deref_instr_remove_if_unused(deref);
330 }
331 break;
332 }
333
334 default:
335 continue;
336 }
337 }
338 }
339
340 /* Clean up the now unused variables */
341 nir_foreach_variable_safe(var, &impl->locals) {
342 if (var_infos[var->data.index].is_constant)
343 exec_node_remove(&var->node);
344 }
345
346 ralloc_free(var_infos);
347
348 nir_metadata_preserve(impl, nir_metadata_block_index |
349 nir_metadata_dominance);
350 return true;
351 }