nir: rename global/local to private/function memory
[mesa.git] / src / compiler / nir / nir_opt_large_constants.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
/* Per-variable bookkeeping used while deciding which function-local
 * variables can be moved into the shader's constant data blob.
 */
struct var_info {
   /* Cleared as soon as the variable is found not to qualify. */
   bool is_constant;

   /* Set once a load or copy that reads from the variable has been seen. */
   bool found_read;
};
32
33 static nir_ssa_def *
34 build_constant_load(nir_builder *b, nir_deref_instr *deref,
35 glsl_type_size_align_func size_align)
36 {
37 nir_variable *var = nir_deref_instr_get_variable(deref);
38
39 const unsigned bit_size = glsl_get_bit_size(deref->type);
40 const unsigned num_components = glsl_get_vector_elements(deref->type);
41
42 UNUSED unsigned var_size, var_align;
43 size_align(var->type, &var_size, &var_align);
44 assert(var->data.location % var_align == 0);
45
46 nir_intrinsic_instr *load =
47 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant);
48 load->num_components = num_components;
49 nir_intrinsic_set_base(load, var->data.location);
50 nir_intrinsic_set_range(load, var_size);
51 load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, size_align));
52 nir_ssa_dest_init(&load->instr, &load->dest,
53 num_components, bit_size, NULL);
54 nir_builder_instr_insert(b, &load->instr);
55
56 if (load->dest.ssa.bit_size < 8) {
57 /* Booleans are special-cased to be 32-bit
58 *
59 * Ideally, for drivers that can handle 32-bit booleans, we wouldn't
60 * emit the i2b here. However, at this point, the driver is likely to
61 * still have 1-bit booleans so we need to at least convert bit sizes.
62 * Unfortunately, we don't have a good way to annotate the load as
63 * loading a known boolean value so the optimizer isn't going to be
64 * able to get rid of the conversion. Some day, we may solve that
65 * problem but not today.
66 */
67 assert(glsl_type_is_boolean(deref->type));
68 load->dest.ssa.bit_size = 32;
69 return nir_i2b(b, &load->dest.ssa);
70 } else {
71 return &load->dest.ssa;
72 }
73 }
74
75 static void
76 handle_constant_store(nir_builder *b, nir_intrinsic_instr *store,
77 glsl_type_size_align_func size_align)
78 {
79 nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
80 assert(!nir_deref_instr_has_indirect(deref));
81
82 nir_variable *var = nir_deref_instr_get_variable(deref);
83
84 const unsigned bit_size = glsl_get_bit_size(deref->type);
85 const unsigned num_components = glsl_get_vector_elements(deref->type);
86
87 char *dst = (char *)b->shader->constant_data +
88 var->data.location +
89 nir_deref_instr_get_const_offset(deref, size_align);
90
91 nir_const_value *val = nir_src_as_const_value(store->src[1]);
92 switch (bit_size) {
93 case 1:
94 /* Booleans are special-cased to be 32-bit */
95 for (unsigned i = 0; i < num_components; i++)
96 ((int32_t *)dst)[i] = -(int)val->b[i];
97 break;
98
99 case 8:
100 for (unsigned i = 0; i < num_components; i++)
101 ((uint8_t *)dst)[i] = val->u8[i];
102 break;
103
104 case 16:
105 for (unsigned i = 0; i < num_components; i++)
106 ((uint16_t *)dst)[i] = val->u16[i];
107 break;
108
109 case 32:
110 for (unsigned i = 0; i < num_components; i++)
111 ((uint32_t *)dst)[i] = val->u32[i];
112 break;
113
114 case 64:
115 for (unsigned i = 0; i < num_components; i++)
116 ((uint64_t *)dst)[i] = val->u64[i];
117 break;
118
119 default:
120 unreachable("Invalid bit size");
121 }
122 }
123
124 /** Lower large constant variables to shader constant data
125 *
126 * This pass looks for large (type_size(var->type) > threshold) variables
127 * which are statically constant and moves them into shader constant data.
128 * This is especially useful when large tables are baked into the shader
129 * source code because they can be moved into a UBO by the driver to reduce
130 * register pressure and make indirect access cheaper.
131 */
132 bool
133 nir_opt_large_constants(nir_shader *shader,
134 glsl_type_size_align_func size_align,
135 unsigned threshold)
136 {
137 /* Default to a natural alignment if none is provided */
138 if (size_align == NULL)
139 size_align = glsl_get_natural_size_align_bytes;
140
141 /* This only works with a single entrypoint */
142 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
143
144 /* This pass can only be run once */
145 assert(shader->constant_data == NULL && shader->constant_data_size == 0);
146
147 /* The index parameter is unused for local variables so we'll use it for
148 * indexing into our array of variable metadata.
149 */
150 unsigned num_locals = 0;
151 nir_foreach_variable(var, &impl->locals)
152 var->data.index = num_locals++;
153
154 struct var_info *var_infos = malloc(num_locals * sizeof(struct var_info));
155 for (unsigned i = 0; i < num_locals; i++) {
156 var_infos[i] = (struct var_info) {
157 .is_constant = true,
158 .found_read = false,
159 };
160 }
161
162 /* First, walk through the shader and figure out what variables we can
163 * lower to the constant blob.
164 */
165 bool first_block = true;
166 nir_foreach_block(block, impl) {
167 nir_foreach_instr(instr, block) {
168 if (instr->type != nir_instr_type_intrinsic)
169 continue;
170
171 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
172
173 bool src_is_const = false;
174 nir_deref_instr *src_deref = NULL, *dst_deref = NULL;
175 switch (intrin->intrinsic) {
176 case nir_intrinsic_store_deref:
177 dst_deref = nir_src_as_deref(intrin->src[0]);
178 src_is_const = nir_src_is_const(intrin->src[1]);
179 break;
180
181 case nir_intrinsic_load_deref:
182 src_deref = nir_src_as_deref(intrin->src[0]);
183 break;
184
185 case nir_intrinsic_copy_deref:
186 /* We always assume the src and therefore the dst are not
187 * constants here. Copy and constant propagation passes should
188 * have taken care of this in most cases anyway.
189 */
190 dst_deref = nir_src_as_deref(intrin->src[0]);
191 src_deref = nir_src_as_deref(intrin->src[1]);
192 src_is_const = false;
193 break;
194
195 default:
196 continue;
197 }
198
199 if (dst_deref && dst_deref->mode == nir_var_function) {
200 nir_variable *var = nir_deref_instr_get_variable(dst_deref);
201 assert(var->data.mode == nir_var_function);
202
203 /* We only consider variables constant if they only have constant
204 * stores, all the stores come before any reads, and all stores
205 * come in the first block. We also can't handle indirect stores.
206 */
207 struct var_info *info = &var_infos[var->data.index];
208 if (!src_is_const || info->found_read || !first_block ||
209 nir_deref_instr_has_indirect(dst_deref))
210 info->is_constant = false;
211 }
212
213 if (src_deref && src_deref->mode == nir_var_function) {
214 nir_variable *var = nir_deref_instr_get_variable(src_deref);
215 assert(var->data.mode == nir_var_function);
216
217 var_infos[var->data.index].found_read = true;
218 }
219 }
220 first_block = false;
221 }
222
223 shader->constant_data_size = 0;
224 nir_foreach_variable(var, &impl->locals) {
225 struct var_info *info = &var_infos[var->data.index];
226 if (!info->is_constant)
227 continue;
228
229 unsigned var_size, var_align;
230 size_align(var->type, &var_size, &var_align);
231 if (var_size <= threshold || !info->found_read) {
232 /* Don't bother lowering small stuff or data that's never read */
233 info->is_constant = false;
234 continue;
235 }
236
237 var->data.location = ALIGN_POT(shader->constant_data_size, var_align);
238 shader->constant_data_size = var->data.location + var_size;
239 }
240
241 if (shader->constant_data_size == 0) {
242 free(var_infos);
243 return false;
244 }
245
246 shader->constant_data = rzalloc_size(shader, shader->constant_data_size);
247
248 nir_builder b;
249 nir_builder_init(&b, impl);
250
251 nir_foreach_block(block, impl) {
252 nir_foreach_instr_safe(instr, block) {
253 if (instr->type != nir_instr_type_intrinsic)
254 continue;
255
256 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
257
258 switch (intrin->intrinsic) {
259 case nir_intrinsic_load_deref: {
260 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
261 if (deref->mode != nir_var_function)
262 continue;
263
264 nir_variable *var = nir_deref_instr_get_variable(deref);
265 struct var_info *info = &var_infos[var->data.index];
266 if (info->is_constant) {
267 b.cursor = nir_after_instr(&intrin->instr);
268 nir_ssa_def *val = build_constant_load(&b, deref, size_align);
269 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
270 nir_src_for_ssa(val));
271 nir_instr_remove(&intrin->instr);
272 nir_deref_instr_remove_if_unused(deref);
273 }
274 break;
275 }
276
277 case nir_intrinsic_store_deref: {
278 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
279 if (deref->mode != nir_var_function)
280 continue;
281
282 nir_variable *var = nir_deref_instr_get_variable(deref);
283 struct var_info *info = &var_infos[var->data.index];
284 if (info->is_constant) {
285 b.cursor = nir_after_instr(&intrin->instr);
286 handle_constant_store(&b, intrin, size_align);
287 nir_instr_remove(&intrin->instr);
288 nir_deref_instr_remove_if_unused(deref);
289 }
290 break;
291 }
292
293 case nir_intrinsic_copy_deref: {
294 nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
295 if (deref->mode != nir_var_function)
296 continue;
297
298 nir_variable *var = nir_deref_instr_get_variable(deref);
299 struct var_info *info = &var_infos[var->data.index];
300 if (info->is_constant) {
301 b.cursor = nir_after_instr(&intrin->instr);
302 nir_ssa_def *val = build_constant_load(&b, deref, size_align);
303 nir_store_deref(&b, nir_src_as_deref(intrin->src[0]), val, ~0);
304 nir_instr_remove(&intrin->instr);
305 nir_deref_instr_remove_if_unused(deref);
306 }
307 break;
308 }
309
310 default:
311 continue;
312 }
313 }
314 }
315
316 /* Clean up the now unused variables */
317 nir_foreach_variable_safe(var, &impl->locals) {
318 if (var_infos[var->data.index].is_constant)
319 exec_node_remove(&var->node);
320 }
321
322 free(var_infos);
323
324 nir_metadata_preserve(impl, nir_metadata_block_index |
325 nir_metadata_dominance);
326 return true;
327 }