/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
26 #include "nir_deref.h"
35 /* Block that has all the variable stores. All the blocks with reads
36 * should be dominated by this block.
40 /* If is_constant, hold the collected constant data for this var. */
41 uint32_t constant_data_size
;
46 var_info_cmp(const void *_a
, const void *_b
)
48 const struct var_info
*a
= _a
;
49 const struct var_info
*b
= _b
;
50 uint32_t a_size
= a
->constant_data_size
;
51 uint32_t b_size
= b
->constant_data_size
;
53 if (a_size
< b_size
) {
55 } else if (a_size
> b_size
) {
57 } else if (a_size
== 0) {
58 /* Don't call memcmp with invalid pointers. */
61 return memcmp(a
->constant_data
, b
->constant_data
, a_size
);
66 build_constant_load(nir_builder
*b
, nir_deref_instr
*deref
,
67 glsl_type_size_align_func size_align
)
69 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
71 const unsigned bit_size
= glsl_get_bit_size(deref
->type
);
72 const unsigned num_components
= glsl_get_vector_elements(deref
->type
);
74 UNUSED
unsigned var_size
, var_align
;
75 size_align(var
->type
, &var_size
, &var_align
);
76 assert(var
->data
.location
% var_align
== 0);
78 UNUSED
unsigned deref_size
, deref_align
;
79 size_align(deref
->type
, &deref_size
, &deref_align
);
81 nir_intrinsic_instr
*load
=
82 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_constant
);
83 load
->num_components
= num_components
;
84 nir_intrinsic_set_base(load
, var
->data
.location
);
85 nir_intrinsic_set_range(load
, var_size
);
86 nir_intrinsic_set_align(load
, deref_align
, 0);
87 load
->src
[0] = nir_src_for_ssa(nir_build_deref_offset(b
, deref
, size_align
));
88 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
89 num_components
, bit_size
, NULL
);
90 nir_builder_instr_insert(b
, &load
->instr
);
92 if (load
->dest
.ssa
.bit_size
< 8) {
93 /* Booleans are special-cased to be 32-bit */
94 assert(glsl_type_is_boolean(deref
->type
));
95 assert(deref_size
== num_components
* 4);
96 load
->dest
.ssa
.bit_size
= 32;
97 return nir_b2b1(b
, &load
->dest
.ssa
);
99 assert(deref_size
== num_components
* bit_size
/ 8);
100 return &load
->dest
.ssa
;
105 handle_constant_store(void *mem_ctx
, struct var_info
*info
,
106 nir_deref_instr
*deref
, nir_const_value
*val
,
108 glsl_type_size_align_func size_align
)
110 assert(!nir_deref_instr_has_indirect(deref
));
111 const unsigned bit_size
= glsl_get_bit_size(deref
->type
);
112 const unsigned num_components
= glsl_get_vector_elements(deref
->type
);
114 if (info
->constant_data_size
== 0) {
115 unsigned var_size
, var_align
;
116 size_align(info
->var
->type
, &var_size
, &var_align
);
117 info
->constant_data_size
= var_size
;
118 info
->constant_data
= rzalloc_size(mem_ctx
, var_size
);
121 char *dst
= (char *)info
->constant_data
+
122 nir_deref_instr_get_const_offset(deref
, size_align
);
124 for (unsigned i
= 0; i
< num_components
; i
++) {
125 if (!(writemask
& (1 << i
)))
130 /* Booleans are special-cased to be 32-bit */
131 ((int32_t *)dst
)[i
] = -(int)val
[i
].b
;
135 ((uint8_t *)dst
)[i
] = val
[i
].u8
;
139 ((uint16_t *)dst
)[i
] = val
[i
].u16
;
143 ((uint32_t *)dst
)[i
] = val
[i
].u32
;
147 ((uint64_t *)dst
)[i
] = val
[i
].u64
;
151 unreachable("Invalid bit size");
156 /** Lower large constant variables to shader constant data
158 * This pass looks for large (type_size(var->type) > threshold) variables
159 * which are statically constant and moves them into shader constant data.
160 * This is especially useful when large tables are baked into the shader
161 * source code because they can be moved into a UBO by the driver to reduce
162 * register pressure and make indirect access cheaper.
165 nir_opt_large_constants(nir_shader
*shader
,
166 glsl_type_size_align_func size_align
,
169 /* Default to a natural alignment if none is provided */
170 if (size_align
== NULL
)
171 size_align
= glsl_get_natural_size_align_bytes
;
173 /* This only works with a single entrypoint */
174 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
176 unsigned num_locals
= nir_function_impl_index_vars(impl
);
178 if (num_locals
== 0) {
179 nir_shader_preserve_all_metadata(shader
);
183 struct var_info
*var_infos
= ralloc_array(NULL
, struct var_info
, num_locals
);
184 nir_foreach_function_temp_variable(var
, impl
) {
185 var_infos
[var
->index
] = (struct var_info
) {
192 nir_metadata_require(impl
, nir_metadata_dominance
);
194 /* First, walk through the shader and figure out what variables we can
195 * lower to the constant blob.
197 nir_foreach_block(block
, impl
) {
198 nir_foreach_instr(instr
, block
) {
199 if (instr
->type
== nir_instr_type_deref
) {
200 /* If we ever see a complex use of a deref_var, we have to assume
201 * that variable is non-constant because we can't guarantee we
202 * will find all of the writers of that variable.
204 nir_deref_instr
*deref
= nir_instr_as_deref(instr
);
205 if (deref
->deref_type
== nir_deref_type_var
&&
206 deref
->mode
== nir_var_function_temp
&&
207 nir_deref_instr_has_complex_use(deref
))
208 var_infos
[deref
->var
->index
].is_constant
= false;
212 if (instr
->type
!= nir_instr_type_intrinsic
)
215 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
217 bool src_is_const
= false;
218 nir_deref_instr
*src_deref
= NULL
, *dst_deref
= NULL
;
219 unsigned writemask
= 0;
220 switch (intrin
->intrinsic
) {
221 case nir_intrinsic_store_deref
:
222 dst_deref
= nir_src_as_deref(intrin
->src
[0]);
223 src_is_const
= nir_src_is_const(intrin
->src
[1]);
224 writemask
= nir_intrinsic_write_mask(intrin
);
227 case nir_intrinsic_load_deref
:
228 src_deref
= nir_src_as_deref(intrin
->src
[0]);
231 case nir_intrinsic_copy_deref
:
232 assert(!"Lowering of copy_deref with large constants is prohibited");
239 if (dst_deref
&& dst_deref
->mode
== nir_var_function_temp
) {
240 nir_variable
*var
= nir_deref_instr_get_variable(dst_deref
);
244 assert(var
->data
.mode
== nir_var_function_temp
);
246 struct var_info
*info
= &var_infos
[var
->index
];
247 if (!info
->is_constant
)
253 /* We only consider variables constant if they only have constant
254 * stores, all the stores come before any reads, and all stores
255 * come from the same block. We also can't handle indirect stores.
257 if (!src_is_const
|| info
->found_read
|| block
!= info
->block
||
258 nir_deref_instr_has_indirect(dst_deref
)) {
259 info
->is_constant
= false;
261 nir_const_value
*val
= nir_src_as_const_value(intrin
->src
[1]);
262 handle_constant_store(var_infos
, info
, dst_deref
, val
, writemask
,
267 if (src_deref
&& src_deref
->mode
== nir_var_function_temp
) {
268 nir_variable
*var
= nir_deref_instr_get_variable(src_deref
);
272 assert(var
->data
.mode
== nir_var_function_temp
);
274 /* We only consider variables constant if all the reads are
275 * dominated by the block that writes to it.
277 struct var_info
*info
= &var_infos
[var
->index
];
278 if (!info
->is_constant
)
281 if (!info
->block
|| !nir_block_dominates(info
->block
, block
))
282 info
->is_constant
= false;
284 info
->found_read
= true;
289 /* Allocate constant data space for each variable that just has constant
290 * data. We sort them by size and content so we can easily find
293 const unsigned old_constant_data_size
= shader
->constant_data_size
;
294 qsort(var_infos
, num_locals
, sizeof(struct var_info
), var_info_cmp
);
295 for (int i
= 0; i
< num_locals
; i
++) {
296 struct var_info
*info
= &var_infos
[i
];
298 /* Fix up indices after we sorted. */
299 info
->var
->index
= i
;
301 if (!info
->is_constant
)
304 unsigned var_size
, var_align
;
305 size_align(info
->var
->type
, &var_size
, &var_align
);
306 if (var_size
<= threshold
|| !info
->found_read
) {
307 /* Don't bother lowering small stuff or data that's never read */
308 info
->is_constant
= false;
312 if (i
> 0 && var_info_cmp(info
, &var_infos
[i
- 1]) == 0) {
313 info
->var
->data
.location
= var_infos
[i
- 1].var
->data
.location
;
314 info
->duplicate
= true;
316 info
->var
->data
.location
= ALIGN_POT(shader
->constant_data_size
, var_align
);
317 shader
->constant_data_size
= info
->var
->data
.location
+ var_size
;
321 if (shader
->constant_data_size
== old_constant_data_size
) {
322 nir_shader_preserve_all_metadata(shader
);
323 ralloc_free(var_infos
);
327 assert(shader
->constant_data_size
> old_constant_data_size
);
328 shader
->constant_data
= rerzalloc_size(shader
, shader
->constant_data
,
329 old_constant_data_size
,
330 shader
->constant_data_size
);
331 for (int i
= 0; i
< num_locals
; i
++) {
332 struct var_info
*info
= &var_infos
[i
];
333 if (!info
->duplicate
&& info
->is_constant
) {
334 memcpy((char *)shader
->constant_data
+ info
->var
->data
.location
,
335 info
->constant_data
, info
->constant_data_size
);
340 nir_builder_init(&b
, impl
);
342 nir_foreach_block(block
, impl
) {
343 nir_foreach_instr_safe(instr
, block
) {
344 if (instr
->type
!= nir_instr_type_intrinsic
)
347 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
349 switch (intrin
->intrinsic
) {
350 case nir_intrinsic_load_deref
: {
351 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
352 if (deref
->mode
!= nir_var_function_temp
)
355 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
359 struct var_info
*info
= &var_infos
[var
->index
];
360 if (info
->is_constant
) {
361 b
.cursor
= nir_after_instr(&intrin
->instr
);
362 nir_ssa_def
*val
= build_constant_load(&b
, deref
, size_align
);
363 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
364 nir_src_for_ssa(val
));
365 nir_instr_remove(&intrin
->instr
);
366 nir_deref_instr_remove_if_unused(deref
);
371 case nir_intrinsic_store_deref
: {
372 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
373 if (deref
->mode
!= nir_var_function_temp
)
376 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
380 struct var_info
*info
= &var_infos
[var
->index
];
381 if (info
->is_constant
) {
382 nir_instr_remove(&intrin
->instr
);
383 nir_deref_instr_remove_if_unused(deref
);
387 case nir_intrinsic_copy_deref
:
394 /* Clean up the now unused variables */
395 for (int i
= 0; i
< num_locals
; i
++) {
396 struct var_info
*info
= &var_infos
[i
];
397 if (info
->is_constant
)
398 exec_node_remove(&info
->var
->node
);
401 ralloc_free(var_infos
);
403 nir_metadata_preserve(impl
, nir_metadata_block_index
|
404 nir_metadata_dominance
);