2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Jason Ekstrand (jason@jlekstrand.net)
32 * Implements SSA-based constant folding.
/*
 * Per-pass bookkeeping threaded through the block walk.
 * NOTE(review): only the 'impl' member is visible in this extract; the
 * uses below (state.mem_ctx, state.progress) imply additional members
 * that extraction dropped, along with the closing "};" — confirm
 * against upstream before compiling.
 */
struct constant_fold_state
{
   nir_function_impl *impl;

/* Read component CMP of source IDX, through its swizzle, as union member T
 * (T is one of f/i/u, selecting the float/int/uint view of the constant). */
#define SRC_COMP(T, IDX, CMP) src[IDX]->value.T[instr->src[IDX].swizzle[CMP]]

/* Shorthand for SRC_COMP using the per-component loop index 'i' that
 * FOLD_PER_COMP declares. */
#define SRC(T, IDX) SRC_COMP(T, IDX, i)

/* Write destination component CMP as union member T. */
#define DEST_COMP(T, CMP) dest->value.T[CMP]

/* Shorthand for DEST_COMP using the per-component loop index 'i'. */
#define DEST(T) DEST_COMP(T, i)

/* Evaluate EXPR once per component of the SSA destination.
 * NOTE(review): the rest of the macro body (EXPR and the closing brace)
 * is not visible in this extract — confirm upstream. */
#define FOLD_PER_COMP(EXPR) \
   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) { \
/*
 * Folds a single ALU instruction whose sources are all load_const
 * instructions: evaluates it at compile time, emits a replacement
 * nir_load_const_instr, rewrites all uses, and removes the original.
 * Returns true when the instruction was folded (progress).
 *
 * NOTE(review): extraction dropped many interleaved lines from this
 * function — the "static bool" storage class, braces, the early
 * "return false;"/"continue;" bodies of the guards, the switch statement
 * with its case labels and break statements, and the surrounding
 * "nir_src new_src = {...}" initializer near the end. Restore those from
 * upstream before compiling; only the surviving statements are annotated.
 */
constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
   nir_load_const_instr *src[4], *dest;

   /* Only SSA destinations can be folded (guarded statement not visible). */
   if (!instr->dest.dest.is_ssa)

   /* Every source must be an SSA value produced by a load_const;
    * otherwise the fold is abandoned (guard bodies not visible). */
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      if (!instr->src[i].src.is_ssa)

      if (instr->src[i].src.ssa->parent_instr->type != nir_instr_type_load_const)

      /* We shouldn't have any source modifiers in the optimization loop. */
      assert(!instr->src[i].abs && !instr->src[i].negate);

      src[i] = nir_instr_as_load_const(instr->src[i].src.ssa->parent_instr);

   /* We shouldn't have any saturate modifiers in the optimization loop. */
   assert(!instr->dest.saturate);

   /* Build the replacement constant with a matching component count. */
   dest = nir_load_const_instr_create(mem_ctx);
   dest->array_elems = 0;
   dest->num_components = instr->dest.dest.ssa.num_components;

   /* ---- per-opcode evaluation (case labels dropped by extraction) ---- */

   /* integer negate, float negate, bitwise not */
   FOLD_PER_COMP(DEST(i) = -SRC(i, 0));
   FOLD_PER_COMP(DEST(f) = -SRC(f, 0));
   FOLD_PER_COMP(DEST(i) = ~SRC(i, 0));

   /* float logical-not: 1.0 for a zero input, else 0.0 */
   FOLD_PER_COMP(DEST(f) = (SRC(f, 0) == 0.0f) ? 1.0f : 0.0f);

   /* reciprocal */
   FOLD_PER_COMP(DEST(f) = 1.0f / SRC(f, 0));

   /* reciprocal square root.
    * NOTE(review): uses double-precision sqrt() here but single-precision
    * sqrtf() just below — harmless but inconsistent; consider sqrtf(). */
   FOLD_PER_COMP(DEST(f) = 1.0f / sqrt(SRC(f, 0)));
   FOLD_PER_COMP(DEST(f) = sqrtf(SRC(f, 0)));

   /* transcendentals: e^x, ln(x), 2^x, log2(x) */
   FOLD_PER_COMP(DEST(f) = expf(SRC(f, 0)));
   FOLD_PER_COMP(DEST(f) = logf(SRC(f, 0)));
   FOLD_PER_COMP(DEST(f) = exp2f(SRC(f, 0)));
   FOLD_PER_COMP(DEST(f) = log2f(SRC(f, 0)));

   /* float -> int, float -> uint, int -> float conversions (C cast rules) */
   FOLD_PER_COMP(DEST(i) = SRC(f, 0));
   FOLD_PER_COMP(DEST(u) = SRC(f, 0));
   FOLD_PER_COMP(DEST(f) = SRC(i, 0));

   /* to-bool conversion.
    * NOTE(review): this reads the *integer* union member SRC(i, 0) but
    * compares it against the float literal 0.0f. If this is the
    * float-to-bool case it should read SRC(f, 0): reading the int bits
    * of a float constant misclassifies -0.0f (integer bits 0x80000000,
    * nonzero) as TRUE. Confirm the opcode this case belongs to upstream. */
   FOLD_PER_COMP(DEST(u) = (SRC(i, 0) == 0.0f) ? NIR_FALSE : NIR_TRUE);

   /* bool -> float, int -> bool, uint -> float */
   FOLD_PER_COMP(DEST(f) = SRC(u, 0) ? 1.0f : 0.0f);
   FOLD_PER_COMP(DEST(u) = SRC(i, 0) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(f) = SRC(u, 0));

   /* two-component boolean "any": scalar TRUE if either component is set */
   DEST_COMP(u, 0) = (SRC_COMP(u, 0, 0) || SRC_COMP(u, 0, 1)) ? NIR_TRUE : NIR_FALSE;

   /* float / integer add, subtract, multiply */
   FOLD_PER_COMP(DEST(f) = SRC(f, 0) + SRC(f, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) + SRC(i, 1));
   FOLD_PER_COMP(DEST(f) = SRC(f, 0) - SRC(f, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) - SRC(i, 1));
   FOLD_PER_COMP(DEST(f) = SRC(f, 0) * SRC(f, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) * SRC(i, 1));

   /* division.
    * NOTE(review): the integer/unsigned folds below divide with no check
    * for a constant zero divisor — that is undefined behavior executed at
    * compile time; verify upstream guards or rejects this case. */
   FOLD_PER_COMP(DEST(f) = SRC(f, 0) / SRC(f, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) / SRC(i, 1));
   FOLD_PER_COMP(DEST(u) = SRC(u, 0) / SRC(u, 1));

   /* float comparisons, producing NIR_TRUE/NIR_FALSE per component */
   FOLD_PER_COMP(DEST(u) = (SRC(f, 0) < SRC(f, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(f, 0) >= SRC(f, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(f, 0) == SRC(f, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(f, 0) != SRC(f, 1)) ? NIR_TRUE : NIR_FALSE);

   /* signed integer comparisons */
   FOLD_PER_COMP(DEST(u) = (SRC(i, 0) < SRC(i, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(i, 0) >= SRC(i, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(i, 0) == SRC(i, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(i, 0) != SRC(i, 1)) ? NIR_TRUE : NIR_FALSE);

   /* unsigned comparisons */
   FOLD_PER_COMP(DEST(u) = (SRC(u, 0) < SRC(u, 1)) ? NIR_TRUE : NIR_FALSE);
   FOLD_PER_COMP(DEST(u) = (SRC(u, 0) >= SRC(u, 1)) ? NIR_TRUE : NIR_FALSE);

   /* shifts and bitwise ops.
    * NOTE(review): a constant shift count >= 32 (or a signed left shift
    * of a negative value) is undefined behavior in C — verify upstream
    * masks or rejects such counts. */
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) << SRC(i, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) >> SRC(i, 1));
   FOLD_PER_COMP(DEST(u) = SRC(u, 0) >> SRC(u, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) & SRC(i, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) | SRC(i, 1));
   FOLD_PER_COMP(DEST(i) = SRC(i, 0) ^ SRC(i, 1));

   /* Hook the computed constant up as a fresh SSA def, reusing the old
    * instruction's component count and debug name, and place it right
    * before the instruction it replaces. */
   dest->dest.is_ssa = true;
   nir_ssa_def_init(&dest->instr, &dest->dest.ssa,
                    instr->dest.dest.ssa.num_components,
                    instr->dest.dest.ssa.name);
   nir_instr_insert_before(&instr->instr, &dest->instr);

   /* NOTE(review): the surrounding initializer (e.g. "nir_src new_src = {"
    * and its other designators) was dropped by extraction; only the .ssa
    * designator survives here. */
      .ssa = &dest->dest.ssa,

   /* Point every consumer of the old ALU result at the new constant,
    * then delete the dead ALU instruction. */
   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, new_src, mem_ctx);
   nir_instr_remove(&instr->instr);
/*
 * Folds constant array indirects in a variable-dereference chain: when an
 * array deref's indirect index is driven by a load_const, the constant is
 * baked into base_offset and the deref becomes a direct access.
 * Returns true if any deref in the chain was simplified.
 *
 * NOTE(review): extraction dropped interleaved lines here — the "static
 * bool" storage class, braces, the skip for non-array derefs, the
 * "nir_src empty" value passed to nir_instr_rewrite_src, the
 * "progress = true;" assignment, and the final "return progress;".
 * Restore from upstream before compiling.
 */
constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
   bool progress = false;

   /* Walk the chain hanging off the variable head. */
   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
      /* Only array derefs can carry an indirect index
       * (skip body not visible). */
      if (tail->deref_type != nir_deref_type_array)

      nir_deref_array *arr = nir_deref_as_array(tail);

      /* Fold only SSA indirects produced directly by a load_const. */
      if (arr->deref_array_type == nir_deref_array_type_indirect &&
          arr->indirect.is_ssa &&
          arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) {
         nir_load_const_instr *indirect =
            nir_instr_as_load_const(arr->indirect.ssa->parent_instr);

         /* Bake the constant index into the direct base offset
          * (first component, unsigned view). */
         arr->base_offset += indirect->value.u[0];

         /* Detach the now-unused indirect source; 'empty' is defined on
          * lines not visible in this extract. */
         nir_instr_rewrite_src(instr, &arr->indirect, empty);

         arr->deref_array_type = nir_deref_array_type_direct;
/*
 * Applies constant_fold_deref to every variable dereference carried by an
 * intrinsic instruction; returns true if any deref was simplified.
 * NOTE(review): the storage class, braces and final "return progress;"
 * were dropped by extraction — restore from upstream.
 */
constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
   bool progress = false;

   /* The per-intrinsic info table says how many variable derefs this
    * intrinsic carries. */
   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
   for (unsigned i = 0; i < num_vars; i++) {
      progress |= constant_fold_deref(&instr->instr, instr->variables[i]);
/*
 * Folds constant indirects in a texture instruction's sampler deref.
 * Returns the progress flag from constant_fold_deref.
 * NOTE(review): the storage class and braces were dropped by extraction.
 */
constant_fold_tex_instr(nir_tex_instr *instr)
   return constant_fold_deref(&instr->instr, instr->sampler);
/*
 * Per-block callback for nir_foreach_block: dispatches each instruction
 * to the matching folding helper and accumulates progress in the shared
 * state. Returns true so the walk continues (return not visible here).
 *
 * NOTE(review): extraction dropped the storage class, braces, break
 * statements, and continuation lines of the call arguments — e.g. the
 * mem_ctx argument of constant_fold_alu_instr, and most likely a
 * "state->progress |=" prefix for the intrinsic call, which in the
 * visible text appears (unlike the alu/tex cases) to discard its result.
 * Restore from upstream and verify.
 */
constant_fold_block(nir_block *block, void *void_state)
   struct constant_fold_state *state = void_state;

   /* _safe variant: a successful fold removes the current instruction
    * while we are iterating. */
   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_alu:
         state->progress |= constant_fold_alu_instr(nir_instr_as_alu(instr),
      case nir_instr_type_intrinsic:
         constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
      case nir_instr_type_tex:
         state->progress |= constant_fold_tex_instr(nir_instr_as_tex(instr));
         /* Don't know how to constant fold */
/*
 * Runs the constant-folding pass over one function implementation.
 * Returns true if anything was folded. Preserves only block-index and
 * dominance metadata; everything else is invalidated.
 * NOTE(review): the storage class and braces were dropped by extraction;
 * also, state.impl (declared in the struct above) is never assigned in
 * the visible text — confirm upstream.
 */
nir_opt_constant_folding_impl(nir_function_impl *impl)
   struct constant_fold_state state;

   /* Allocate replacement load_const instructions out of the same ralloc
    * context that owns the impl. */
   state.mem_ctx = ralloc_parent(impl);
   state.progress = false;

   nir_foreach_block(impl, constant_fold_block, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   return state.progress;
/*
 * Pass entry point: folds constants in every function overload of the
 * shader, OR-ing per-impl progress into the result.
 * NOTE(review): this definition runs past the end of the visible extract —
 * the guard for overloads without an impl, the closing braces, and the
 * final "return progress;" are not shown. Do not treat this text as the
 * complete function.
 */
nir_opt_constant_folding(nir_shader *shader)
   bool progress = false;

   nir_foreach_overload(shader, overload) {
      progress |= nir_opt_constant_folding_impl(overload->impl);