/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

/*
 * Implements SSA-based constant folding.
 */
35 struct constant_fold_state
{
37 nir_function_impl
*impl
;
/*
 * Accessor helpers for per-component constant values.
 *
 * SRC_COMP/SRC read source IDX of the ALU instruction as type T (f, i, or u),
 * honoring the source swizzle.  DEST_COMP/DEST write component CMP of the
 * freshly created load_const destination.  SRC and DEST implicitly use the
 * loop counter `i` declared by FOLD_PER_COMP, so they may only appear inside
 * a FOLD_PER_COMP expansion.
 */
#define SRC_COMP(T, IDX, CMP) src[IDX]->value.T[instr->src[IDX].swizzle[CMP]]
#define SRC(T, IDX) SRC_COMP(T, IDX, i)
#define DEST_COMP(T, CMP) dest->value.T[CMP]
#define DEST(T) DEST_COMP(T, i)

/* Evaluate EXPR once per destination component, binding `i` for SRC/DEST. */
#define FOLD_PER_COMP(EXPR) \
   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) { \
      EXPR; \
   }
52 constant_fold_alu_instr(nir_alu_instr
*instr
, void *mem_ctx
)
54 nir_load_const_instr
*src
[4], *dest
;
56 if (!instr
->dest
.dest
.is_ssa
)
59 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
60 if (!instr
->src
[i
].src
.is_ssa
)
63 if (instr
->src
[i
].src
.ssa
->parent_instr
->type
!= nir_instr_type_load_const
)
66 /* We shouldn't have any source modifiers in the optimization loop. */
67 assert(!instr
->src
[i
].abs
&& !instr
->src
[i
].negate
);
69 src
[i
] = nir_instr_as_load_const(instr
->src
[i
].src
.ssa
->parent_instr
);
72 /* We shouldn't have any saturate modifiers in the optimization loop. */
73 assert(!instr
->dest
.saturate
);
75 dest
= nir_load_const_instr_create(mem_ctx
,
76 instr
->dest
.dest
.ssa
.num_components
);
80 FOLD_PER_COMP(DEST(i
) = -SRC(i
, 0));
83 FOLD_PER_COMP(DEST(f
) = -SRC(f
, 0));
86 FOLD_PER_COMP(DEST(i
) = ~SRC(i
, 0));
89 FOLD_PER_COMP(DEST(f
) = (SRC(f
, 0) == 0.0f
) ? 1.0f
: 0.0f
);
92 FOLD_PER_COMP(DEST(f
) = 1.0f
/ SRC(f
, 0));
95 FOLD_PER_COMP(DEST(f
) = 1.0f
/ sqrt(SRC(f
, 0)));
98 FOLD_PER_COMP(DEST(f
) = sqrtf(SRC(f
, 0)));
101 FOLD_PER_COMP(DEST(f
) = expf(SRC(f
, 0)));
104 FOLD_PER_COMP(DEST(f
) = logf(SRC(f
, 0)));
107 FOLD_PER_COMP(DEST(f
) = exp2f(SRC(f
, 0)));
110 FOLD_PER_COMP(DEST(f
) = log2f(SRC(f
, 0)));
113 FOLD_PER_COMP(DEST(i
) = SRC(f
, 0));
116 FOLD_PER_COMP(DEST(u
) = SRC(f
, 0));
119 FOLD_PER_COMP(DEST(f
) = SRC(i
, 0));
122 FOLD_PER_COMP(DEST(u
) = (SRC(i
, 0) == 0.0f
) ? NIR_FALSE
: NIR_TRUE
);
125 FOLD_PER_COMP(DEST(f
) = SRC(u
, 0) ? 1.0f
: 0.0f
);
128 FOLD_PER_COMP(DEST(u
) = SRC(i
, 0) ? NIR_TRUE
: NIR_FALSE
);
131 FOLD_PER_COMP(DEST(f
) = SRC(u
, 0));
134 DEST_COMP(u
, 0) = (SRC_COMP(u
, 0, 0) || SRC_COMP(u
, 0, 1)) ?
135 NIR_TRUE
: NIR_FALSE
;
138 FOLD_PER_COMP(DEST(f
) = SRC(f
, 0) + SRC(f
, 1));
141 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) + SRC(i
, 1));
144 FOLD_PER_COMP(DEST(f
) = SRC(f
, 0) - SRC(f
, 1));
147 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) - SRC(i
, 1));
150 FOLD_PER_COMP(DEST(f
) = SRC(f
, 0) * SRC(f
, 1));
153 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) * SRC(i
, 1));
156 FOLD_PER_COMP(DEST(f
) = SRC(f
, 0) / SRC(f
, 1));
159 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) / SRC(i
, 1));
162 FOLD_PER_COMP(DEST(u
) = SRC(u
, 0) / SRC(u
, 1));
165 FOLD_PER_COMP(DEST(u
) = (SRC(f
, 0) < SRC(f
, 1)) ? NIR_TRUE
: NIR_FALSE
);
168 FOLD_PER_COMP(DEST(u
) = (SRC(f
, 0) >= SRC(f
, 1)) ? NIR_TRUE
: NIR_FALSE
);
171 FOLD_PER_COMP(DEST(u
) = (SRC(f
, 0) == SRC(f
, 1)) ? NIR_TRUE
: NIR_FALSE
);
174 FOLD_PER_COMP(DEST(u
) = (SRC(f
, 0) != SRC(f
, 1)) ? NIR_TRUE
: NIR_FALSE
);
177 FOLD_PER_COMP(DEST(u
) = (SRC(i
, 0) < SRC(i
, 1)) ? NIR_TRUE
: NIR_FALSE
);
180 FOLD_PER_COMP(DEST(u
) = (SRC(i
, 0) >= SRC(i
, 1)) ? NIR_TRUE
: NIR_FALSE
);
183 FOLD_PER_COMP(DEST(u
) = (SRC(i
, 0) == SRC(i
, 1)) ? NIR_TRUE
: NIR_FALSE
);
186 FOLD_PER_COMP(DEST(u
) = (SRC(i
, 0) != SRC(i
, 1)) ? NIR_TRUE
: NIR_FALSE
);
189 FOLD_PER_COMP(DEST(u
) = (SRC(u
, 0) < SRC(u
, 1)) ? NIR_TRUE
: NIR_FALSE
);
192 FOLD_PER_COMP(DEST(u
) = (SRC(u
, 0) >= SRC(u
, 1)) ? NIR_TRUE
: NIR_FALSE
);
195 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) << SRC(i
, 1));
198 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) >> SRC(i
, 1));
201 FOLD_PER_COMP(DEST(u
) = SRC(u
, 0) >> SRC(u
, 1));
204 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) & SRC(i
, 1));
207 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) | SRC(i
, 1));
210 FOLD_PER_COMP(DEST(i
) = SRC(i
, 0) ^ SRC(i
, 1));
217 nir_instr_insert_before(&instr
->instr
, &dest
->instr
);
224 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
, new_src
, mem_ctx
);
226 nir_instr_remove(&instr
->instr
);
233 constant_fold_deref(nir_instr
*instr
, nir_deref_var
*deref
)
235 bool progress
= false;
237 for (nir_deref
*tail
= deref
->deref
.child
; tail
; tail
= tail
->child
) {
238 if (tail
->deref_type
!= nir_deref_type_array
)
241 nir_deref_array
*arr
= nir_deref_as_array(tail
);
243 if (arr
->deref_array_type
== nir_deref_array_type_indirect
&&
244 arr
->indirect
.is_ssa
&&
245 arr
->indirect
.ssa
->parent_instr
->type
== nir_instr_type_load_const
) {
246 nir_load_const_instr
*indirect
=
247 nir_instr_as_load_const(arr
->indirect
.ssa
->parent_instr
);
249 arr
->base_offset
+= indirect
->value
.u
[0];
256 nir_instr_rewrite_src(instr
, &arr
->indirect
, empty
);
258 arr
->deref_array_type
= nir_deref_array_type_direct
;
268 constant_fold_intrinsic_instr(nir_intrinsic_instr
*instr
)
270 bool progress
= false;
272 unsigned num_vars
= nir_intrinsic_infos
[instr
->intrinsic
].num_variables
;
273 for (unsigned i
= 0; i
< num_vars
; i
++) {
274 progress
|= constant_fold_deref(&instr
->instr
, instr
->variables
[i
]);
281 constant_fold_tex_instr(nir_tex_instr
*instr
)
284 return constant_fold_deref(&instr
->instr
, instr
->sampler
);
290 constant_fold_block(nir_block
*block
, void *void_state
)
292 struct constant_fold_state
*state
= void_state
;
294 nir_foreach_instr_safe(block
, instr
) {
295 switch (instr
->type
) {
296 case nir_instr_type_alu
:
297 state
->progress
|= constant_fold_alu_instr(nir_instr_as_alu(instr
),
300 case nir_instr_type_intrinsic
:
302 constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr
));
304 case nir_instr_type_tex
:
305 state
->progress
|= constant_fold_tex_instr(nir_instr_as_tex(instr
));
308 /* Don't know how to constant fold */
317 nir_opt_constant_folding_impl(nir_function_impl
*impl
)
319 struct constant_fold_state state
;
321 state
.mem_ctx
= ralloc_parent(impl
);
323 state
.progress
= false;
325 nir_foreach_block(impl
, constant_fold_block
, &state
);
328 nir_metadata_preserve(impl
, nir_metadata_block_index
|
329 nir_metadata_dominance
);
331 return state
.progress
;
335 nir_opt_constant_folding(nir_shader
*shader
)
337 bool progress
= false;
339 nir_foreach_overload(shader
, overload
) {
341 progress
|= nir_opt_constant_folding_impl(overload
->impl
);