/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */
#include "nir.h"
#include "nir_constant_expressions.h"
/*
 * Implements SSA-based constant folding.
 */
35 struct constant_fold_state
{
37 unsigned execution_mode
;
38 bool has_load_constant
;
39 bool has_indirect_load_const
;
43 constant_fold_alu_instr(struct constant_fold_state
*state
, nir_alu_instr
*instr
)
45 nir_const_value src
[NIR_MAX_VEC_COMPONENTS
][NIR_MAX_VEC_COMPONENTS
];
47 if (!instr
->dest
.dest
.is_ssa
)
50 /* In the case that any outputs/inputs have unsized types, then we need to
51 * guess the bit-size. In this case, the validator ensures that all
52 * bit-sizes match so we can just take the bit-size from first
53 * output/input with an unsized type. If all the outputs/inputs are sized
54 * then we don't need to guess the bit-size at all because the code we
55 * generate for constant opcodes in this case already knows the sizes of
56 * the types involved and does not need the provided bit-size for anything
57 * (although it still requires to receive a valid bit-size).
59 unsigned bit_size
= 0;
60 if (!nir_alu_type_get_type_size(nir_op_infos
[instr
->op
].output_type
))
61 bit_size
= instr
->dest
.dest
.ssa
.bit_size
;
63 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
64 if (!instr
->src
[i
].src
.is_ssa
)
68 !nir_alu_type_get_type_size(nir_op_infos
[instr
->op
].input_types
[i
]))
69 bit_size
= instr
->src
[i
].src
.ssa
->bit_size
;
71 nir_instr
*src_instr
= instr
->src
[i
].src
.ssa
->parent_instr
;
73 if (src_instr
->type
!= nir_instr_type_load_const
)
75 nir_load_const_instr
* load_const
= nir_instr_as_load_const(src_instr
);
77 for (unsigned j
= 0; j
< nir_ssa_alu_instr_src_components(instr
, i
);
79 src
[i
][j
] = load_const
->value
[instr
->src
[i
].swizzle
[j
]];
82 /* We shouldn't have any source modifiers in the optimization loop. */
83 assert(!instr
->src
[i
].abs
&& !instr
->src
[i
].negate
);
89 /* We shouldn't have any saturate modifiers in the optimization loop. */
90 assert(!instr
->dest
.saturate
);
92 nir_const_value dest
[NIR_MAX_VEC_COMPONENTS
];
93 nir_const_value
*srcs
[NIR_MAX_VEC_COMPONENTS
];
94 memset(dest
, 0, sizeof(dest
));
95 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; ++i
)
97 nir_eval_const_opcode(instr
->op
, dest
, instr
->dest
.dest
.ssa
.num_components
,
98 bit_size
, srcs
, state
->execution_mode
);
100 nir_load_const_instr
*new_instr
=
101 nir_load_const_instr_create(state
->shader
,
102 instr
->dest
.dest
.ssa
.num_components
,
103 instr
->dest
.dest
.ssa
.bit_size
);
105 memcpy(new_instr
->value
, dest
, sizeof(*new_instr
->value
) * new_instr
->def
.num_components
);
107 nir_instr_insert_before(&instr
->instr
, &new_instr
->instr
);
109 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
,
110 nir_src_for_ssa(&new_instr
->def
));
112 nir_instr_remove(&instr
->instr
);
119 constant_fold_intrinsic_instr(struct constant_fold_state
*state
, nir_intrinsic_instr
*instr
)
121 bool progress
= false;
123 if ((instr
->intrinsic
== nir_intrinsic_demote_if
||
124 instr
->intrinsic
== nir_intrinsic_discard_if
) &&
125 nir_src_is_const(instr
->src
[0])) {
126 if (nir_src_as_bool(instr
->src
[0])) {
127 nir_intrinsic_op op
= instr
->intrinsic
== nir_intrinsic_discard_if
?
128 nir_intrinsic_discard
:
129 nir_intrinsic_demote
;
130 nir_intrinsic_instr
*new_instr
= nir_intrinsic_instr_create(state
->shader
, op
);
131 nir_instr_insert_before(&instr
->instr
, &new_instr
->instr
);
132 nir_instr_remove(&instr
->instr
);
135 /* We're not discarding, just delete the instruction */
136 nir_instr_remove(&instr
->instr
);
139 } else if (instr
->intrinsic
== nir_intrinsic_load_constant
) {
140 state
->has_load_constant
= true;
142 if (!nir_src_is_const(instr
->src
[0])) {
143 state
->has_indirect_load_const
= true;
147 unsigned offset
= nir_src_as_uint(instr
->src
[0]);
148 unsigned base
= nir_intrinsic_base(instr
);
149 unsigned range
= nir_intrinsic_range(instr
);
150 assert(base
+ range
<= state
->shader
->constant_data_size
);
152 nir_instr
*new_instr
= NULL
;
153 if (offset
>= range
) {
154 nir_ssa_undef_instr
*undef
=
155 nir_ssa_undef_instr_create(state
->shader
,
156 instr
->num_components
,
157 instr
->dest
.ssa
.bit_size
);
159 nir_ssa_def_rewrite_uses(&instr
->dest
.ssa
, nir_src_for_ssa(&undef
->def
));
160 new_instr
= &undef
->instr
;
162 nir_load_const_instr
*load_const
=
163 nir_load_const_instr_create(state
->shader
,
164 instr
->num_components
,
165 instr
->dest
.ssa
.bit_size
);
167 uint8_t *data
= (uint8_t*)state
->shader
->constant_data
+ base
;
168 for (unsigned i
= 0; i
< instr
->num_components
; i
++) {
169 unsigned bytes
= instr
->dest
.ssa
.bit_size
/ 8;
170 bytes
= MIN2(bytes
, range
- offset
);
172 memcpy(&load_const
->value
[i
].u64
, data
+ offset
, bytes
);
176 nir_ssa_def_rewrite_uses(&instr
->dest
.ssa
, nir_src_for_ssa(&load_const
->def
));
177 new_instr
= &load_const
->instr
;
180 nir_instr_insert_before(&instr
->instr
, new_instr
);
181 nir_instr_remove(&instr
->instr
);
189 constant_fold_block(struct constant_fold_state
*state
, nir_block
*block
)
191 bool progress
= false;
193 nir_foreach_instr_safe(instr
, block
) {
194 switch (instr
->type
) {
195 case nir_instr_type_alu
:
196 progress
|= constant_fold_alu_instr(state
, nir_instr_as_alu(instr
));
198 case nir_instr_type_intrinsic
:
200 constant_fold_intrinsic_instr(state
, nir_instr_as_intrinsic(instr
));
203 /* Don't know how to constant fold */
212 nir_opt_constant_folding_impl(struct constant_fold_state
*state
, nir_function_impl
*impl
)
214 bool progress
= false;
216 nir_foreach_block(block
, impl
) {
217 progress
|= constant_fold_block(state
, block
);
221 nir_metadata_preserve(impl
, nir_metadata_block_index
|
222 nir_metadata_dominance
);
224 nir_metadata_preserve(impl
, nir_metadata_all
);
231 nir_opt_constant_folding(nir_shader
*shader
)
233 bool progress
= false;
234 struct constant_fold_state state
;
235 state
.shader
= shader
;
236 state
.execution_mode
= shader
->info
.float_controls_execution_mode
;
237 state
.has_load_constant
= false;
238 state
.has_indirect_load_const
= false;
240 nir_foreach_function(function
, shader
) {
242 progress
|= nir_opt_constant_folding_impl(&state
, function
->impl
);
245 /* This doesn't free the constant data if there are no constant loads because
246 * the data might still be used but the loads have been lowered to load_ubo
248 if (state
.has_load_constant
&& !state
.has_indirect_load_const
&&
249 shader
->constant_data_size
) {
250 ralloc_free(shader
->constant_data
);
251 shader
->constant_data
= NULL
;
252 shader
->constant_data_size
= 0;