2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Jason Ekstrand (jason@jlekstrand.net)
31 * Implements common subexpression elimination
40 nir_alu_srcs_equal(nir_alu_instr
*alu1
, nir_alu_instr
*alu2
, unsigned src1
,
43 if (alu1
->src
[src1
].abs
!= alu2
->src
[src2
].abs
||
44 alu1
->src
[src1
].negate
!= alu2
->src
[src2
].negate
)
47 for (unsigned i
= 0; i
< nir_ssa_alu_instr_src_components(alu1
, src1
); i
++) {
48 if (alu1
->src
[src1
].swizzle
[i
] != alu2
->src
[src2
].swizzle
[i
])
52 return nir_srcs_equal(alu1
->src
[src1
].src
, alu2
->src
[src2
].src
);
56 nir_instrs_equal(nir_instr
*instr1
, nir_instr
*instr2
)
58 if (instr1
->type
!= instr2
->type
)
61 switch (instr1
->type
) {
62 case nir_instr_type_alu
: {
63 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
64 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
66 if (alu1
->op
!= alu2
->op
)
69 /* TODO: We can probably acutally do something more inteligent such
70 * as allowing different numbers and taking a maximum or something
72 if (alu1
->dest
.dest
.ssa
.num_components
!= alu2
->dest
.dest
.ssa
.num_components
)
75 if (nir_op_infos
[alu1
->op
].algebraic_properties
& NIR_OP_IS_COMMUTATIVE
) {
76 assert(nir_op_infos
[alu1
->op
].num_inputs
== 2);
77 return (nir_alu_srcs_equal(alu1
, alu2
, 0, 0) &&
78 nir_alu_srcs_equal(alu1
, alu2
, 1, 1)) ||
79 (nir_alu_srcs_equal(alu1
, alu2
, 0, 1) &&
80 nir_alu_srcs_equal(alu1
, alu2
, 1, 0));
82 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
83 if (!nir_alu_srcs_equal(alu1
, alu2
, i
, i
))
89 case nir_instr_type_tex
:
91 case nir_instr_type_load_const
: {
92 nir_load_const_instr
*load1
= nir_instr_as_load_const(instr1
);
93 nir_load_const_instr
*load2
= nir_instr_as_load_const(instr2
);
95 if (load1
->def
.num_components
!= load2
->def
.num_components
)
98 return memcmp(load1
->value
.f
, load2
->value
.f
,
99 load1
->def
.num_components
* sizeof(*load2
->value
.f
)) == 0;
101 case nir_instr_type_phi
: {
102 nir_phi_instr
*phi1
= nir_instr_as_phi(instr1
);
103 nir_phi_instr
*phi2
= nir_instr_as_phi(instr2
);
105 if (phi1
->instr
.block
!= phi2
->instr
.block
)
108 nir_foreach_phi_src(phi1
, src1
) {
109 nir_foreach_phi_src(phi2
, src2
) {
110 if (src1
->pred
== src2
->pred
) {
111 if (!nir_srcs_equal(src1
->src
, src2
->src
))
121 case nir_instr_type_intrinsic
: {
122 nir_intrinsic_instr
*intrinsic1
= nir_instr_as_intrinsic(instr1
);
123 nir_intrinsic_instr
*intrinsic2
= nir_instr_as_intrinsic(instr2
);
124 const nir_intrinsic_info
*info
=
125 &nir_intrinsic_infos
[intrinsic1
->intrinsic
];
127 if (intrinsic1
->intrinsic
!= intrinsic2
->intrinsic
||
128 intrinsic1
->num_components
!= intrinsic2
->num_components
)
131 if (info
->has_dest
&& intrinsic1
->dest
.ssa
.num_components
!=
132 intrinsic2
->dest
.ssa
.num_components
)
135 for (unsigned i
= 0; i
< info
->num_srcs
; i
++) {
136 if (!nir_srcs_equal(intrinsic1
->src
[i
], intrinsic2
->src
[i
]))
140 assert(info
->num_variables
== 0);
142 for (unsigned i
= 0; i
< info
->num_indices
; i
++) {
143 if (intrinsic1
->const_index
[i
] != intrinsic2
->const_index
[i
])
149 case nir_instr_type_call
:
150 case nir_instr_type_jump
:
151 case nir_instr_type_ssa_undef
:
152 case nir_instr_type_parallel_copy
:
154 unreachable("Invalid instruction type");
161 src_is_ssa(nir_src
*src
, void *data
)
168 dest_is_ssa(nir_dest
*dest
, void *data
)
175 nir_instr_can_cse(nir_instr
*instr
)
177 /* We only handle SSA. */
178 if (!nir_foreach_dest(instr
, dest_is_ssa
, NULL
) ||
179 !nir_foreach_src(instr
, src_is_ssa
, NULL
))
182 switch (instr
->type
) {
183 case nir_instr_type_alu
:
184 case nir_instr_type_load_const
:
185 case nir_instr_type_phi
:
187 case nir_instr_type_tex
:
188 return false; /* TODO */
189 case nir_instr_type_intrinsic
: {
190 const nir_intrinsic_info
*info
=
191 &nir_intrinsic_infos
[nir_instr_as_intrinsic(instr
)->intrinsic
];
192 return (info
->flags
& NIR_INTRINSIC_CAN_ELIMINATE
) &&
193 (info
->flags
& NIR_INTRINSIC_CAN_REORDER
) &&
194 info
->num_variables
== 0; /* not implemented yet */
196 case nir_instr_type_call
:
197 case nir_instr_type_jump
:
198 case nir_instr_type_ssa_undef
:
200 case nir_instr_type_parallel_copy
:
202 unreachable("Invalid instruction type");
209 nir_instr_get_dest_ssa_def(nir_instr
*instr
)
211 switch (instr
->type
) {
212 case nir_instr_type_alu
:
213 assert(nir_instr_as_alu(instr
)->dest
.dest
.is_ssa
);
214 return &nir_instr_as_alu(instr
)->dest
.dest
.ssa
;
215 case nir_instr_type_load_const
:
216 return &nir_instr_as_load_const(instr
)->def
;
217 case nir_instr_type_phi
:
218 assert(nir_instr_as_phi(instr
)->dest
.is_ssa
);
219 return &nir_instr_as_phi(instr
)->dest
.ssa
;
220 case nir_instr_type_intrinsic
:
221 assert(nir_instr_as_intrinsic(instr
)->dest
.is_ssa
);
222 return &nir_instr_as_intrinsic(instr
)->dest
.ssa
;
224 unreachable("We never ask for any of these");
229 nir_opt_cse_instr(nir_instr
*instr
, struct cse_state
*state
)
231 if (!nir_instr_can_cse(instr
))
234 for (struct exec_node
*node
= instr
->node
.prev
;
235 !exec_node_is_head_sentinel(node
); node
= node
->prev
) {
236 nir_instr
*other
= exec_node_data(nir_instr
, node
, node
);
237 if (nir_instrs_equal(instr
, other
)) {
238 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
239 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
240 nir_src_for_ssa(other_def
),
242 nir_instr_remove(instr
);
243 state
->progress
= true;
248 for (nir_block
*block
= instr
->block
->imm_dom
;
249 block
!= NULL
; block
= block
->imm_dom
) {
250 nir_foreach_instr_reverse(block
, other
) {
251 if (nir_instrs_equal(instr
, other
)) {
252 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
253 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
254 nir_src_for_ssa(other_def
),
256 nir_instr_remove(instr
);
257 state
->progress
= true;
265 nir_opt_cse_block(nir_block
*block
, void *void_state
)
267 struct cse_state
*state
= void_state
;
269 nir_foreach_instr_safe(block
, instr
)
270 nir_opt_cse_instr(instr
, state
);
276 nir_opt_cse_impl(nir_function_impl
*impl
)
278 struct cse_state state
;
280 state
.mem_ctx
= ralloc_parent(impl
);
281 state
.progress
= false;
283 nir_metadata_require(impl
, nir_metadata_dominance
);
285 nir_foreach_block(impl
, nir_opt_cse_block
, &state
);
288 nir_metadata_preserve(impl
, nir_metadata_block_index
|
289 nir_metadata_dominance
);
291 return state
.progress
;
295 nir_opt_cse(nir_shader
*shader
)
297 bool progress
= false;
299 nir_foreach_overload(shader
, overload
) {
301 progress
|= nir_opt_cse_impl(overload
->impl
);