2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Jason Ekstrand (jason@jlekstrand.net)
31 * Implements common subexpression elimination
40 nir_alu_srcs_equal(nir_alu_src src1
, nir_alu_src src2
, uint8_t read_mask
)
42 if (src1
.abs
!= src2
.abs
|| src1
.negate
!= src2
.negate
)
45 for (int i
= 0; i
< 4; ++i
) {
46 if (!(read_mask
& (1 << i
)))
49 if (src1
.swizzle
[i
] != src2
.swizzle
[i
])
53 return nir_srcs_equal(src1
.src
, src2
.src
);
57 nir_instrs_equal(nir_instr
*instr1
, nir_instr
*instr2
)
59 if (instr1
->type
!= instr2
->type
)
62 switch (instr1
->type
) {
63 case nir_instr_type_alu
: {
64 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
65 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
67 if (alu1
->op
!= alu2
->op
)
70 /* TODO: We can probably acutally do something more inteligent such
71 * as allowing different numbers and taking a maximum or something
73 if (alu1
->dest
.dest
.ssa
.num_components
!= alu2
->dest
.dest
.ssa
.num_components
)
76 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
77 if (!nir_alu_srcs_equal(alu1
->src
[i
], alu2
->src
[i
],
78 (1 << alu1
->dest
.dest
.ssa
.num_components
) - 1))
83 case nir_instr_type_tex
:
85 case nir_instr_type_load_const
: {
86 nir_load_const_instr
*load1
= nir_instr_as_load_const(instr1
);
87 nir_load_const_instr
*load2
= nir_instr_as_load_const(instr2
);
89 if (load1
->num_components
!= load2
->num_components
)
92 return memcmp(load1
->value
.f
, load2
->value
.f
,
93 load1
->num_components
* sizeof load2
->value
.f
) == 0;
95 case nir_instr_type_phi
: {
96 nir_phi_instr
*phi1
= nir_instr_as_phi(instr1
);
97 nir_phi_instr
*phi2
= nir_instr_as_phi(instr2
);
99 if (phi1
->instr
.block
!= phi2
->instr
.block
)
102 foreach_list_typed(nir_phi_src
, src1
, node
, &phi1
->srcs
) {
103 foreach_list_typed(nir_phi_src
, src2
, node
, &phi2
->srcs
) {
104 if (src1
->pred
== src2
->pred
) {
105 if (!nir_srcs_equal(src1
->src
, src2
->src
))
115 case nir_instr_type_intrinsic
:
116 case nir_instr_type_call
:
117 case nir_instr_type_jump
:
118 case nir_instr_type_ssa_undef
:
119 case nir_instr_type_parallel_copy
:
121 unreachable("Invalid instruction type");
/* Per-source callback: nir_instr_can_cse() hands this to nir_foreach_src()
 * with NULL as the data argument.
 * NOTE(review): the body is not visible in this excerpt -- presumably it
 * reports whether `src` is an SSA source; confirm against the full file.
 */
128 src_is_ssa(nir_src
*src
, void *data
)
/* Per-destination callback: nir_instr_can_cse() hands this to
 * nir_foreach_dest() with NULL as the data argument.
 * NOTE(review): the body is not visible in this excerpt -- presumably it
 * reports whether `dest` is an SSA destination; confirm against the full
 * file.
 */
134 dest_is_ssa(nir_dest
*dest
, void *data
)
140 nir_instr_can_cse(nir_instr
*instr
)
142 switch (instr
->type
) {
143 case nir_instr_type_alu
:
144 case nir_instr_type_load_const
:
145 case nir_instr_type_phi
:
146 return nir_foreach_dest(instr
, dest_is_ssa
, NULL
) &&
147 nir_foreach_src(instr
, src_is_ssa
, NULL
);
148 case nir_instr_type_tex
:
149 return false; /* TODO */
150 case nir_instr_type_intrinsic
:
151 case nir_instr_type_call
:
152 case nir_instr_type_jump
:
153 case nir_instr_type_ssa_undef
:
155 case nir_instr_type_parallel_copy
:
157 unreachable("Invalid instruction type");
164 nir_instr_get_dest_ssa_def(nir_instr
*instr
)
166 switch (instr
->type
) {
167 case nir_instr_type_alu
:
168 assert(nir_instr_as_alu(instr
)->dest
.dest
.is_ssa
);
169 return &nir_instr_as_alu(instr
)->dest
.dest
.ssa
;
170 case nir_instr_type_load_const
:
171 assert(nir_instr_as_load_const(instr
)->dest
.is_ssa
);
172 return &nir_instr_as_load_const(instr
)->dest
.ssa
;
173 case nir_instr_type_phi
:
174 assert(nir_instr_as_phi(instr
)->dest
.is_ssa
);
175 return &nir_instr_as_phi(instr
)->dest
.ssa
;
177 unreachable("We never ask for any of these");
182 nir_opt_cse_instr(nir_instr
*instr
, struct cse_state
*state
)
184 if (!nir_instr_can_cse(instr
))
187 for (struct exec_node
*node
= instr
->node
.prev
;
188 !exec_node_is_head_sentinel(node
); node
= node
->prev
) {
189 nir_instr
*other
= exec_node_data(nir_instr
, node
, node
);
190 if (nir_instrs_equal(instr
, other
)) {
191 nir_src other_dest_src
= {
193 .ssa
= nir_instr_get_dest_ssa_def(other
),
195 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
196 other_dest_src
, state
->mem_ctx
);
197 nir_instr_remove(instr
);
198 state
->progress
= true;
203 for (nir_block
*block
= instr
->block
->imm_dom
;
204 block
!= NULL
; block
= block
->imm_dom
) {
205 nir_foreach_instr_reverse(block
, other
) {
206 if (nir_instrs_equal(instr
, other
)) {
207 nir_src other_dest_src
= {
209 .ssa
= nir_instr_get_dest_ssa_def(other
),
211 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
212 other_dest_src
, state
->mem_ctx
);
213 nir_instr_remove(instr
);
214 state
->progress
= true;
222 nir_opt_cse_block(nir_block
*block
, void *void_state
)
224 struct cse_state
*state
= void_state
;
226 nir_foreach_instr_safe(block
, instr
)
227 nir_opt_cse_instr(instr
, state
);
233 nir_opt_cse_impl(nir_function_impl
*impl
)
235 struct cse_state state
;
237 state
.mem_ctx
= ralloc_parent(impl
);
238 state
.progress
= false;
240 nir_metadata_require(impl
, nir_metadata_dominance
);
242 nir_foreach_block(impl
, nir_opt_cse_block
, &state
);
245 nir_metadata_preserve(impl
, nir_metadata_block_index
|
246 nir_metadata_dominance
);
248 return state
.progress
;
252 nir_opt_cse(nir_shader
*shader
)
254 bool progress
= false;
256 nir_foreach_overload(shader
, overload
) {
258 progress
|= nir_opt_cse_impl(overload
->impl
);