/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

/*
 * Implements common subexpression elimination
 */
40 nir_alu_srcs_equal(nir_alu_src src1
, nir_alu_src src2
, uint8_t read_mask
)
42 if (src1
.abs
!= src2
.abs
|| src1
.negate
!= src2
.negate
)
45 for (int i
= 0; i
< 4; ++i
) {
46 if (!(read_mask
& (1 << i
)))
49 if (src1
.swizzle
[i
] != src2
.swizzle
[i
])
53 return nir_srcs_equal(src1
.src
, src2
.src
);
57 nir_instrs_equal(nir_instr
*instr1
, nir_instr
*instr2
)
59 if (instr1
->type
!= instr2
->type
)
62 switch (instr1
->type
) {
63 case nir_instr_type_alu
: {
64 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
65 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
67 if (alu1
->op
!= alu2
->op
)
70 /* TODO: We can probably acutally do something more inteligent such
71 * as allowing different numbers and taking a maximum or something
73 if (alu1
->dest
.dest
.ssa
.num_components
!= alu2
->dest
.dest
.ssa
.num_components
)
76 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
77 if (!nir_alu_srcs_equal(alu1
->src
[i
], alu2
->src
[i
],
78 (1 << alu1
->dest
.dest
.ssa
.num_components
) - 1))
83 case nir_instr_type_tex
:
85 case nir_instr_type_load_const
: {
86 nir_load_const_instr
*load1
= nir_instr_as_load_const(instr1
);
87 nir_load_const_instr
*load2
= nir_instr_as_load_const(instr2
);
89 if (load1
->def
.num_components
!= load2
->def
.num_components
)
92 return memcmp(load1
->value
.f
, load2
->value
.f
,
93 load1
->def
.num_components
* sizeof(*load2
->value
.f
)) == 0;
95 case nir_instr_type_phi
: {
96 nir_phi_instr
*phi1
= nir_instr_as_phi(instr1
);
97 nir_phi_instr
*phi2
= nir_instr_as_phi(instr2
);
99 if (phi1
->instr
.block
!= phi2
->instr
.block
)
102 nir_foreach_phi_src(phi1
, src1
) {
103 nir_foreach_phi_src(phi2
, src2
) {
104 if (src1
->pred
== src2
->pred
) {
105 if (!nir_srcs_equal(src1
->src
, src2
->src
))
115 case nir_instr_type_intrinsic
: {
116 nir_intrinsic_instr
*intrinsic1
= nir_instr_as_intrinsic(instr1
);
117 nir_intrinsic_instr
*intrinsic2
= nir_instr_as_intrinsic(instr2
);
118 const nir_intrinsic_info
*info
=
119 &nir_intrinsic_infos
[intrinsic1
->intrinsic
];
121 if (intrinsic1
->intrinsic
!= intrinsic2
->intrinsic
||
122 intrinsic1
->num_components
!= intrinsic2
->num_components
)
125 if (info
->has_dest
&& intrinsic1
->dest
.ssa
.num_components
!=
126 intrinsic2
->dest
.ssa
.num_components
)
129 for (unsigned i
= 0; i
< info
->num_srcs
; i
++) {
130 if (!nir_srcs_equal(intrinsic1
->src
[i
], intrinsic2
->src
[i
]))
134 assert(info
->num_variables
== 0);
136 for (unsigned i
= 0; i
< info
->num_indices
; i
++) {
137 if (intrinsic1
->const_index
[i
] != intrinsic2
->const_index
[i
])
143 case nir_instr_type_call
:
144 case nir_instr_type_jump
:
145 case nir_instr_type_ssa_undef
:
146 case nir_instr_type_parallel_copy
:
148 unreachable("Invalid instruction type");
155 src_is_ssa(nir_src
*src
, void *data
)
161 dest_is_ssa(nir_dest
*dest
, void *data
)
167 nir_instr_can_cse(nir_instr
*instr
)
169 /* We only handle SSA. */
170 if (!nir_foreach_dest(instr
, dest_is_ssa
, NULL
) ||
171 !nir_foreach_src(instr
, src_is_ssa
, NULL
))
174 switch (instr
->type
) {
175 case nir_instr_type_alu
:
176 case nir_instr_type_load_const
:
177 case nir_instr_type_phi
:
179 case nir_instr_type_tex
:
180 return false; /* TODO */
181 case nir_instr_type_intrinsic
: {
182 const nir_intrinsic_info
*info
=
183 &nir_intrinsic_infos
[nir_instr_as_intrinsic(instr
)->intrinsic
];
184 return (info
->flags
& NIR_INTRINSIC_CAN_ELIMINATE
) &&
185 (info
->flags
& NIR_INTRINSIC_CAN_REORDER
) &&
186 info
->num_variables
== 0; /* not implemented yet */
188 case nir_instr_type_call
:
189 case nir_instr_type_jump
:
190 case nir_instr_type_ssa_undef
:
192 case nir_instr_type_parallel_copy
:
194 unreachable("Invalid instruction type");
201 nir_instr_get_dest_ssa_def(nir_instr
*instr
)
203 switch (instr
->type
) {
204 case nir_instr_type_alu
:
205 assert(nir_instr_as_alu(instr
)->dest
.dest
.is_ssa
);
206 return &nir_instr_as_alu(instr
)->dest
.dest
.ssa
;
207 case nir_instr_type_load_const
:
208 return &nir_instr_as_load_const(instr
)->def
;
209 case nir_instr_type_phi
:
210 assert(nir_instr_as_phi(instr
)->dest
.is_ssa
);
211 return &nir_instr_as_phi(instr
)->dest
.ssa
;
212 case nir_instr_type_intrinsic
:
213 assert(nir_instr_as_intrinsic(instr
)->dest
.is_ssa
);
214 return &nir_instr_as_intrinsic(instr
)->dest
.ssa
;
216 unreachable("We never ask for any of these");
221 nir_opt_cse_instr(nir_instr
*instr
, struct cse_state
*state
)
223 if (!nir_instr_can_cse(instr
))
226 for (struct exec_node
*node
= instr
->node
.prev
;
227 !exec_node_is_head_sentinel(node
); node
= node
->prev
) {
228 nir_instr
*other
= exec_node_data(nir_instr
, node
, node
);
229 if (nir_instrs_equal(instr
, other
)) {
230 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
231 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
232 nir_src_for_ssa(other_def
),
234 nir_instr_remove(instr
);
235 state
->progress
= true;
240 for (nir_block
*block
= instr
->block
->imm_dom
;
241 block
!= NULL
; block
= block
->imm_dom
) {
242 nir_foreach_instr_reverse(block
, other
) {
243 if (nir_instrs_equal(instr
, other
)) {
244 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
245 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
246 nir_src_for_ssa(other_def
),
248 nir_instr_remove(instr
);
249 state
->progress
= true;
257 nir_opt_cse_block(nir_block
*block
, void *void_state
)
259 struct cse_state
*state
= void_state
;
261 nir_foreach_instr_safe(block
, instr
)
262 nir_opt_cse_instr(instr
, state
);
268 nir_opt_cse_impl(nir_function_impl
*impl
)
270 struct cse_state state
;
272 state
.mem_ctx
= ralloc_parent(impl
);
273 state
.progress
= false;
275 nir_metadata_require(impl
, nir_metadata_dominance
);
277 nir_foreach_block(impl
, nir_opt_cse_block
, &state
);
280 nir_metadata_preserve(impl
, nir_metadata_block_index
|
281 nir_metadata_dominance
);
283 return state
.progress
;
287 nir_opt_cse(nir_shader
*shader
)
289 bool progress
= false;
291 nir_foreach_overload(shader
, overload
) {
293 progress
|= nir_opt_cse_impl(overload
->impl
);