/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

/*
 * Implements common subexpression elimination
 */
40 nir_alu_srcs_equal(nir_alu_instr
*alu1
, nir_alu_instr
*alu2
, unsigned src1
,
43 if (alu1
->src
[src1
].abs
!= alu2
->src
[src2
].abs
||
44 alu1
->src
[src1
].negate
!= alu2
->src
[src2
].negate
)
47 for (unsigned i
= 0; i
< nir_ssa_alu_instr_src_components(alu1
, src1
); i
++) {
48 if (alu1
->src
[src1
].swizzle
[i
] != alu2
->src
[src2
].swizzle
[i
])
52 return nir_srcs_equal(alu1
->src
[src1
].src
, alu2
->src
[src2
].src
);
56 nir_instrs_equal(nir_instr
*instr1
, nir_instr
*instr2
)
58 if (instr1
->type
!= instr2
->type
)
61 switch (instr1
->type
) {
62 case nir_instr_type_alu
: {
63 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
64 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
66 if (alu1
->op
!= alu2
->op
)
69 /* TODO: We can probably acutally do something more inteligent such
70 * as allowing different numbers and taking a maximum or something
72 if (alu1
->dest
.dest
.ssa
.num_components
!= alu2
->dest
.dest
.ssa
.num_components
)
75 if (nir_op_infos
[alu1
->op
].algebraic_properties
& NIR_OP_IS_COMMUTATIVE
) {
76 assert(nir_op_infos
[alu1
->op
].num_inputs
== 2);
77 return (nir_alu_srcs_equal(alu1
, alu2
, 0, 0) &&
78 nir_alu_srcs_equal(alu1
, alu2
, 1, 1)) ||
79 (nir_alu_srcs_equal(alu1
, alu2
, 0, 1) &&
80 nir_alu_srcs_equal(alu1
, alu2
, 1, 0));
82 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
83 if (!nir_alu_srcs_equal(alu1
, alu2
, i
, i
))
89 case nir_instr_type_tex
: {
90 nir_tex_instr
*tex1
= nir_instr_as_tex(instr1
);
91 nir_tex_instr
*tex2
= nir_instr_as_tex(instr2
);
93 if (tex1
->op
!= tex2
->op
)
96 if (tex1
->num_srcs
!= tex2
->num_srcs
)
98 for (unsigned i
= 0; i
< tex1
->num_srcs
; i
++) {
99 if (tex1
->src
[i
].src_type
!= tex2
->src
[i
].src_type
||
100 !nir_srcs_equal(tex1
->src
[i
].src
, tex2
->src
[i
].src
)) {
105 if (tex1
->coord_components
!= tex2
->coord_components
||
106 tex1
->sampler_dim
!= tex2
->sampler_dim
||
107 tex1
->is_array
!= tex2
->is_array
||
108 tex1
->is_shadow
!= tex2
->is_shadow
||
109 tex1
->is_new_style_shadow
!= tex2
->is_new_style_shadow
||
110 memcmp(tex1
->const_offset
, tex2
->const_offset
,
111 sizeof(tex1
->const_offset
)) != 0 ||
112 tex1
->component
!= tex2
->component
||
113 tex1
->sampler_index
!= tex2
->sampler_index
||
114 tex1
->sampler_array_size
!= tex2
->sampler_array_size
) {
118 /* Don't support un-lowered sampler derefs currently. */
119 if (tex1
->sampler
|| tex2
->sampler
)
124 case nir_instr_type_load_const
: {
125 nir_load_const_instr
*load1
= nir_instr_as_load_const(instr1
);
126 nir_load_const_instr
*load2
= nir_instr_as_load_const(instr2
);
128 if (load1
->def
.num_components
!= load2
->def
.num_components
)
131 return memcmp(load1
->value
.f
, load2
->value
.f
,
132 load1
->def
.num_components
* sizeof(*load2
->value
.f
)) == 0;
134 case nir_instr_type_phi
: {
135 nir_phi_instr
*phi1
= nir_instr_as_phi(instr1
);
136 nir_phi_instr
*phi2
= nir_instr_as_phi(instr2
);
138 if (phi1
->instr
.block
!= phi2
->instr
.block
)
141 nir_foreach_phi_src(phi1
, src1
) {
142 nir_foreach_phi_src(phi2
, src2
) {
143 if (src1
->pred
== src2
->pred
) {
144 if (!nir_srcs_equal(src1
->src
, src2
->src
))
154 case nir_instr_type_intrinsic
: {
155 nir_intrinsic_instr
*intrinsic1
= nir_instr_as_intrinsic(instr1
);
156 nir_intrinsic_instr
*intrinsic2
= nir_instr_as_intrinsic(instr2
);
157 const nir_intrinsic_info
*info
=
158 &nir_intrinsic_infos
[intrinsic1
->intrinsic
];
160 if (intrinsic1
->intrinsic
!= intrinsic2
->intrinsic
||
161 intrinsic1
->num_components
!= intrinsic2
->num_components
)
164 if (info
->has_dest
&& intrinsic1
->dest
.ssa
.num_components
!=
165 intrinsic2
->dest
.ssa
.num_components
)
168 for (unsigned i
= 0; i
< info
->num_srcs
; i
++) {
169 if (!nir_srcs_equal(intrinsic1
->src
[i
], intrinsic2
->src
[i
]))
173 assert(info
->num_variables
== 0);
175 for (unsigned i
= 0; i
< info
->num_indices
; i
++) {
176 if (intrinsic1
->const_index
[i
] != intrinsic2
->const_index
[i
])
182 case nir_instr_type_call
:
183 case nir_instr_type_jump
:
184 case nir_instr_type_ssa_undef
:
185 case nir_instr_type_parallel_copy
:
187 unreachable("Invalid instruction type");
194 src_is_ssa(nir_src
*src
, void *data
)
201 dest_is_ssa(nir_dest
*dest
, void *data
)
208 nir_instr_can_cse(nir_instr
*instr
)
210 /* We only handle SSA. */
211 if (!nir_foreach_dest(instr
, dest_is_ssa
, NULL
) ||
212 !nir_foreach_src(instr
, src_is_ssa
, NULL
))
215 switch (instr
->type
) {
216 case nir_instr_type_alu
:
217 case nir_instr_type_tex
:
218 case nir_instr_type_load_const
:
219 case nir_instr_type_phi
:
221 case nir_instr_type_intrinsic
: {
222 const nir_intrinsic_info
*info
=
223 &nir_intrinsic_infos
[nir_instr_as_intrinsic(instr
)->intrinsic
];
224 return (info
->flags
& NIR_INTRINSIC_CAN_ELIMINATE
) &&
225 (info
->flags
& NIR_INTRINSIC_CAN_REORDER
) &&
226 info
->num_variables
== 0; /* not implemented yet */
228 case nir_instr_type_call
:
229 case nir_instr_type_jump
:
230 case nir_instr_type_ssa_undef
:
232 case nir_instr_type_parallel_copy
:
234 unreachable("Invalid instruction type");
241 nir_instr_get_dest_ssa_def(nir_instr
*instr
)
243 switch (instr
->type
) {
244 case nir_instr_type_alu
:
245 assert(nir_instr_as_alu(instr
)->dest
.dest
.is_ssa
);
246 return &nir_instr_as_alu(instr
)->dest
.dest
.ssa
;
247 case nir_instr_type_tex
:
248 assert(nir_instr_as_tex(instr
)->dest
.is_ssa
);
249 return &nir_instr_as_tex(instr
)->dest
.ssa
;
250 case nir_instr_type_load_const
:
251 return &nir_instr_as_load_const(instr
)->def
;
252 case nir_instr_type_phi
:
253 assert(nir_instr_as_phi(instr
)->dest
.is_ssa
);
254 return &nir_instr_as_phi(instr
)->dest
.ssa
;
255 case nir_instr_type_intrinsic
:
256 assert(nir_instr_as_intrinsic(instr
)->dest
.is_ssa
);
257 return &nir_instr_as_intrinsic(instr
)->dest
.ssa
;
259 unreachable("We never ask for any of these");
264 nir_opt_cse_instr(nir_instr
*instr
, struct cse_state
*state
)
266 if (!nir_instr_can_cse(instr
))
269 for (struct exec_node
*node
= instr
->node
.prev
;
270 !exec_node_is_head_sentinel(node
); node
= node
->prev
) {
271 nir_instr
*other
= exec_node_data(nir_instr
, node
, node
);
272 if (nir_instrs_equal(instr
, other
)) {
273 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
274 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
275 nir_src_for_ssa(other_def
),
277 nir_instr_remove(instr
);
278 state
->progress
= true;
283 for (nir_block
*block
= instr
->block
->imm_dom
;
284 block
!= NULL
; block
= block
->imm_dom
) {
285 nir_foreach_instr_reverse(block
, other
) {
286 if (nir_instrs_equal(instr
, other
)) {
287 nir_ssa_def
*other_def
= nir_instr_get_dest_ssa_def(other
);
288 nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr
),
289 nir_src_for_ssa(other_def
),
291 nir_instr_remove(instr
);
292 state
->progress
= true;
300 nir_opt_cse_block(nir_block
*block
, void *void_state
)
302 struct cse_state
*state
= void_state
;
304 nir_foreach_instr_safe(block
, instr
)
305 nir_opt_cse_instr(instr
, state
);
311 nir_opt_cse_impl(nir_function_impl
*impl
)
313 struct cse_state state
;
315 state
.mem_ctx
= ralloc_parent(impl
);
316 state
.progress
= false;
318 nir_metadata_require(impl
, nir_metadata_dominance
);
320 nir_foreach_block(impl
, nir_opt_cse_block
, &state
);
323 nir_metadata_preserve(impl
, nir_metadata_block_index
|
324 nir_metadata_dominance
);
326 return state
.progress
;
330 nir_opt_cse(nir_shader
*shader
)
332 bool progress
= false;
334 nir_foreach_overload(shader
, overload
) {
336 progress
|= nir_opt_cse_impl(overload
->impl
);