2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define IFC_DUMP(q) do { q } while (0)
35 #include "sb_shader.h"
40 int if_conversion::run() {
42 regions_vec
&rv
= sh
.get_regions();
44 unsigned converted
= 0;
45 for (regions_vec::reverse_iterator I
= rv
.rbegin(); I
!= rv
.rend(); ) {
48 I
= regions_vec::reverse_iterator(rv
.erase((++I
).base()));
56 void if_conversion::convert_kill_instructions(region_node
*r
,
57 value
*em
, bool branch
,
61 for (node_iterator I
= c
->begin(), E
= c
->end(), N
; I
!= E
; I
= N
) {
64 if (!I
->is_alu_inst())
67 alu_node
*a
= static_cast<alu_node
*>(*I
);
68 unsigned flags
= a
->bc
.op_ptr
->flags
;
70 if (!(flags
& AF_KILL
))
73 // ignore predicated or non-const kill instructions
74 if (a
->pred
|| !a
->src
[0]->is_const() || !a
->src
[1]->is_const())
77 literal l0
= a
->src
[0]->literal_value
;
78 literal l1
= a
->src
[1]->literal_value
;
80 expr_handler::apply_alu_src_mod(a
->bc
, 0, l0
);
81 expr_handler::apply_alu_src_mod(a
->bc
, 1, l1
);
83 if (expr_handler::evaluate_condition(flags
, l0
, l1
)) {
84 // kill with constant 'true' condition, we'll convert it to the
85 // conditional kill outside of the if-then-else block
90 cnd
= get_select_value_for_em(sh
, em
);
92 // more than one kill with the same condition, just remove it
97 a
->bc
.set_op(branch
? ALU_OP2_KILLE_INT
: ALU_OP2_KILLNE_INT
);
100 a
->src
[1] = sh
.get_const_value(0);
102 a
->bc
.src
[0].clear();
103 a
->bc
.src
[1].clear();
105 // kill with constant 'false' condition, this shouldn't happen
106 // but remove it anyway
112 bool if_conversion::check_and_convert(region_node
*r
) {
114 depart_node
*nd1
= static_cast<depart_node
*>(r
->first
);
115 if (!nd1
->is_depart() || nd1
->target
!= r
)
117 if_node
*nif
= static_cast<if_node
*>(nd1
->first
);
120 depart_node
*nd2
= static_cast<depart_node
*>(nif
->first
);
121 if (!nd2
->is_depart() || nd2
->target
!= r
)
124 value
* &em
= nif
->cond
;
131 sblog
<< "ifcvt: region " << r
->region_id
<< " :\n";
135 if (s
.region_count
|| s
.fetch_count
|| s
.alu_kill_count
||
136 s
.if_count
!= 1 || s
.repeat_count
|| s
.uses_ar
)
139 unsigned real_alu_count
= s
.alu_count
- s
.alu_copy_mov_count
;
141 // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
142 // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
143 // are eliminated. According to the docs, cost of CF instruction is
144 // equal to ~40 ALU VLIW instructions (instruction groups),
145 // so we have eliminated cost equal to ~120 groups in total.
146 // Let's also assume that we have avg 3 ALU instructions per group,
147 // This means that potential eliminated cost is about 360 single alu inst.
148 // On the other hand, we are speculatively executing conditional code now,
149 // so we are increasing the cost in some cases. In the worst case, we'll
150 // have to execute real_alu_count additional alu instructions instead of
151 // jumping over them. Let's assume for now that average added cost is
153 // (0.9 * real_alu_count)
155 // So we should perform if_conversion if
157 // (0.9 * real_alu_count) < 360, or
159 // real_alu_count < 400
161 // So if real_alu_count is more than 400, than we think that if_conversion
162 // doesn't make sense.
164 // FIXME: We can use more precise heuristic, taking into account sizes of
165 // the branches and their probability instead of total size.
166 // Another way to improve this is to consider the number of the groups
167 // instead of the number of instructions (taking into account actual VLIW
169 // (Currently we don't know anything about packing at this stage, but
170 // probably we can make some more precise estimations anyway)
172 if (real_alu_count
> 400)
175 IFC_DUMP( sblog
<< "if_cvt: processing...\n"; );
177 value
*select
= get_select_value_for_em(sh
, em
);
182 for (node_iterator I
= r
->phi
->begin(), E
= r
->phi
->end(); I
!= E
; ++I
) {
185 alu_node
*ns
= convert_phi(select
, n
);
199 bool if_conversion::run_on(region_node
* r
) {
201 if (r
->dep_count() != 2 || r
->rep_count() != 1)
204 depart_node
*nd1
= static_cast<depart_node
*>(r
->first
);
205 if (!nd1
->is_depart())
207 if_node
*nif
= static_cast<if_node
*>(nd1
->first
);
210 depart_node
*nd2
= static_cast<depart_node
*>(nif
->first
);
211 if (!nd2
->is_depart())
214 value
* &em
= nif
->cond
;
216 convert_kill_instructions(r
, em
, true, nd2
);
217 convert_kill_instructions(r
, em
, false, nd1
);
219 if (check_and_convert(r
))
222 if (nd2
->empty() && nif
->next
) {
223 // empty true branch, non-empty false branch
224 // we'll invert it to get rid of 'else'
226 assert(em
&& em
->def
);
228 alu_node
*predset
= static_cast<alu_node
*>(em
->def
);
230 // create clone of PREDSET instruction with inverted condition.
231 // PREDSET has 3 dst operands in our IR (value written to gpr,
232 // predicate value and exec mask value), we'll split it such that
233 // new PREDSET will define exec mask value only, and two others will
234 // be defined in the old PREDSET (if they are not used then DCE will
235 // simply remove old PREDSET).
237 alu_node
*newpredset
= sh
.clone(predset
);
238 predset
->insert_after(newpredset
);
240 predset
->dst
[2] = NULL
;
242 newpredset
->dst
[0] = NULL
;
243 newpredset
->dst
[1] = NULL
;
245 em
->def
= newpredset
;
247 unsigned cc
= newpredset
->bc
.op_ptr
->flags
& AF_CC_MASK
;
248 unsigned cmptype
= newpredset
->bc
.op_ptr
->flags
& AF_CMP_TYPE_MASK
;
249 bool swapargs
= false;
251 cc
= invert_setcc_condition(cc
, swapargs
);
254 std::swap(newpredset
->src
[0], newpredset
->src
[1]);
255 std::swap(newpredset
->bc
.src
[0], newpredset
->bc
.src
[1]);
258 unsigned newopcode
= get_predsetcc_op(cc
, cmptype
);
259 newpredset
->bc
.set_op(newopcode
);
261 // move the code from the 'false' branch ('else') to the 'true' branch
262 nd2
->move(nif
->next
, NULL
);
265 for (node_iterator I
= r
->phi
->begin(), E
= r
->phi
->end(); I
!= E
;
268 assert(p
->src
.size() == 2);
269 std::swap(p
->src
[0], p
->src
[1]);
276 alu_node
* if_conversion::convert_phi(value
* select
, node
* phi
) {
277 assert(phi
->dst
.size() == 1 || phi
->src
.size() == 2);
279 value
*d
= phi
->dst
[0];
280 value
*v1
= phi
->src
[0];
281 value
*v2
= phi
->src
[1];
285 if (!d
->is_any_gpr())
288 if (v1
->is_undef()) {
289 if (v2
->is_undef()) {
292 return sh
.create_mov(d
, v2
);
294 } else if (v2
->is_undef())
295 return sh
.create_mov(d
, v1
);
297 alu_node
* n
= sh
.create_alu();
299 n
->bc
.set_op(ALU_OP3_CNDE_INT
);
301 n
->src
.push_back(select
);
302 n
->src
.push_back(v1
);
303 n
->src
.push_back(v2
);
308 } // namespace r600_sb