2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Debug-dump helper: expands its argument q inside a do { } while (0) wrapper
// so the expansion behaves as a single statement at the call site.
30 #define PPH_DUMP(q) do { q } while (0)
35 #include "sb_shader.h"
// Walks the children of container c and applies peephole rewrites.
// Visible behaviour:
//  - nested containers are processed recursively;
//  - fetch instructions flagged FF_GDS have their destination vector scanned
//    and may have a *_RET opcode replaced by the matching non-RET opcode;
//  - ALU instructions are classified by their op flags, and FLT_TO_INT ops
//    whose source qualifies are handed to convert_float_setcc().
// NOTE(review): this listing is partial - the declarations of n and s, the
// bodies of several branches and the closing braces are not visible here.
47 void peephole::run_on(container_node
* c
) {
49 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
// n is presumably the node referenced by I - its declaration is not visible.
52 if (n
->is_container())
53 run_on(static_cast<container_node
*>(n
));
55 if (n
->is_fetch_inst() && (n
->fetch_op_flags() & FF_GDS
)) {
56 fetch_node
*f
= static_cast<fetch_node
*>(n
);
// This inner loop declares its own I and E, shadowing the outer iterators.
59 for(vvec::iterator I
= f
->dst
.begin(), E
= f
->dst
.end(); I
!= E
; ++I
) {
// Maps an op in [FETCH_OP_GDS_ADD_RET, FETCH_OP_GDS_USHORT_READ_RET] onto the
// range that starts at FETCH_OP_GDS_ADD by keeping its offset within the range.
// NOTE(review): the condition guarding this rewrite is not visible here;
// presumably it applies only when the returned value is unused - confirm.
65 if (f
->bc
.op
>= FETCH_OP_GDS_ADD_RET
&& f
->bc
.op
<= FETCH_OP_GDS_USHORT_READ_RET
)
66 f
->bc
.set_op(f
->bc
.op
- FETCH_OP_GDS_ADD_RET
+ FETCH_OP_GDS_ADD
);
68 if (n
->is_alu_inst()) {
69 alu_node
*a
= static_cast<alu_node
*>(n
);
// Ops carrying any of the PRED/SET/CMOV/KILL flags take the first branch
// (its body is not visible; presumably it calls optimize_cc_op - confirm).
71 if (a
->bc
.op_ptr
->flags
&
72 (AF_PRED
| AF_SET
| AF_CMOV
| AF_KILL
)) {
74 } else if (a
->bc
.op
== ALU_OP1_FLT_TO_INT
) {
// s is declared on a line that is not visible; get_bool_flt_to_int_source()
// takes it by reference and may replace it before convert_float_setcc() uses it.
77 if (get_bool_flt_to_int_source(s
)) {
78 convert_float_setcc(a
, s
);
// Classifies ALU node a by its op flags: one branch for ops flagged
// AF_PRED, AF_SET or AF_KILL, another for ops flagged AF_CMOV.
// NOTE(review): both branch bodies are missing from this listing; presumably
// they forward to optimize_cc_op2() and optimize_CNDcc_op() - confirm.
86 void peephole::optimize_cc_op(alu_node
* a
) {
87 unsigned aflags
= a
->bc
.op_ptr
->flags
;
89 if (aflags
& (AF_PRED
| AF_SET
| AF_KILL
)) {
91 } else if (aflags
& AF_CMOV
) {
// Builds a replacement for the pair (s, f2i): clones the compare node s,
// makes the clone write f2i's destination, switches the clone's opcode to the
// DX10 variant and inserts the clone right after f2i.
//   f2i - the FLT_TO_INT node whose destination is taken over
//   s   - the compare node that gets cloned
// NOTE(review): anything after the insert_after() call (e.g. removing f2i) is
// not visible in this listing.
96 void peephole::convert_float_setcc(alu_node
*f2i
, alu_node
*s
) {
97 alu_node
*ns
= sh
.clone(s
);
// The clone takes over f2i's first destination value ...
99 ns
->dst
[0] = f2i
->dst
[0];
// ... and that value's def pointer is updated to point at the clone.
100 ns
->dst
[0]->def
= ns
;
// Shifts the opcode by the distance between ALU_OP2_SETE and
// ALU_OP2_SETE_DX10; assumes both opcode families are laid out in the same
// order - TODO confirm against the opcode table.
101 ns
->bc
.set_op(ns
->bc
.op
+ (ALU_OP2_SETE_DX10
- ALU_OP2_SETE
));
102 f2i
->insert_after(ns
);
// Tries to fold a compare-against-zero of a boolean-producing ALU result into
// the conditional op a itself: a's opcode is re-selected from the producer's
// condition code and a's two sources are replaced by the producer's sources.
// NOTE(review): this listing is partial - early returns, several guarding
// conditions, the switch over op_kind and the final set_op are not visible.
106 void peephole::optimize_cc_op2(alu_node
* a
) {
108 unsigned flags
= a
->bc
.op_ptr
->flags
;
109 unsigned cc
= flags
& AF_CC_MASK
;
// Only E / NE conditions on a node with no a->pred are considered; the
// statement controlled by this test is not visible (presumably a return).
111 if ((cc
!= AF_CC_E
&& cc
!= AF_CC_NE
) || a
->pred
)
114 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
115 unsigned dst_type
= flags
& AF_DST_TYPE_MASK
;
// op_kind: 1 = AF_PRED, 2 = AF_SET, 3 = AF_KILL, 0 = none of these.
117 int op_kind
= (flags
& AF_PRED
) ? 1 :
118 (flags
& AF_SET
) ? 2 :
119 (flags
& AF_KILL
) ? 3 : 0;
121 bool swapped
= false;
// If the first source is the literal 0, swap the two sources and zero both
// bytecode source descriptors. swapped is presumably set to true inside this
// branch (that assignment is not visible).
123 if (a
->src
[0]->is_const() && a
->src
[0]->literal_value
== literal(0)) {
124 std::swap(a
->src
[0],a
->src
[1]);
127 memset(&a
->bc
.src
[0], 0, sizeof(bc_alu_src
));
128 memset(&a
->bc
.src
[1], 0, sizeof(bc_alu_src
));
// Continue only when the second source is (now) the literal 0.
131 if (swapped
|| (a
->src
[1]->is_const() &&
132 a
->src
[1]->literal_value
== literal(0))) {
134 value
*s
= a
->src
[0];
136 bool_op_info bop
= {};
// Looks up the boolean-producing op behind s; the action taken on failure is
// not visible (presumably a return).
144 if (!get_bool_op_info(s
, bop
))
// Toggles the invert flag; the condition guarding this line is not visible.
148 bop
.invert
= !bop
.invert
;
150 bool swap_args
= false;
// Start from the condition code of the producing node.
152 cc
= bop
.n
->bc
.op_ptr
->flags
& AF_CC_MASK
;
// presumably executed only when bop.invert is set - TODO confirm.
155 cc
= invert_setcc_condition(cc
, swap_args
);
// presumably executed only when bop.int_cvt is set - TODO confirm.
158 assert(cmp_type
!= AF_FLOAT_CMP
);
159 cmp_type
= AF_FLOAT_CMP
;
// Debug output describing the producer found.
163 sblog
<< "boi node: ";
164 dump::dump_op(bop
.n
);
165 sblog
<< " invert: " << bop
.invert
<< " int_cvt: " << bop
.int_cvt
;
// Opcode selection; newop's declaration and the dispatch (presumably a switch
// on op_kind) are not visible in this listing.
173 newop
= get_predsetcc_op(cc
, cmp_type
);
176 newop
= get_setcc_op(cc
, cmp_type
, dst_type
!= AF_FLOAT_DST
);
179 newop
= get_killcc_op(cc
, cmp_type
);
183 assert(!"invalid op kind");
// Copy the producer's sources in exchanged order (presumably the swap_args
// case - the selecting condition is not visible).
190 a
->src
[0] = bop
.n
->src
[1];
191 a
->src
[1] = bop
.n
->src
[0];
192 a
->bc
.src
[0] = bop
.n
->bc
.src
[1];
193 a
->bc
.src
[1] = bop
.n
->bc
.src
[0];
// Copy the producer's sources in their original order.
196 a
->src
[0] = bop
.n
->src
[0];
197 a
->src
[1] = bop
.n
->src
[1];
198 a
->bc
.src
[0] = bop
.n
->bc
.src
[0];
199 a
->bc
.src
[1] = bop
.n
->bc
.src
[1];
// Tries to fold the compare that produces a CNDcc op's first source into the
// CNDcc itself: src[0] is replaced by the compare's non-zero operand, the
// opcode is re-selected via get_cndcc_op(), and src[1]/src[2] may be exchanged.
// NOTE(review): this listing is partial - the declarations of d, nds and swap,
// the switch header, the early returns and several guards are not visible.
204 void peephole::optimize_CNDcc_op(alu_node
* a
) {
205 unsigned flags
= a
->bc
.op_ptr
->flags
;
206 unsigned cc
= flags
& AF_CC_MASK
;
207 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
// The preceding if-branch is not visible; this arm tests for a condition code
// other than NE (the controlled statement is not visible either).
213 } else if (cc
!= AF_CC_NE
)
216 value
*s
= a
->src
[0];
218 bool_op_info bop
= {};
// Action on failure is not visible (presumably a return).
226 if (!get_bool_op_info(s
, bop
))
235 sblog
<< "cndcc def: ";
// d is declared on a line that is not visible; presumably the producing node
// reported in bop - TODO confirm.
241 unsigned dflags
= d
->bc
.op_ptr
->flags
;
242 unsigned dcc
= dflags
& AF_CC_MASK
;
243 unsigned dcmp_type
= dflags
& AF_CMP_TYPE_MASK
;
244 unsigned ddst_type
= dflags
& AF_DST_TYPE_MASK
;
247 // TODO we can handle some of these cases,
248 // though probably this shouldn't happen
249 if (cmp_type
!= AF_FLOAT_CMP
&& ddst_type
== AF_FLOAT_DST
)
// Determines which of d's two sources is the literal 0; nds (declared out of
// view) is presumably set to the index of the other source.
252 if (d
->src
[0]->is_const() && d
->src
[0]->literal_value
== literal(0))
254 else if ((d
->src
[1]->is_const() &&
255 d
->src
[1]->literal_value
== literal(0)))
260 // can't propagate ABS modifier to CNDcc because it's OP3
261 if (d
->bc
.src
[nds
].abs
)
264 // TODO we can handle some cases for uint comparison
265 if (dcmp_type
== AF_UINT_CMP
)
268 if (dcc
== AF_CC_NE
) {
// Two visible cases of a switch whose header is not shown: GT and GE are
// exchanged and the swap flag is toggled each time.
275 case AF_CC_GT
: dcc
= AF_CC_GE
; swap
= !swap
; break;
276 case AF_CC_GE
: dcc
= AF_CC_GT
; swap
= !swap
; break;
// The CNDcc now tests the compare's non-zero operand directly.
281 a
->src
[0] = d
->src
[nds
];
282 a
->bc
.src
[0] = d
->bc
.src
[nds
];
// Exchanges the two selectable operands (presumably only when swap is set -
// the guard is not visible).
285 std::swap(a
->src
[1], a
->src
[2]);
286 std::swap(a
->bc
.src
[1], a
->bc
.src
[2]);
289 a
->bc
.set_op(get_cndcc_op(dcc
, dcmp_type
));
// Inspects the definition chain behind a FLT_TO_INT node, looking for an ALU
// op flagged AF_SET as the ultimate producer (optionally through a TRUNC).
//   a - pointer passed by reference, so the function can hand a different
//       node back to the caller (the assignment itself is not visible).
// NOTE(review): every return statement and several assignments are missing
// from this listing; the true/false results are presumably "found"/"not found".
293 bool peephole::get_bool_flt_to_int_source(alu_node
* &a
) {
295 if (a
->bc
.op
== ALU_OP1_FLT_TO_INT
) {
// Tests for any neg/abs/rel modifier on the source (the controlled statement
// is not visible; presumably the match is rejected).
297 if (a
->bc
.src
[0].neg
|| a
->bc
.src
[0].abs
|| a
->bc
.src
[0].rel
)
300 value
*s
= a
->src
[0];
// The source must have a defining node and that node must be an ALU instruction.
301 if (!s
|| !s
->def
|| !s
->def
->is_alu_inst())
304 alu_node
*dn
= static_cast<alu_node
*>(s
->def
);
// When the producer is a TRUNC, the same checks are repeated; s is presumably
// re-pointed at the TRUNC's source on a line that is not visible.
306 if (dn
->is_alu_op(ALU_OP1_TRUNC
)) {
308 if (!s
|| !s
->def
|| !s
->def
->is_alu_inst())
// Tests for anything other than neg == 1, abs == 0, rel == 0 on the TRUNC's
// source (the controlled statement is not visible).
311 if (dn
->bc
.src
[0].neg
!= 1 || dn
->bc
.src
[0].abs
!= 0 ||
312 dn
->bc
.src
[0].rel
!= 0) {
316 dn
= static_cast<alu_node
*>(s
->def
);
// Checks whether the producer carries the AF_SET flag; the body of this branch
// is not visible.
320 if (dn
->bc
.op_ptr
->flags
& AF_SET
) {
// Looks for the boolean-producing ALU op behind value b and reports it via bop.
//   b   - the value being examined
//   bop - output record; callers in this file read its n, invert and int_cvt
//         fields
// NOTE(review): the declaration of d, the writes to bop and all return
// statements are missing from this listing.
328 bool peephole::get_bool_op_info(value
* b
, bool_op_info
& bop
) {
// d is presumably b's defining node - TODO confirm; it must be an ALU instruction.
332 if (!d
|| !d
->is_alu_inst())
335 alu_node
*dn
= static_cast<alu_node
*>(d
);
// Case 1: the producer itself is flagged AF_SET.
337 if (dn
->bc
.op_ptr
->flags
& AF_SET
) {
// The DX10 variant is singled out here (the controlled statement is not
// visible; presumably it records an integer-result flag in bop).
340 if (dn
->bc
.op_ptr
->flags
& AF_DX10
)
// Case 2: the producer matches the FLT_TO_INT pattern; dn is passed by
// reference and may be replaced by the callee.
346 if (get_bool_flt_to_int_source(dn
)) {
355 } // namespace r600_sb