2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Debug-dump helper: expands its argument q inside a do { ... } while (0)
// wrapper so that a PPH_DUMP(...) use behaves as a single statement.
// NOTE(review): only this definition is visible here; any surrounding
// conditional compilation is not shown — confirm against the full file.
30 #define PPH_DUMP(q) do { q } while (0)
35 #include "sb_shader.h"
// Walks the children of container c and applies peephole rewrites to them:
//  - child containers are processed recursively;
//  - GDS fetch instructions may have a *_RET opcode replaced by the
//    corresponding opcode without the _RET suffix;
//  - LDS ALU instructions may get the same treatment, and LDS_READ_RET gets
//    its first source replaced by the shader's undef value;
//  - ALU ops flagged PRED/SET/CMOV/KILL take a separate branch (body not
//    visible in this listing);
//  - FLT_TO_INT whose source resolves to a boolean SETcc is handed to
//    convert_float_setcc().
// NOTE(review): this listing has elided lines (the embedded source line
// numbers skip), so declarations such as `n` and `s`, several conditions and
// all closing braces are not visible here.
47 void peephole::run_on(container_node
* c
) {
49 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
// n is declared on a line not shown; presumably the node I refers to — confirm.
// Containers are handled by recursing into them.
52 if (n
->is_container())
53 run_on(static_cast<container_node
*>(n
));
// Fetch instructions carrying the FF_GDS flag.
55 if (n
->is_fetch_inst() && (n
->fetch_op_flags() & FF_GDS
)) {
56 fetch_node
*f
= static_cast<fetch_node
*>(n
);
// Scan of the destination vector; this loop reuses the names I and E.
// What the loop body records is not visible in this listing.
59 for(vvec::iterator I
= f
->dst
.begin(), E
= f
->dst
.end(); I
!= E
; ++I
) {
// Map an opcode in [GDS_ADD_RET, GDS_USHORT_READ_RET] onto the range that
// starts at GDS_ADD by keeping its offset from GDS_ADD_RET.
// NOTE(review): any enclosing guard (e.g. "no destination is used") is on
// lines not shown — confirm.
65 if (f
->bc
.op
>= FETCH_OP_GDS_ADD_RET
&& f
->bc
.op
<= FETCH_OP_GDS_USHORT_READ_RET
)
66 f
->bc
.set_op(f
->bc
.op
- FETCH_OP_GDS_ADD_RET
+ FETCH_OP_GDS_ADD
);
68 if (n
->is_alu_inst()) {
69 alu_node
*a
= static_cast<alu_node
*>(n
);
// LDS operations.
71 if (a
->bc
.op_ptr
->flags
& AF_LDS
) {
// Same offset-preserving remap as for GDS: [LDS_ADD_RET, LDS_MSKOR_RET]
// is shifted onto the range starting at LDS_ADD.
// NOTE(review): the guard around this rewrite is not visible — confirm.
73 if (a
->bc
.op
>= LDS_OP2_LDS_ADD_RET
&& a
->bc
.op
<= LDS_OP3_LDS_MSKOR_RET
)
74 a
->bc
.set_op(a
->bc
.op
- LDS_OP2_LDS_ADD_RET
+ LDS_OP2_LDS_ADD
);
// LDS_READ_RET: replace the first source with the shader's undef value.
75 if (a
->bc
.op
== LDS_OP1_LDS_READ_RET
)
76 a
->src
[0] = sh
.get_undef_value();
// Condition-code style ops; the branch body is not shown
// (presumably a call to optimize_cc_op — confirm).
78 } else if (a
->bc
.op_ptr
->flags
&
79 (AF_PRED
| AF_SET
| AF_CMOV
| AF_KILL
)) {
81 } else if (a
->bc
.op
== ALU_OP1_FLT_TO_INT
) {
// s is declared on a line not shown. get_bool_flt_to_int_source() takes its
// argument by reference, so s may be updated by the call before being used.
84 if (get_bool_flt_to_int_source(s
)) {
85 convert_float_setcc(a
, s
);
// Dispatches on the opcode flags of ALU node a: one branch for ops flagged
// PRED, SET or KILL, another for ops flagged CMOV.
// NOTE(review): both branch bodies are on lines not shown in this listing;
// presumably they call optimize_cc_op2() and optimize_CNDcc_op()
// respectively — confirm against the full file.
93 void peephole::optimize_cc_op(alu_node
* a
) {
94 unsigned aflags
= a
->bc
.op_ptr
->flags
;
96 if (aflags
& (AF_PRED
| AF_SET
| AF_KILL
)) {
98 } else if (aflags
& AF_CMOV
) {
// Builds a replacement for the conversion node f2i from the compare node s:
// s is cloned, the clone takes over f2i's first destination value, its
// opcode is shifted by the distance between SETE_DX10 and SETE, and it is
// inserted right after f2i.
// NOTE(review): the lines following the insert are not shown in this listing
// (presumably f2i is removed afterwards) — confirm against the full file.
103 void peephole::convert_float_setcc(alu_node
*f2i
, alu_node
*s
) {
104 alu_node
*ns
= sh
.clone(s
);
// The clone now defines the value previously written by f2i.
106 ns
->dst
[0] = f2i
->dst
[0];
107 ns
->dst
[0]->def
= ns
;
// Opcode offset arithmetic: assumes the DX10 SETcc opcodes are laid out in
// the same order as the plain SETcc opcodes — TODO confirm.
108 ns
->bc
.set_op(ns
->bc
.op
+ (ALU_OP2_SETE_DX10
- ALU_OP2_SETE
));
109 f2i
->insert_after(ns
);
// Peephole for two-source condition ops (PRED / SET / KILL kinds) that test
// a value against literal zero with an E or NE condition. When the tested
// value resolves (via get_bool_op_info) to the result of another compare,
// a new opcode is chosen with get_predsetcc_op / get_setcc_op /
// get_killcc_op and a's sources are replaced with that compare's sources.
// NOTE(review): this listing has elided lines (the embedded source line
// numbers skip); early returns, the switch over op_kind, the declaration of
// newop and the final opcode update are not visible — confirm against the
// full file.
113 void peephole::optimize_cc_op2(alu_node
* a
) {
115 unsigned flags
= a
->bc
.op_ptr
->flags
;
116 unsigned cc
= flags
& AF_CC_MASK
;
// Only E / NE conditions on nodes without a->pred set go further; the
// action taken otherwise is on a line not shown (presumably return).
118 if ((cc
!= AF_CC_E
&& cc
!= AF_CC_NE
) || a
->pred
)
121 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
122 unsigned dst_type
= flags
& AF_DST_TYPE_MASK
;
// 1 = PRED, 2 = SET, 3 = KILL, 0 = none of these.
124 int op_kind
= (flags
& AF_PRED
) ? 1 :
125 (flags
& AF_SET
) ? 2 :
126 (flags
& AF_KILL
) ? 3 : 0;
128 bool swapped
= false;
// If the zero literal is the first source, swap the two sources.
130 if (a
->src
[0]->is_const() && a
->src
[0]->literal_value
== literal(0)) {
131 std::swap(a
->src
[0],a
->src
[1]);
// Both bytecode source descriptors are zero-filled after the swap.
// NOTE(review): where swapped is set to true is not visible here.
134 memset(&a
->bc
.src
[0], 0, sizeof(bc_alu_src
));
135 memset(&a
->bc
.src
[1], 0, sizeof(bc_alu_src
));
// Proceed when the second source is (now) the zero literal.
138 if (swapped
|| (a
->src
[1]->is_const() &&
139 a
->src
[1]->literal_value
== literal(0))) {
141 value
*s
= a
->src
[0];
143 bool_op_info bop
= {};
// Failure handling is on a line not shown.
151 if (!get_bool_op_info(s
, bop
))
// NOTE(review): the condition guarding this inversion is not visible
// (presumably cc == AF_CC_E) — confirm.
155 bop
.invert
= !bop
.invert
;
157 bool swap_args
= false;
// From here on cc is the condition of the defining compare bop.n.
159 cc
= bop
.n
->bc
.op_ptr
->flags
& AF_CC_MASK
;
// NOTE(review): presumably applied only when bop.invert is set — confirm.
162 cc
= invert_setcc_condition(cc
, swap_args
);
// NOTE(review): presumably guarded by bop.int_cvt — confirm.
165 assert(cmp_type
!= AF_FLOAT_CMP
);
166 cmp_type
= AF_FLOAT_CMP
;
// Debug output describing the resolved compare.
170 sblog
<< "boi node: ";
171 dump::dump_op(bop
.n
);
172 sblog
<< " invert: " << bop
.invert
<< " int_cvt: " << bop
.int_cvt
;
// New opcode selection; the three assignments presumably sit in the cases
// of a switch over op_kind (scaffolding not shown).
180 newop
= get_predsetcc_op(cc
, cmp_type
);
183 newop
= get_setcc_op(cc
, cmp_type
, dst_type
!= AF_FLOAT_DST
);
186 newop
= get_killcc_op(cc
, cmp_type
);
190 assert(!"invalid op kind");
// Sources taken from bop.n in swapped order (values and bytecode
// descriptors); presumably the swap_args branch — confirm.
197 a
->src
[0] = bop
.n
->src
[1];
198 a
->src
[1] = bop
.n
->src
[0];
199 a
->bc
.src
[0] = bop
.n
->bc
.src
[1];
200 a
->bc
.src
[1] = bop
.n
->bc
.src
[0];
// Sources taken from bop.n in their original order.
203 a
->src
[0] = bop
.n
->src
[0];
204 a
->src
[1] = bop
.n
->src
[1];
205 a
->bc
.src
[0] = bop
.n
->bc
.src
[0];
206 a
->bc
.src
[1] = bop
.n
->bc
.src
[1];
// Peephole for conditional-move style ops: when the condition source of a
// resolves (via get_bool_op_info) to a compare d that has a zero literal as
// one of its sources, a's condition source is replaced by the other source
// of d (index nds), sources 1 and 2 of a may be exchanged, and a's opcode is
// replaced by get_cndcc_op(dcc, dcmp_type).
// NOTE(review): this listing has elided lines (the embedded source line
// numbers skip); the declarations of d, nds and swap, the early returns and
// the switch header are not visible — confirm against the full file.
211 void peephole::optimize_CNDcc_op(alu_node
* a
) {
212 unsigned flags
= a
->bc
.op_ptr
->flags
;
213 unsigned cc
= flags
& AF_CC_MASK
;
214 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
// Tail of an if/else chain on cc; the first arm is not shown.
220 } else if (cc
!= AF_CC_NE
)
223 value
*s
= a
->src
[0];
225 bool_op_info bop
= {};
// Failure handling is on a line not shown.
233 if (!get_bool_op_info(s
, bop
))
242 sblog
<< "cndcc def: ";
// Flag fields of the defining compare d (d's declaration is not shown;
// presumably bop.n — confirm).
248 unsigned dflags
= d
->bc
.op_ptr
->flags
;
249 unsigned dcc
= dflags
& AF_CC_MASK
;
250 unsigned dcmp_type
= dflags
& AF_CMP_TYPE_MASK
;
251 unsigned ddst_type
= dflags
& AF_DST_TYPE_MASK
;
254 // TODO we can handle some of these cases,
255 // though probably this shouldn't happen
256 if (cmp_type
!= AF_FLOAT_CMP
&& ddst_type
== AF_FLOAT_DST
)
// Determine which source of d is the zero literal; the assignments made in
// each arm are not visible (presumably they set nds to the other index).
259 if (d
->src
[0]->is_const() && d
->src
[0]->literal_value
== literal(0))
261 else if ((d
->src
[1]->is_const() &&
262 d
->src
[1]->literal_value
== literal(0)))
267 // can't propagate ABS modifier to CNDcc because it's OP3
268 if (d
->bc
.src
[nds
].abs
)
271 // TODO we can handle some cases for uint comparison
272 if (dcmp_type
== AF_UINT_CMP
)
275 if (dcc
== AF_CC_NE
) {
// GT and GE are exchanged and the swap flag is toggled in both cases.
282 case AF_CC_GT
: dcc
= AF_CC_GE
; swap
= !swap
; break;
283 case AF_CC_GE
: dcc
= AF_CC_GT
; swap
= !swap
; break;
// The condition source of a becomes source nds of d, including its
// bytecode descriptor.
288 a
->src
[0] = d
->src
[nds
];
289 a
->bc
.src
[0] = d
->bc
.src
[nds
];
// Exchange sources 1 and 2 of a (presumably only when swap is set — confirm).
292 std::swap(a
->src
[1], a
->src
[2]);
293 std::swap(a
->bc
.src
[1], a
->bc
.src
[2]);
296 a
->bc
.set_op(get_cndcc_op(dcc
, dcmp_type
));
// Starting from a FLT_TO_INT node a, follows the definition of its first
// source (optionally through a TRUNC node) and checks whether the defining
// node is flagged AF_SET. a is taken by reference.
// NOTE(review): the return statements and any assignment to a are on lines
// not shown in this listing; presumably a is set to the SETcc node and true
// is returned on success — confirm against the full file.
300 bool peephole::get_bool_flt_to_int_source(alu_node
* &a
) {
302 if (a
->bc
.op
== ALU_OP1_FLT_TO_INT
) {
// Check for neg / abs / rel on the conversion's source; the action taken is
// not shown (presumably return false).
304 if (a
->bc
.src
[0].neg
|| a
->bc
.src
[0].abs
|| a
->bc
.src
[0].rel
)
307 value
*s
= a
->src
[0];
// The source must exist and be defined by an ALU instruction.
308 if (!s
|| !s
->def
|| !s
->def
->is_alu_inst())
311 alu_node
*dn
= static_cast<alu_node
*>(s
->def
);
// Look through an intermediate TRUNC.
// NOTE(review): the line that re-points s (presumably at dn->src[0]) is not
// visible here.
313 if (dn
->is_alu_op(ALU_OP1_TRUNC
)) {
315 if (!s
|| !s
->def
|| !s
->def
->is_alu_inst())
// Tests whether the TRUNC source has anything other than neg == 1,
// abs == 0, rel == 0; the action taken is not shown.
318 if (dn
->bc
.src
[0].neg
!= 1 || dn
->bc
.src
[0].abs
!= 0 ||
319 dn
->bc
.src
[0].rel
!= 0) {
323 dn
= static_cast<alu_node
*>(s
->def
);
// Check that the defining node is a SET-flagged op.
327 if (dn
->bc
.op_ptr
->flags
& AF_SET
) {
// Inspects the definition of value b and reports, through bop, the compare
// operation that produces it. The visible checks are: the defining node is
// an ALU instruction; it is flagged AF_SET (with a further AF_DX10 check);
// or it can be resolved through get_bool_flt_to_int_source().
// NOTE(review): the declaration of d (presumably b->def), every branch body
// (the writes to bop) and the return statements are on lines not shown in
// this listing — confirm against the full file.
335 bool peephole::get_bool_op_info(value
* b
, bool_op_info
& bop
) {
// The definition must exist and be an ALU instruction.
339 if (!d
|| !d
->is_alu_inst())
342 alu_node
*dn
= static_cast<alu_node
*>(d
);
// Direct case: the defining node is a SET-flagged op.
344 if (dn
->bc
.op_ptr
->flags
& AF_SET
) {
// DX10 variants are distinguished here; the effect on bop is not visible.
347 if (dn
->bc
.op_ptr
->flags
& AF_DX10
)
// Indirect case: dn is passed by reference and may be updated by the call.
353 if (get_bool_flt_to_int_source(dn
)) {
362 } // namespace r600_sb