r600g/sb: improve optimization of conditional instructions
[mesa.git] / src / gallium / drivers / r600 / sb / sb_peephole.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
// Compile-time switch for this pass's debug output; 0 disables it.
#define PPH_DEBUG 0

// PPH_DUMP(q) executes the statements q only when PPH_DEBUG is non-zero;
// otherwise it expands to nothing, so q is never compiled.
#if !PPH_DEBUG
#define PPH_DUMP(q)
#else
#define PPH_DUMP(q) do { q } while (0)
#endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
40 int peephole::run() {
41
42 run_on(sh.root);
43
44 return 0;
45 }
46
47 void peephole::run_on(container_node* c) {
48
49 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
50 node *n = *I;
51
52 if (n->is_container())
53 run_on(static_cast<container_node*>(n));
54 else {
55
56 if (n->is_alu_inst()) {
57 alu_node *a = static_cast<alu_node*>(n);
58
59 if (a->bc.op_ptr->flags &
60 (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
61 optimize_cc_op(a);
62 } else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
63
64 alu_node *s = a;
65 if (get_bool_flt_to_int_source(s)) {
66 convert_float_setcc(a, s);
67 }
68 }
69 }
70 }
71 }
72 }
73
74 void peephole::optimize_cc_op(alu_node* a) {
75 unsigned aflags = a->bc.op_ptr->flags;
76
77 if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
78 optimize_cc_op2(a);
79 } else if (aflags & AF_CMOV) {
80 optimize_CNDcc_op(a);
81 }
82 }
83
84 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
85 alu_node *ns = sh.clone(s);
86
87 ns->dst[0] = f2i->dst[0];
88 ns->dst[0]->def = ns;
89 ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
90 f2i->insert_after(ns);
91 f2i->remove();
92 }
93
94 void peephole::optimize_cc_op2(alu_node* a) {
95
96 unsigned flags = a->bc.op_ptr->flags;
97 unsigned cc = flags & AF_CC_MASK;
98
99 if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
100 return;
101
102 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
103 unsigned dst_type = flags & AF_DST_TYPE_MASK;
104
105 int op_kind = (flags & AF_PRED) ? 1 :
106 (flags & AF_SET) ? 2 :
107 (flags & AF_KILL) ? 3 : 0;
108
109 bool swapped = false;
110
111 if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
112 std::swap(a->src[0],a->src[1]);
113 swapped = true;
114 }
115
116 if (swapped || (a->src[1]->is_const() &&
117 a->src[1]->literal_value == literal(0))) {
118
119 value *s = a->src[0];
120
121 bool_op_info bop = {};
122
123 PPH_DUMP(
124 sblog << "cc_op2: ";
125 dump::dump_op(a);
126 sblog << "\n";
127 );
128
129 if (!get_bool_op_info(s, bop))
130 return;
131
132 if (cc == AF_CC_E)
133 bop.invert = !bop.invert;
134
135 bool swap_args = false;
136
137 cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
138
139 if (bop.invert)
140 cc = invert_setcc_condition(cc, swap_args);
141
142 if (bop.int_cvt) {
143 assert(cmp_type != AF_FLOAT_CMP);
144 cmp_type = AF_FLOAT_CMP;
145 }
146
147 PPH_DUMP(
148 sblog << "boi node: ";
149 dump::dump_op(bop.n);
150 sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt;
151 sblog <<"\n";
152 );
153
154 unsigned newop;
155
156 switch(op_kind) {
157 case 1:
158 newop = get_predsetcc_op(cc, cmp_type);
159 break;
160 case 2:
161 newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
162 break;
163 case 3:
164 newop = get_killcc_op(cc, cmp_type);
165 break;
166 default:
167 newop = ALU_OP0_NOP;
168 assert(!"invalid op kind");
169 break;
170 }
171
172 a->bc.set_op(newop);
173
174 if (swap_args) {
175 a->src[0] = bop.n->src[1];
176 a->src[1] = bop.n->src[0];
177 a->bc.src[0] = bop.n->bc.src[1];
178 a->bc.src[1] = bop.n->bc.src[0];
179
180 } else {
181 a->src[0] = bop.n->src[0];
182 a->src[1] = bop.n->src[1];
183 a->bc.src[0] = bop.n->bc.src[0];
184 a->bc.src[1] = bop.n->bc.src[1];
185 }
186 }
187 }
188
189 void peephole::optimize_CNDcc_op(alu_node* a) {
190
191 //TODO
192 }
193
194 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
195
196 if (a->bc.op == ALU_OP1_FLT_TO_INT) {
197
198 if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
199 return false;
200
201 value *s = a->src[0];
202 if (!s || !s->def || !s->def->is_alu_inst())
203 return false;
204
205 alu_node *dn = static_cast<alu_node*>(s->def);
206
207 if (dn->is_alu_op(ALU_OP1_TRUNC)) {
208 s = dn->src[0];
209 if (!s || !s->def || !s->def->is_alu_inst())
210 return false;
211
212 if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
213 dn->bc.src[0].rel != 0) {
214 return false;
215 }
216
217 dn = static_cast<alu_node*>(s->def);
218
219 }
220
221 if (dn->bc.op_ptr->flags & AF_SET) {
222 a = dn;
223 return true;
224 }
225 }
226 return false;
227 }
228
229 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
230
231 node *d = b->def;
232
233 if (!d || !d->is_alu_inst())
234 return false;
235
236 alu_node *dn = static_cast<alu_node*>(d);
237
238 if (dn->bc.op_ptr->flags & AF_SET) {
239 bop.n = dn;
240
241 if (dn->bc.op_ptr->flags & AF_DX10)
242 bop.int_cvt = true;
243
244 return true;
245 }
246
247 if (get_bool_flt_to_int_source(dn)) {
248 bop.n = dn;
249 bop.int_cvt = true;
250 return true;
251 }
252
253 return false;
254 }
255
256 } // namespace r600_sb