r600/sb: start adding GDS support
[mesa.git] / src / gallium / drivers / r600 / sb / sb_peephole.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
/* Set PPH_DEBUG to a non-zero value to compile in the PPH_DUMP statements. */
#define PPH_DEBUG 0

/*
 * PPH_DUMP(q): when PPH_DEBUG is non-zero, expands to the statements q
 * wrapped in a do { } while (0) block; otherwise expands to nothing.
 */
#if !PPH_DEBUG
#define PPH_DUMP(q)
#else
#define PPH_DUMP(q) do { q } while (0)
#endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
40 int peephole::run() {
41
42 run_on(sh.root);
43
44 return 0;
45 }
46
47 void peephole::run_on(container_node* c) {
48
49 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
50 node *n = *I;
51
52 if (n->is_container())
53 run_on(static_cast<container_node*>(n));
54 else {
55 if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
56 fetch_node *f = static_cast<fetch_node*>(n);
57 bool has_dst = false;
58
59 for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
60 value *v = *I;
61 if (v)
62 has_dst = true;
63 }
64 if (!has_dst)
65 if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
66 f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
67 }
68 if (n->is_alu_inst()) {
69 alu_node *a = static_cast<alu_node*>(n);
70
71 if (a->bc.op_ptr->flags &
72 (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
73 optimize_cc_op(a);
74 } else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
75
76 alu_node *s = a;
77 if (get_bool_flt_to_int_source(s)) {
78 convert_float_setcc(a, s);
79 }
80 }
81 }
82 }
83 }
84 }
85
86 void peephole::optimize_cc_op(alu_node* a) {
87 unsigned aflags = a->bc.op_ptr->flags;
88
89 if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
90 optimize_cc_op2(a);
91 } else if (aflags & AF_CMOV) {
92 optimize_CNDcc_op(a);
93 }
94 }
95
96 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
97 alu_node *ns = sh.clone(s);
98
99 ns->dst[0] = f2i->dst[0];
100 ns->dst[0]->def = ns;
101 ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
102 f2i->insert_after(ns);
103 f2i->remove();
104 }
105
106 void peephole::optimize_cc_op2(alu_node* a) {
107
108 unsigned flags = a->bc.op_ptr->flags;
109 unsigned cc = flags & AF_CC_MASK;
110
111 if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
112 return;
113
114 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
115 unsigned dst_type = flags & AF_DST_TYPE_MASK;
116
117 int op_kind = (flags & AF_PRED) ? 1 :
118 (flags & AF_SET) ? 2 :
119 (flags & AF_KILL) ? 3 : 0;
120
121 bool swapped = false;
122
123 if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
124 std::swap(a->src[0],a->src[1]);
125 swapped = true;
126 // clear modifiers
127 memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
128 memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
129 }
130
131 if (swapped || (a->src[1]->is_const() &&
132 a->src[1]->literal_value == literal(0))) {
133
134 value *s = a->src[0];
135
136 bool_op_info bop = {};
137
138 PPH_DUMP(
139 sblog << "cc_op2: ";
140 dump::dump_op(a);
141 sblog << "\n";
142 );
143
144 if (!get_bool_op_info(s, bop))
145 return;
146
147 if (cc == AF_CC_E)
148 bop.invert = !bop.invert;
149
150 bool swap_args = false;
151
152 cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
153
154 if (bop.invert)
155 cc = invert_setcc_condition(cc, swap_args);
156
157 if (bop.int_cvt) {
158 assert(cmp_type != AF_FLOAT_CMP);
159 cmp_type = AF_FLOAT_CMP;
160 }
161
162 PPH_DUMP(
163 sblog << "boi node: ";
164 dump::dump_op(bop.n);
165 sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt;
166 sblog <<"\n";
167 );
168
169 unsigned newop;
170
171 switch(op_kind) {
172 case 1:
173 newop = get_predsetcc_op(cc, cmp_type);
174 break;
175 case 2:
176 newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
177 break;
178 case 3:
179 newop = get_killcc_op(cc, cmp_type);
180 break;
181 default:
182 newop = ALU_OP0_NOP;
183 assert(!"invalid op kind");
184 break;
185 }
186
187 a->bc.set_op(newop);
188
189 if (swap_args) {
190 a->src[0] = bop.n->src[1];
191 a->src[1] = bop.n->src[0];
192 a->bc.src[0] = bop.n->bc.src[1];
193 a->bc.src[1] = bop.n->bc.src[0];
194
195 } else {
196 a->src[0] = bop.n->src[0];
197 a->src[1] = bop.n->src[1];
198 a->bc.src[0] = bop.n->bc.src[0];
199 a->bc.src[1] = bop.n->bc.src[1];
200 }
201 }
202 }
203
204 void peephole::optimize_CNDcc_op(alu_node* a) {
205 unsigned flags = a->bc.op_ptr->flags;
206 unsigned cc = flags & AF_CC_MASK;
207 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
208 bool swap = false;
209
210 if (cc == AF_CC_E) {
211 swap = !swap;
212 cc = AF_CC_NE;
213 } else if (cc != AF_CC_NE)
214 return;
215
216 value *s = a->src[0];
217
218 bool_op_info bop = {};
219
220 PPH_DUMP(
221 sblog << "cndcc: ";
222 dump::dump_op(a);
223 sblog << "\n";
224 );
225
226 if (!get_bool_op_info(s, bop))
227 return;
228
229 alu_node *d = bop.n;
230
231 if (d->bc.omod)
232 return;
233
234 PPH_DUMP(
235 sblog << "cndcc def: ";
236 dump::dump_op(d);
237 sblog << "\n";
238 );
239
240
241 unsigned dflags = d->bc.op_ptr->flags;
242 unsigned dcc = dflags & AF_CC_MASK;
243 unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
244 unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
245 int nds;
246
247 // TODO we can handle some of these cases,
248 // though probably this shouldn't happen
249 if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
250 return;
251
252 if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
253 nds = 1;
254 else if ((d->src[1]->is_const() &&
255 d->src[1]->literal_value == literal(0)))
256 nds = 0;
257 else
258 return;
259
260 // can't propagate ABS modifier to CNDcc because it's OP3
261 if (d->bc.src[nds].abs)
262 return;
263
264 // TODO we can handle some cases for uint comparison
265 if (dcmp_type == AF_UINT_CMP)
266 return;
267
268 if (dcc == AF_CC_NE) {
269 dcc = AF_CC_E;
270 swap = !swap;
271 }
272
273 if (nds == 1) {
274 switch (dcc) {
275 case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
276 case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
277 default: break;
278 }
279 }
280
281 a->src[0] = d->src[nds];
282 a->bc.src[0] = d->bc.src[nds];
283
284 if (swap) {
285 std::swap(a->src[1], a->src[2]);
286 std::swap(a->bc.src[1], a->bc.src[2]);
287 }
288
289 a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
290
291 }
292
293 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
294
295 if (a->bc.op == ALU_OP1_FLT_TO_INT) {
296
297 if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
298 return false;
299
300 value *s = a->src[0];
301 if (!s || !s->def || !s->def->is_alu_inst())
302 return false;
303
304 alu_node *dn = static_cast<alu_node*>(s->def);
305
306 if (dn->is_alu_op(ALU_OP1_TRUNC)) {
307 s = dn->src[0];
308 if (!s || !s->def || !s->def->is_alu_inst())
309 return false;
310
311 if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
312 dn->bc.src[0].rel != 0) {
313 return false;
314 }
315
316 dn = static_cast<alu_node*>(s->def);
317
318 }
319
320 if (dn->bc.op_ptr->flags & AF_SET) {
321 a = dn;
322 return true;
323 }
324 }
325 return false;
326 }
327
328 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
329
330 node *d = b->def;
331
332 if (!d || !d->is_alu_inst())
333 return false;
334
335 alu_node *dn = static_cast<alu_node*>(d);
336
337 if (dn->bc.op_ptr->flags & AF_SET) {
338 bop.n = dn;
339
340 if (dn->bc.op_ptr->flags & AF_DX10)
341 bop.int_cvt = true;
342
343 return true;
344 }
345
346 if (get_bool_flt_to_int_source(dn)) {
347 bop.n = dn;
348 bop.int_cvt = true;
349 return true;
350 }
351
352 return false;
353 }
354
355 } // namespace r600_sb