r600g/sb: initial commit of the optimizing shader backend
[mesa.git] / src / gallium / drivers / r600 / sb / sb_peephole.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
// Set PPH_DEBUG to a non-zero value to compile in the debug dumps of this
// pass.
#define PPH_DEBUG 0

// PPH_DUMP(q) executes the statements `q` only when PPH_DEBUG is enabled;
// otherwise it expands to nothing.
#if !PPH_DEBUG
#define PPH_DUMP(q)
#else
#define PPH_DUMP(q) do { q } while (0)
#endif
34
35 #include "sb_shader.h"
36
37 #include "sb_pass.h"
38
39 namespace r600_sb {
40
41 using std::cerr;
42
43 int peephole::run() {
44
45 run_on(sh.root);
46
47 return 0;
48 }
49
50 void peephole::run_on(container_node* c) {
51
52 for (node_riterator I = c->rbegin(), E = c->rend(); I != E; ++I) {
53 node *n = *I;
54
55 if (n->is_container())
56 run_on(static_cast<container_node*>(n));
57 else {
58
59 if (n->is_alu_inst()) {
60 alu_node *a = static_cast<alu_node*>(n);
61
62 if (a->bc.op_ptr->flags & AF_CC_MASK) {
63 optimize_cc_op(a);
64 } else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
65
66 alu_node *s = a;
67 if (get_bool_flt_to_int_source(s)) {
68 convert_float_setcc(a, s);
69 }
70 }
71 }
72 }
73 }
74 }
75
76 void peephole::optimize_cc_op(alu_node* a) {
77 unsigned aflags = a->bc.op_ptr->flags;
78
79 if (aflags & (AF_PRED | AF_SET)) {
80 optimize_SETcc_op(a);
81 } else if (aflags & AF_CMOV) {
82 optimize_CNDcc_op(a);
83 }
84 }
85
86 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
87 alu_node *ns = sh.clone(s);
88
89 ns->dst[0] = f2i->dst[0];
90 ns->dst[0]->def = ns;
91 ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
92 f2i->insert_after(ns);
93 f2i->remove();
94 }
95
96 void peephole::optimize_SETcc_op(alu_node* a) {
97
98 unsigned flags = a->bc.op_ptr->flags;
99 unsigned cc = flags & AF_CC_MASK;
100 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
101 unsigned dst_type = flags & AF_DST_TYPE_MASK;
102 bool is_pred = flags & AF_PRED;
103
104 // TODO handle other cases
105
106 if (a->src[1]->is_const() && (cc == AF_CC_E || cc == AF_CC_NE) &&
107 a->src[1]->literal_value == literal(0) &&
108 a->bc.src[0].neg == 0 && a->bc.src[0].abs == 0) {
109
110 value *s = a->src[0];
111
112 bool_op_info bop = {};
113
114 PPH_DUMP(
115 cerr << "optSETcc ";
116 dump::dump_op(a);
117 cerr << "\n";
118 );
119
120 if (!get_bool_op_info(s, bop))
121 return;
122
123 if (cc == AF_CC_E)
124 bop.invert = !bop.invert;
125
126 bool swap_args = false;
127
128 cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
129
130 if (bop.invert)
131 cc = invert_setcc_condition(cc, swap_args);
132
133 if (bop.int_cvt) {
134 assert(cmp_type != AF_FLOAT_CMP);
135 cmp_type = AF_FLOAT_CMP;
136 }
137
138 PPH_DUMP(
139 cerr << "boi node: ";
140 dump::dump_op(bop.n);
141 cerr << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt;
142 cerr <<"\n";
143 );
144
145 unsigned newop = is_pred ? get_predsetcc_opcode(cc, cmp_type) :
146 get_setcc_opcode(cc, cmp_type, dst_type != AF_FLOAT_DST);
147
148 a->bc.set_op(newop);
149
150 if (swap_args) {
151 a->src[0] = bop.n->src[1];
152 a->src[1] = bop.n->src[0];
153 a->bc.src[0] = bop.n->bc.src[1];
154 a->bc.src[1] = bop.n->bc.src[0];
155
156 } else {
157 a->src[0] = bop.n->src[0];
158 a->src[1] = bop.n->src[1];
159 a->bc.src[0] = bop.n->bc.src[0];
160 a->bc.src[1] = bop.n->bc.src[1];
161 }
162 }
163 }
164
165 void peephole::optimize_CNDcc_op(alu_node* a) {
166
167 //TODO
168 }
169
170 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
171
172 if (a->bc.op == ALU_OP1_FLT_TO_INT) {
173
174 if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
175 return false;
176
177 value *s = a->src[0];
178 if (!s || !s->def || !s->def->is_alu_inst())
179 return false;
180
181 alu_node *dn = static_cast<alu_node*>(s->def);
182
183 if (dn->is_alu_op(ALU_OP1_TRUNC)) {
184 s = dn->src[0];
185 if (!s || !s->def || !s->def->is_alu_inst())
186 return false;
187
188 if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
189 dn->bc.src[0].rel != 0) {
190 return false;
191 }
192
193 dn = static_cast<alu_node*>(s->def);
194
195 }
196
197 if (dn->bc.op_ptr->flags & AF_SET) {
198 a = dn;
199 return true;
200 }
201 }
202 return false;
203 }
204
205 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
206
207 node *d = b->def;
208
209 if (!d || !d->is_alu_inst())
210 return false;
211
212 alu_node *dn = static_cast<alu_node*>(d);
213
214 if (dn->bc.op_ptr->flags & AF_SET) {
215 bop.n = dn;
216
217 if (dn->bc.op_ptr->flags & AF_DX10)
218 bop.int_cvt = true;
219
220 return true;
221 }
222
223 if (get_bool_flt_to_int_source(dn)) {
224 bop.n = dn;
225 bop.int_cvt = true;
226 return true;
227 }
228
229 return false;
230 }
231
232 } // namespace r600_sb