r600/sb: handle scratch mem reads on r600
[mesa.git] / src / gallium / drivers / r600 / sb / sb_if_conversion.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define IFC_DEBUG 0
28
29 #if IFC_DEBUG
30 #define IFC_DUMP(q) do { q } while (0)
31 #else
32 #define IFC_DUMP(q)
33 #endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
40 int if_conversion::run() {
41
42 regions_vec &rv = sh.get_regions();
43
44 unsigned converted = 0;
45
46 for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend();
47 I != E; I = N) {
48 N = I; ++N;
49
50 region_node *r = *I;
51 if (run_on(r)) {
52 rv.erase(I.base() - 1);
53 ++converted;
54 }
55 }
56 return 0;
57 }
58
59 void if_conversion::convert_kill_instructions(region_node *r,
60 value *em, bool branch,
61 container_node *c) {
62 value *cnd = NULL;
63
64 for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
65 N = I + 1;
66
67 if (!I->is_alu_inst())
68 continue;
69
70 alu_node *a = static_cast<alu_node*>(*I);
71 unsigned flags = a->bc.op_ptr->flags;
72
73 if (!(flags & AF_KILL))
74 continue;
75
76 // ignore predicated or non-const kill instructions
77 if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
78 continue;
79
80 literal l0 = a->src[0]->literal_value;
81 literal l1 = a->src[1]->literal_value;
82
83 expr_handler::apply_alu_src_mod(a->bc, 0, l0);
84 expr_handler::apply_alu_src_mod(a->bc, 1, l1);
85
86 if (expr_handler::evaluate_condition(flags, l0, l1)) {
87 // kill with constant 'true' condition, we'll convert it to the
88 // conditional kill outside of the if-then-else block
89
90 a->remove();
91
92 if (!cnd) {
93 cnd = get_select_value_for_em(sh, em);
94 } else {
95 // more than one kill with the same condition, just remove it
96 continue;
97 }
98
99 r->insert_before(a);
100 a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
101
102 a->src[0] = cnd;
103 a->src[1] = sh.get_const_value(0);
104 // clear modifiers
105 memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
106 memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
107 } else {
108 // kill with constant 'false' condition, this shouldn't happen
109 // but remove it anyway
110 a->remove();
111 }
112 }
113 }
114
115 bool if_conversion::check_and_convert(region_node *r) {
116
117 depart_node *nd1 = static_cast<depart_node*>(r->first);
118 if (!nd1->is_depart() || nd1->target != r)
119 return false;
120 if_node *nif = static_cast<if_node*>(nd1->first);
121 if (!nif->is_if())
122 return false;
123 depart_node *nd2 = static_cast<depart_node*>(nif->first);
124 if (!nd2->is_depart() || nd2->target != r)
125 return false;
126
127 value* &em = nif->cond;
128
129 node_stats s;
130
131 r->collect_stats(s);
132
133 IFC_DUMP(
134 sblog << "ifcvt: region " << r->region_id << " :\n";
135 s.dump();
136 );
137
138 if (s.region_count || s.fetch_count || s.alu_kill_count ||
139 s.if_count != 1 || s.repeat_count || s.uses_ar)
140 return false;
141
142 unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
143
144 // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
145 // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
146 // are eliminated. According to the docs, cost of CF instruction is
147 // equal to ~40 ALU VLIW instructions (instruction groups),
148 // so we have eliminated cost equal to ~120 groups in total.
149 // Let's also assume that we have avg 3 ALU instructions per group,
150 // This means that potential eliminated cost is about 360 single alu inst.
151 // On the other hand, we are speculatively executing conditional code now,
152 // so we are increasing the cost in some cases. In the worst case, we'll
153 // have to execute real_alu_count additional alu instructions instead of
154 // jumping over them. Let's assume for now that average added cost is
155 //
156 // (0.9 * real_alu_count)
157 //
158 // So we should perform if_conversion if
159 //
160 // (0.9 * real_alu_count) < 360, or
161 //
162 // real_alu_count < 400
163 //
164 // So if real_alu_count is more than 400, than we think that if_conversion
165 // doesn't make sense.
166
167 // FIXME: We can use more precise heuristic, taking into account sizes of
168 // the branches and their probability instead of total size.
169 // Another way to improve this is to consider the number of the groups
170 // instead of the number of instructions (taking into account actual VLIW
171 // packing).
172 // (Currently we don't know anything about packing at this stage, but
173 // probably we can make some more precise estimations anyway)
174
175 if (real_alu_count > 400)
176 return false;
177
178 IFC_DUMP( sblog << "if_cvt: processing...\n"; );
179
180 value *select = get_select_value_for_em(sh, em);
181
182 if (!select)
183 return false;
184
185 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
186 node *n = *I;
187
188 alu_node *ns = convert_phi(select, n);
189
190 if (ns)
191 r->insert_after(ns);
192 }
193
194 nd2->expand();
195 nif->expand();
196 nd1->expand();
197 r->expand();
198
199 return true;
200 }
201
202 bool if_conversion::run_on(region_node* r) {
203
204 if (r->dep_count() != 2 || r->rep_count() != 1)
205 return false;
206
207 depart_node *nd1 = static_cast<depart_node*>(r->first);
208 if (!nd1->is_depart())
209 return false;
210 if_node *nif = static_cast<if_node*>(nd1->first);
211 if (!nif->is_if())
212 return false;
213 depart_node *nd2 = static_cast<depart_node*>(nif->first);
214 if (!nd2->is_depart())
215 return false;
216
217 value* &em = nif->cond;
218
219 convert_kill_instructions(r, em, true, nd2);
220 convert_kill_instructions(r, em, false, nd1);
221
222 if (check_and_convert(r))
223 return true;
224
225 if (nd2->empty() && nif->next) {
226 // empty true branch, non-empty false branch
227 // we'll invert it to get rid of 'else'
228
229 assert(em && em->def);
230
231 alu_node *predset = static_cast<alu_node*>(em->def);
232
233 // create clone of PREDSET instruction with inverted condition.
234 // PREDSET has 3 dst operands in our IR (value written to gpr,
235 // predicate value and exec mask value), we'll split it such that
236 // new PREDSET will define exec mask value only, and two others will
237 // be defined in the old PREDSET (if they are not used then DCE will
238 // simply remove old PREDSET).
239
240 alu_node *newpredset = sh.clone(predset);
241 predset->insert_after(newpredset);
242
243 predset->dst[2] = NULL;
244
245 newpredset->dst[0] = NULL;
246 newpredset->dst[1] = NULL;
247
248 em->def = newpredset;
249
250 unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
251 unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
252 bool swapargs = false;
253
254 cc = invert_setcc_condition(cc, swapargs);
255
256 if (swapargs) {
257 std::swap(newpredset->src[0], newpredset->src[1]);
258 std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
259 }
260
261 unsigned newopcode = get_predsetcc_op(cc, cmptype);
262 newpredset->bc.set_op(newopcode);
263
264 // move the code from the 'false' branch ('else') to the 'true' branch
265 nd2->move(nif->next, NULL);
266
267 // swap phi operands
268 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
269 ++I) {
270 node *p = *I;
271 assert(p->src.size() == 2);
272 std::swap(p->src[0], p->src[1]);
273 }
274 }
275
276 return false;
277 }
278
279 alu_node* if_conversion::convert_phi(value* select, node* phi) {
280 assert(phi->dst.size() == 1 || phi->src.size() == 2);
281
282 value *d = phi->dst[0];
283 value *v1 = phi->src[0];
284 value *v2 = phi->src[1];
285
286 assert(d);
287
288 if (!d->is_any_gpr())
289 return NULL;
290
291 if (v1->is_undef()) {
292 if (v2->is_undef()) {
293 return NULL;
294 } else {
295 return sh.create_mov(d, v2);
296 }
297 } else if (v2->is_undef())
298 return sh.create_mov(d, v1);
299
300 alu_node* n = sh.create_alu();
301
302 n->bc.set_op(ALU_OP3_CNDE_INT);
303 n->dst.push_back(d);
304 n->src.push_back(select);
305 n->src.push_back(v1);
306 n->src.push_back(v2);
307
308 return n;
309 }
310
311 } // namespace r600_sb