r600g/sb: move chip & class name functions to sb_context
[mesa.git] / src/gallium/drivers/r600/sb/sb_expr.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include <cmath>
28
29 #include "sb_shader.h"
30
31 namespace r600_sb {
32
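// Given a value produced by a PRED_SETxx instruction (an exec-mask value),
// clone that instruction as a plain SETxx (see convert_predset_to_set below),
// insert the clone right after the original, and return a fresh temp holding
// the select value.  Returns NULL if the value isn't defined by a PRED_SET.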
33 value* get_select_value_for_em(shader& sh, value* em) {
34 if (!em->def)
35 return NULL;
36
37 node *predset = em->def;
38 if (!predset->is_pred_set())
39 return NULL;
40
41 alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42 convert_predset_to_set(sh, s);
43
44 predset->insert_after(s);
45
46 value* &d0 = s->dst[0];
47 d0 = sh.create_temp_value();
48 d0->def = s;
49 return d0;
50 }
51
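// Rewrite an existing ALU node in place as a single-source MOV of 'src',
// optionally applying neg/abs modifiers to that source.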
52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53 n.src.resize(1);
54 n.src[0] = src;
55 n.bc.src[0].abs = abs;
56 n.bc.src[0].neg = neg;
57 n.bc.set_op(ALU_OP1_MOV);
58 }
59
60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61
62 value * expr_handler::get_const(const literal &l) {
63 value *v = sh.get_const_value(l);
64 if (!v->gvn_source)
65 vt.add_value(v);
66 return v;
67 }
68
69 void expr_handler::assign_source(value *dst, value *src) {
70 dst->gvn_source = src->gvn_source;
71 }
72
73 bool expr_handler::equal(value *l, value *r) {
74
75 assert(l != r);
76
77 if (l->gvalue() == r->gvalue())
78 return true;
79
80 if (l->def && r->def)
81 return defs_equal(l, r);
82
83 if (l->is_rel() && r->is_rel())
84 return ivars_equal(l, r);
85
86 return false;
87 }
88
89 bool expr_handler::ivars_equal(value* l, value* r) {
90 if (l->rel->gvalue() == r->rel->gvalue()
91 && l->select == r->select) {
92
93 vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
94 vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
95
96 // FIXME: replace this with more precise aliasing test
97 return lv == rv;
98 }
99 return false;
100 }
101
102 bool expr_handler::defs_equal(value* l, value* r) {
103
104 node *d1 = l->def;
105 node *d2 = r->def;
106
107 if (d1->type != d2->type || d1->subtype != d2->subtype)
108 return false;
109
110 if (d1->is_pred_set() || d2->is_pred_set())
111 return false;
112
113 if (d1->type == NT_OP) {
114 switch (d1->subtype) {
115 case NST_ALU_INST:
116 return ops_equal(
117 static_cast<alu_node*>(d1),
118 static_cast<alu_node*>(d2));
119 //	case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
120 //			static_cast<fetch_node*>(d2));
121 //	case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
122 //			static_cast<cf_node*>(d2));
123 default:
124 break;
125 }
126 }
127 return false;
128 }
129
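// Try to fold the node that defines 'v'; returns true if folding assigned a
// gvn_source to the value.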
130 bool expr_handler::try_fold(value* v) {
131 assert(!v->gvn_source);
132
133 if (v->def)
134 try_fold(v->def);
135
136 if (v->gvn_source)
137 return true;
138
139 return false;
140 }
141
142 bool expr_handler::try_fold(node* n) {
143 return n->fold_dispatch(this);
144 }
145
146 bool expr_handler::fold(node& n) {
147 if (n.subtype == NST_PHI) {
148
149 value *s = n.src[0];
150
151 // FIXME disabling phi folding for registers for now, otherwise we lose
152 // control flow information in some cases
153 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
154 	// a control-flow transformation is probably required to enable it
155 if (s->is_sgpr())
156 return false;
157
158 for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
159 value *v = *I;
160 if (!s->v_equal(v))
161 return false;
162 }
163
164 assign_source(n.dst[0], s);
165 } else {
166 assert(n.subtype == NST_PSI);
167 assert(n.src.size() >= 6);
168
169 value *s = n.src[2];
170 assert(s->gvn_source);
171
172 for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
173 value *v = *(I+2);
174 if (!s->v_equal(v))
175 return false;
176 }
177 assign_source(n.dst[0], s);
178 }
179 return true;
180 }
181
182 bool expr_handler::fold(container_node& n) {
183 return false;
184 }
185
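// Fold SETxx-style ops.  Both sources constant: evaluate the condition
// directly.  One source constant: sign/abs reasoning on float compares (or an
// unsigned compare against 0) may still decide the result.  Identical sources
// with identical modifiers decide the result for non-float compares (floats
// are skipped because of NaNs).  On success the node becomes a MOV of the
// boolean result, encoded according to the op's destination type.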
186 bool expr_handler::fold_setcc(alu_node &n) {
187
188 value* v0 = n.src[0]->gvalue();
189 value* v1 = n.src[1]->gvalue();
190
191 assert(v0 && v1 && n.dst[0]);
192
193 unsigned flags = n.bc.op_ptr->flags;
194 unsigned cc = flags & AF_CC_MASK;
195 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
196 unsigned dst_type = flags & AF_DST_TYPE_MASK;
197
198 bool cond_result;
199 bool have_result = false;
200
201 bool isc0 = v0->is_const();
202 bool isc1 = v1->is_const();
203
204 literal dv, cv0, cv1;
205
206 if (isc0) {
207 cv0 = v0->get_const_value();
208 apply_alu_src_mod(n.bc, 0, cv0);
209 }
210
211 if (isc1) {
212 cv1 = v1->get_const_value();
213 apply_alu_src_mod(n.bc, 1, cv1);
214 }
215
216 if (isc0 && isc1) {
217 cond_result = evaluate_condition(flags, cv0, cv1);
218 have_result = true;
219 } else if (isc1) {
220 if (cmp_type == AF_FLOAT_CMP) {
221 if (n.bc.src[0].abs && !n.bc.src[0].neg) {
222 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
223 cond_result = true;
224 have_result = true;
225 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
226 cond_result = true;
227 have_result = true;
228 }
229 } else if (n.bc.src[0].abs && n.bc.src[0].neg) {
230 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
231 cond_result = false;
232 have_result = true;
233 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
234 cond_result = false;
235 have_result = true;
236 }
237 }
238 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
239 cond_result = true;
240 have_result = true;
241 }
242 } else if (isc0) {
243 if (cmp_type == AF_FLOAT_CMP) {
244 if (n.bc.src[1].abs && !n.bc.src[1].neg) {
245 if (cv0.f <= 0.0f && cc == AF_CC_GT) {
246 cond_result = false;
247 have_result = true;
248 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
249 cond_result = false;
250 have_result = true;
251 }
252 } else if (n.bc.src[1].abs && n.bc.src[1].neg) {
253 if (cv0.f >= 0.0f && cc == AF_CC_GE) {
254 cond_result = true;
255 have_result = true;
256 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
257 cond_result = true;
258 have_result = true;
259 }
260 }
261 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
262 cond_result = false;
263 have_result = true;
264 }
265 } else if (v0 == v1) {
266 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
267 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
268 // NOTE can't handle float comparisons here because of NaNs
269 cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
270 have_result = true;
271 }
272 }
273
274 if (have_result) {
275 literal result;
276
277 if (cond_result)
278 result = dst_type != AF_FLOAT_DST ?
279 literal(0xFFFFFFFFu) : literal(1.0f);
280 else
281 result = literal(0);
282
283 convert_to_mov(n, sh.get_const_value(result));
284 return fold_alu_op1(n);
285 }
286
287 return false;
288 }
289
290 bool expr_handler::fold(alu_node& n) {
291
292 switch (n.bc.op_ptr->src_count) {
293 case 1: return fold_alu_op1(n);
294 case 2: return fold_alu_op2(n);
295 case 3: return fold_alu_op3(n);
296 default:
297 assert(0);
298 }
299 return false;
300 }
301
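// For fetch destination channels using the constant swizzles SEL_0 / SEL_1,
// propagate the literals 0.0f / 1.0f to the destination values.  The fetch
// node itself is never folded away here.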
302 bool expr_handler::fold(fetch_node& n) {
303
304 unsigned chan = 0;
305 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
306 value* &v = *I;
307 if (v) {
308 if (n.bc.dst_sel[chan] == SEL_0)
309 assign_source(*I, get_const(0.0f));
310 else if (n.bc.dst_sel[chan] == SEL_1)
311 assign_source(*I, get_const(1.0f));
312 }
313 ++chan;
314 }
315 return false;
316 }
317
318 bool expr_handler::fold(cf_node& n) {
319 return false;
320 }
321
322 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
323 literal &v) {
324 const bc_alu_src &s = bc.src[src];
325
326 if (s.abs)
327 v = fabs(v.f);
328 if (s.neg)
329 v = -v.f;
330 }
331
332 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
333 	float omod_coeff[] = {2.0f, 4.0f, 0.5f};
334
335 if (bc.omod)
336 v = v.f * omod_coeff[bc.omod - 1];
337 if (bc.clamp)
338 v = float_clamp(v.f);
339 }
340
341 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
342
343 assert(l.size() == r.size());
344
345 int s = l.size();
346
347 for (int k = 0; k < s; ++k) {
348 if (!l[k]->v_equal(r[k]))
349 return false;
350 }
351
352 return true;
353 }
354
355 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
356 const bc_alu &b0 = l->bc;
357 const bc_alu &b1 = r->bc;
358
359 if (b0.op != b1.op)
360 return false;
361
362 unsigned src_count = b0.op_ptr->src_count;
363
364 if (b0.index_mode != b1.index_mode)
365 return false;
366
367 if (b0.clamp != b1.clamp || b0.omod != b1.omod)
368 return false;
369
370 for (unsigned s = 0; s < src_count; ++s) {
371 const bc_alu_src &s0 = b0.src[s];
372 const bc_alu_src &s1 = b1.src[s];
373
374 if (s0.abs != s1.abs || s0.neg != s1.neg)
375 return false;
376 }
377 return args_equal(l->src, r->src);
378 }
379
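// Fold single-source ops.  For non-constant sources: collapse
// (MOV -(MOV -x)) to (MOV x) and propagate the source of a plain
// MOV / MOVA_INT / MOVA_GPR_INT with no modifiers.  For constant sources:
// evaluate the op (conversions, transcendentals, etc.), applying source and
// destination modifiers, except that LOG of 0 is left to the GPU.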
380 bool expr_handler::fold_alu_op1(alu_node& n) {
381
382 assert(!n.src.empty());
383 if (n.src.empty())
384 return false;
385
386 value* v0 = n.src[0]->gvalue();
387
388 assert(v0 && n.dst[0]);
389
390 if (!v0->is_const()) {
391 // handle (MOV -(MOV -x)) => (MOV x)
392 if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
393 && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
394 alu_node *sd = static_cast<alu_node*>(v0->def);
395 if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
396 sd->bc.src[0].neg) {
397 n.src[0] = sd->src[0];
398 n.bc.src[0].neg = 0;
399 v0 = n.src[0]->gvalue();
400 }
401 }
402
403 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
404 n.bc.op == ALU_OP1_MOVA_GPR_INT)
405 && n.bc.clamp == 0 && n.bc.omod == 0
406 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
407 assign_source(n.dst[0], v0);
408 return true;
409 }
410 return false;
411 }
412
413 literal dv, cv = v0->get_const_value();
414 apply_alu_src_mod(n.bc, 0, cv);
415
416 switch (n.bc.op) {
417 case ALU_OP1_CEIL: dv = ceil(cv.f); break;
418 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
419 case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
420 case ALU_OP1_FLOOR: dv = floor(cv.f); break;
421 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
422 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
423 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
424 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
425 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
426 case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
427 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
428 case ALU_OP1_LOG_CLAMPED:
429 case ALU_OP1_LOG_IEEE:
430 if (cv.f != 0.0f)
431 dv = log2(cv.f);
432 else
433 // don't fold to NAN, let the GPU handle it for now
434 // (prevents degenerate LIT tests from failing)
435 return false;
436 break;
437 case ALU_OP1_MOV: dv = cv; break;
438 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
439 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
440 // case ALU_OP1_MOVA_GPR_INT:
441 case ALU_OP1_NOT_INT: dv = ~cv.i; break;
442 case ALU_OP1_PRED_SET_INV:
443 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
444 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
445 case ALU_OP1_RECIPSQRT_CLAMPED:
446 case ALU_OP1_RECIPSQRT_FF:
447 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
448 case ALU_OP1_RECIP_CLAMPED:
449 case ALU_OP1_RECIP_FF:
450 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
451 // case ALU_OP1_RECIP_INT:
452 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
453 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
454 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
455 case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
456 case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
457
458 default:
459 return false;
460 }
461
462 apply_alu_dst_mod(n.bc, dv);
463 assign_source(n.dst[0], get_const(dv));
464 return true;
465 }
466
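// (ADD (MUL a, b), c) => (MULADD a, b, c), tried with the MUL on either ADD
// operand.  Requires no abs modifiers, no omod/clamp on the MUL, no omod on
// the ADD, and that the three resulting sources are not all kcache values.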
467 bool expr_handler::fold_mul_add(alu_node *n) {
468
469 bool ieee;
470 value* v0 = n->src[0]->gvalue();
471
472 alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
473 static_cast<alu_node*>(v0->def) : NULL;
474
475 if (d0) {
476 if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
477 ieee = true;
478 else if (d0->is_alu_op(ALU_OP2_MUL))
479 ieee = false;
480 else
481 return false;
482
483 if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
484 !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
485 !d0->bc.clamp && !n->bc.omod &&
486 (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
487 !n->src[1]->is_kcache())) {
488
489 bool mul_neg = n->bc.src[0].neg;
490
491 n->src.resize(3);
492 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
493 n->src[2] = n->src[1];
494 n->bc.src[2] = n->bc.src[1];
495 n->src[0] = d0->src[0];
496 n->bc.src[0] = d0->bc.src[0];
497 n->src[1] = d0->src[1];
498 n->bc.src[1] = d0->bc.src[1];
499
500 n->bc.src[0].neg ^= mul_neg;
501
502 fold_alu_op3(*n);
503 return true;
504 }
505 }
506
507 value* v1 = n->src[1]->gvalue();
508
509 alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
510 static_cast<alu_node*>(v1->def) : NULL;
511
512 if (d1) {
513 if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
514 ieee = true;
515 else if (d1->is_alu_op(ALU_OP2_MUL))
516 ieee = false;
517 else
518 return false;
519
520 if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
521 !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
522 !d1->bc.clamp && !n->bc.omod &&
523 (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
524 !n->src[0]->is_kcache())) {
525
526 bool mul_neg = n->bc.src[1].neg;
527
528 n->src.resize(3);
529 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
530 n->src[2] = n->src[0];
531 n->bc.src[2] = n->bc.src[0];
532 n->src[1] = d1->src[1];
533 n->bc.src[1] = d1->bc.src[1];
534 n->src[0] = d1->src[0];
535 n->bc.src[0] = d1->bc.src[0];
536
537 n->bc.src[1].neg ^= mul_neg;
538
539 fold_alu_op3(*n);
540 return true;
541 }
542 }
543
544 return false;
545 }
546
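// Evaluate a two-source op on constant operands.  Returns false for ops that
// aren't handled; fold_assoc also calls this with dummy zero arguments as a
// "can we evaluate this op?" check.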
547 bool expr_handler::eval_const_op(unsigned op, literal &r,
548 literal cv0, literal cv1) {
549
550 switch (op) {
551 case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
552 case ALU_OP2_ADDC_UINT:
553 r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
554 case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
555 case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
556 case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
557 case ALU_OP2_BFM_INT:
558 r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
559 case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
560 case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
561 case ALU_OP2_MAX:
562 case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
563 case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
564 case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
565 case ALU_OP2_MIN:
566 case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
567 case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
568 case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
569 case ALU_OP2_MUL:
570 case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
571 case ALU_OP2_MULHI_INT:
572 r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
573 case ALU_OP2_MULHI_UINT:
574 r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
575 case ALU_OP2_MULLO_INT:
576 r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
577 case ALU_OP2_MULLO_UINT:
578 r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
579 case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
580 case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
581 case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
582
583 default:
584 return false;
585 }
586
587 return true;
588 }
589
590 // fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
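// Walks up a chain of nodes with the same (associative) op, accumulating the
// constant operands into 'cr'.  MULADD/MULADD_IEEE participate via their MUL
// part.  'last_arg' tracks which operand of the innermost node is left
// non-constant; -1 means the whole chain evaluated to a constant.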
591 bool expr_handler::fold_assoc(alu_node *n) {
592
593 alu_node *a = n;
594 literal cr;
595
596 int last_arg = -3;
597
598 unsigned op = n->bc.op;
599 bool allow_neg = false, cur_neg = false;
600
601 switch(op) {
602 case ALU_OP2_ADD:
603 case ALU_OP2_MUL:
604 case ALU_OP2_MUL_IEEE:
605 allow_neg = true;
606 break;
607 case ALU_OP3_MULADD:
608 allow_neg = true;
609 op = ALU_OP2_MUL;
610 break;
611 case ALU_OP3_MULADD_IEEE:
612 allow_neg = true;
613 op = ALU_OP2_MUL_IEEE;
614 break;
615 default:
616 if (n->bc.op_ptr->src_count != 2)
617 return false;
618 }
619
620 // check if we can evaluate the op
621 if (!eval_const_op(op, cr, literal(0), literal(0)))
622 return false;
623
624 while (true) {
625
626 value *v0 = a->src[0]->gvalue();
627 value *v1 = a->src[1]->gvalue();
628
629 last_arg = -2;
630
631 if (v1->is_const()) {
632 literal arg = v1->get_const_value();
633 apply_alu_src_mod(a->bc, 1, arg);
634 if (cur_neg)
635 arg.f = -arg.f;
636
637 if (a == n)
638 cr = arg;
639 else
640 eval_const_op(op, cr, cr, arg);
641
642 if (v0->def) {
643 alu_node *d0 = static_cast<alu_node*>(v0->def);
644 if ((d0->is_alu_op(op) ||
645 (op == ALU_OP2_MUL_IEEE &&
646 d0->is_alu_op(ALU_OP2_MUL))) &&
647 !d0->bc.omod && !d0->bc.clamp &&
648 (!a->bc.src[0].neg || allow_neg)) {
649 cur_neg ^= a->bc.src[0].neg;
650 a = d0;
651 continue;
652 }
653 }
654 last_arg = 0;
655
656 }
657
658 if (v0->is_const()) {
659 literal arg = v0->get_const_value();
660 apply_alu_src_mod(a->bc, 0, arg);
661 if (cur_neg)
662 arg.f = -arg.f;
663
664 if (last_arg == 0) {
665 eval_const_op(op, cr, cr, arg);
666 last_arg = -1;
667 break;
668 }
669
670 if (a == n)
671 cr = arg;
672 else
673 eval_const_op(op, cr, cr, arg);
674
675 if (v1->def) {
676 alu_node *d1 = static_cast<alu_node*>(v1->def);
677 if ((d1->is_alu_op(op) ||
678 (op == ALU_OP2_MUL_IEEE &&
679 d1->is_alu_op(ALU_OP2_MUL))) &&
680 !d1->bc.omod && !d1->bc.clamp &&
681 (!a->bc.src[1].neg || allow_neg)) {
682 cur_neg ^= a->bc.src[1].neg;
683 a = d1;
684 continue;
685 }
686 }
687
688 last_arg = 1;
689 }
690
691 break;
692 };
693
694 if (last_arg == -1) {
695 // result is const
696 apply_alu_dst_mod(n->bc, cr);
697
698 if (n->bc.op == op) {
699 convert_to_mov(*n, sh.get_const_value(cr));
700 fold_alu_op1(*n);
701 return true;
702 } else { // MULADD => ADD
703 n->src[0] = n->src[2];
704 n->bc.src[0] = n->bc.src[2];
705 n->src[1] = sh.get_const_value(cr);
706 memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
707
708 n->src.resize(2);
709 n->bc.set_op(ALU_OP2_ADD);
710 }
711 } else if (last_arg >= 0) {
712 n->src[0] = a->src[last_arg];
713 n->bc.src[0] = a->bc.src[last_arg];
714 n->bc.src[0].neg ^= cur_neg;
715 n->src[1] = sh.get_const_value(cr);
716 memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
717 }
718
719 return false;
720 }
721
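// Fold two-source ops: SETxx ops go through fold_setcc, associative chains
// through fold_assoc (unless safe_math).  Handles equal-argument identities
// (MIN/MAX x,x => x, and with unsafe math ADD x,x => MUL x,2 and
// ADD x,-x => 0), turns an ADD of a MUL into MULADD via fold_mul_add,
// evaluates fully-constant ops, and applies the usual identities when one
// source is the constant 0 or 1.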
722 bool expr_handler::fold_alu_op2(alu_node& n) {
723
724 if (n.src.size() < 2)
725 return false;
726
727 unsigned flags = n.bc.op_ptr->flags;
728
729 if (flags & AF_SET) {
730 return fold_setcc(n);
731 }
732
733 if (!sh.safe_math && (flags & AF_M_ASSOC)) {
734 if (fold_assoc(&n))
735 return true;
736 }
737
738 value* v0 = n.src[0]->gvalue();
739 value* v1 = n.src[1]->gvalue();
740
741 assert(v0 && v1);
742
743 // handle some operations with equal args, e.g. x + x => x * 2
744 if (v0 == v1) {
745 if (n.bc.src[0].neg == n.bc.src[1].neg &&
746 n.bc.src[0].abs == n.bc.src[1].abs) {
747 switch (n.bc.op) {
748 case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
749 case ALU_OP2_MAX:
750 convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
751 return fold_alu_op1(n);
752 case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
753 if (!sh.safe_math) {
754 n.src[1] = sh.get_const_value(2.0f);
755 memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
756 n.bc.set_op(ALU_OP2_MUL);
757 return fold_alu_op2(n);
758 }
759 break;
760 }
761 }
762 if (n.bc.src[0].neg != n.bc.src[1].neg &&
763 n.bc.src[0].abs == n.bc.src[1].abs) {
764 switch (n.bc.op) {
765 case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0)
766 if (!sh.safe_math) {
767 convert_to_mov(n, sh.get_const_value(literal(0)));
768 return fold_alu_op1(n);
769 }
770 break;
771 }
772 }
773 }
774
775 if (n.bc.op == ALU_OP2_ADD) {
776 if (fold_mul_add(&n))
777 return true;
778 }
779
780 bool isc0 = v0->is_const();
781 bool isc1 = v1->is_const();
782
783 if (!isc0 && !isc1)
784 return false;
785
786 literal dv, cv0, cv1;
787
788 if (isc0) {
789 cv0 = v0->get_const_value();
790 apply_alu_src_mod(n.bc, 0, cv0);
791 }
792
793 if (isc1) {
794 cv1 = v1->get_const_value();
795 apply_alu_src_mod(n.bc, 1, cv1);
796 }
797
798 if (isc0 && isc1) {
799
800 if (!eval_const_op(n.bc.op, dv, cv0, cv1))
801 return false;
802
803 } else { // one source is const
804
805 if (isc0 && cv0 == literal(0)) {
806 switch (n.bc.op) {
807 case ALU_OP2_ADD:
808 case ALU_OP2_ADD_INT:
809 case ALU_OP2_MAX_UINT:
810 case ALU_OP2_OR_INT:
811 case ALU_OP2_XOR_INT:
812 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
813 return fold_alu_op1(n);
814 case ALU_OP2_AND_INT:
815 case ALU_OP2_ASHR_INT:
816 case ALU_OP2_LSHL_INT:
817 case ALU_OP2_LSHR_INT:
818 case ALU_OP2_MIN_UINT:
819 case ALU_OP2_MUL:
820 case ALU_OP2_MULHI_UINT:
821 case ALU_OP2_MULLO_UINT:
822 convert_to_mov(n, sh.get_const_value(literal(0)));
823 return fold_alu_op1(n);
824 }
825 } else if (isc1 && cv1 == literal(0)) {
826 switch (n.bc.op) {
827 case ALU_OP2_ADD:
828 case ALU_OP2_ADD_INT:
829 case ALU_OP2_ASHR_INT:
830 case ALU_OP2_LSHL_INT:
831 case ALU_OP2_LSHR_INT:
832 case ALU_OP2_MAX_UINT:
833 case ALU_OP2_OR_INT:
834 case ALU_OP2_SUB_INT:
835 case ALU_OP2_XOR_INT:
836 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
837 return fold_alu_op1(n);
838 case ALU_OP2_AND_INT:
839 case ALU_OP2_MIN_UINT:
840 case ALU_OP2_MUL:
841 case ALU_OP2_MULHI_UINT:
842 case ALU_OP2_MULLO_UINT:
843 convert_to_mov(n, sh.get_const_value(literal(0)));
844 return fold_alu_op1(n);
845 }
846 } else if (isc0 && cv0 == literal(1.0f)) {
847 switch (n.bc.op) {
848 case ALU_OP2_MUL:
849 case ALU_OP2_MUL_IEEE:
850 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
851 return fold_alu_op1(n);
852 }
853 } else if (isc1 && cv1 == literal(1.0f)) {
854 switch (n.bc.op) {
855 case ALU_OP2_MUL:
856 case ALU_OP2_MUL_IEEE:
857 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
858 return fold_alu_op1(n);
859 }
860 }
861
862 return false;
863 }
864
865 apply_alu_dst_mod(n.bc, dv);
866 assign_source(n.dst[0], get_const(dv));
867 return true;
868 }
869
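// Evaluate a comparison between two literals; the condition code and the
// comparison type (float/int/uint) are taken from alu_cnd_flags.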
870 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
871 literal s1, literal s2) {
872
873 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
874 unsigned cc = alu_cnd_flags & AF_CC_MASK;
875
876 switch (cmp_type) {
877 case AF_FLOAT_CMP: {
878 switch (cc) {
879 case AF_CC_E : return s1.f == s2.f;
880 case AF_CC_GT: return s1.f > s2.f;
881 case AF_CC_GE: return s1.f >= s2.f;
882 case AF_CC_NE: return s1.f != s2.f;
883 case AF_CC_LT: return s1.f < s2.f;
884 case AF_CC_LE: return s1.f <= s2.f;
885 default:
886 assert(!"invalid condition code");
887 return false;
888 }
889 }
890 case AF_INT_CMP: {
891 switch (cc) {
892 case AF_CC_E : return s1.i == s2.i;
893 case AF_CC_GT: return s1.i > s2.i;
894 case AF_CC_GE: return s1.i >= s2.i;
895 case AF_CC_NE: return s1.i != s2.i;
896 case AF_CC_LT: return s1.i < s2.i;
897 case AF_CC_LE: return s1.i <= s2.i;
898 default:
899 assert(!"invalid condition code");
900 return false;
901 }
902 }
903 case AF_UINT_CMP: {
904 switch (cc) {
905 case AF_CC_E : return s1.u == s2.u;
906 case AF_CC_GT: return s1.u > s2.u;
907 case AF_CC_GE: return s1.u >= s2.u;
908 case AF_CC_NE: return s1.u != s2.u;
909 case AF_CC_LT: return s1.u < s2.u;
910 case AF_CC_LE: return s1.u <= s2.u;
911 default:
912 assert(!"invalid condition code");
913 return false;
914 }
915 }
916 default:
917 assert(!"invalid cmp_type");
918 return false;
919 }
920 }
921
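// Fold three-source ops.  CMOV-class ops reduce to a MOV when both
// selectable sources are identical or when the condition source is constant.
// For MULADD/MULADD_IEEE: with unsafe math, (MULADD a, x, (MUL x, b)) is
// refactored to (MUL x, (ADD a, b)); a constant multiply part is folded into
// an ADD; a zero factor (non-IEEE MULADD) reduces to a MOV of the addend; a
// factor equal to the addend folds into the constant (MUL x, c+/-1); and
// fully-constant MULADDs are evaluated directly.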
922 bool expr_handler::fold_alu_op3(alu_node& n) {
923
924 if (n.src.size() < 3)
925 return false;
926
927 if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
928 if (fold_assoc(&n))
929 return true;
930 }
931
932 value* v0 = n.src[0]->gvalue();
933 value* v1 = n.src[1]->gvalue();
934 value* v2 = n.src[2]->gvalue();
935
936 assert(v0 && v1 && v2 && n.dst[0]);
937
938 bool isc0 = v0->is_const();
939 bool isc1 = v1->is_const();
940 bool isc2 = v2->is_const();
941
942 literal dv, cv0, cv1, cv2;
943
944 if (isc0) {
945 cv0 = v0->get_const_value();
946 apply_alu_src_mod(n.bc, 0, cv0);
947 }
948
949 if (isc1) {
950 cv1 = v1->get_const_value();
951 apply_alu_src_mod(n.bc, 1, cv1);
952 }
953
954 if (isc2) {
955 cv2 = v2->get_const_value();
956 apply_alu_src_mod(n.bc, 2, cv2);
957 }
958
959 unsigned flags = n.bc.op_ptr->flags;
960
961 if (flags & AF_CMOV) {
962 int src = 0;
963
964 if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
965 // result doesn't depend on condition, convert to MOV
966 src = 1;
967 } else if (isc0) {
968 // src0 is const, condition can be evaluated, convert to MOV
969 bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
970 AF_CMP_TYPE_MASK), cv0, literal(0));
971 src = cond ? 1 : 2;
972 }
973
974 if (src) {
975 // if src is selected, convert to MOV
976 convert_to_mov(n, n.src[src], n.bc.src[src].neg);
977 return fold_alu_op1(n);
978 }
979 }
980
981 // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
982 if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
983 n.bc.op == ALU_OP3_MULADD_IEEE)) {
984
985 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
986 ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
987
988 if (!isc2 && v2->def && v2->def->is_alu_op(op)) {
989
990 alu_node *md = static_cast<alu_node*>(v2->def);
991 value *mv0 = md->src[0]->gvalue();
992 value *mv1 = md->src[1]->gvalue();
993
994 int es0 = -1, es1;
995
996 if (v0 == mv0) {
997 es0 = 0;
998 es1 = 0;
999 } else if (v0 == mv1) {
1000 es0 = 0;
1001 es1 = 1;
1002 } else if (v1 == mv0) {
1003 es0 = 1;
1004 es1 = 0;
1005 } else if (v1 == mv1) {
1006 es0 = 1;
1007 es1 = 1;
1008 }
1009
1010 if (es0 != -1) {
1011 value *va0 = es0 == 0 ? v1 : v0;
1012 value *va1 = es1 == 0 ? mv1 : mv0;
1013
1014 alu_node *add = sh.create_alu();
1015 add->bc.set_op(ALU_OP2_ADD);
1016
1017 add->dst.resize(1);
1018 add->src.resize(2);
1019
1020 value *t = sh.create_temp_value();
1021 t->def = add;
1022 add->dst[0] = t;
1023 add->src[0] = va0;
1024 add->src[1] = va1;
1025 add->bc.src[0] = n.bc.src[!es0];
1026 add->bc.src[1] = md->bc.src[!es1];
1027
1028 add->bc.src[1].neg ^= n.bc.src[2].neg ^
1029 (n.bc.src[es0].neg != md->bc.src[es1].neg);
1030
1031 n.insert_before(add);
1032 vt.add_value(t);
1033
1034 t = t->gvalue();
1035
1036 if (es0 == 1) {
1037 n.src[0] = n.src[1];
1038 n.bc.src[0] = n.bc.src[1];
1039 }
1040
1041 n.src[1] = t;
1042 memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
1043
1044 n.src.resize(2);
1045
1046 n.bc.set_op(op);
1047 return fold_alu_op2(n);
1048 }
1049 }
1050 }
1051
1052 if (!isc0 && !isc1 && !isc2)
1053 return false;
1054
1055 if (isc0 && isc1 && isc2) {
1056 switch (n.bc.op) {
1057 case ALU_OP3_MULADD_IEEE:
1058 case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
1059
1060 // TODO
1061
1062 default:
1063 return false;
1064 }
1065 } else {
1066 if (isc0 && isc1) {
1067 switch (n.bc.op) {
1068 case ALU_OP3_MULADD:
1069 case ALU_OP3_MULADD_IEEE:
1070 dv = cv0.f * cv1.f;
1071 n.bc.set_op(ALU_OP2_ADD);
1072 n.src[0] = sh.get_const_value(dv);
1073 memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
1074 n.src[1] = n.src[2];
1075 n.bc.src[1] = n.bc.src[2];
1076 n.src.resize(2);
1077 return fold_alu_op2(n);
1078 }
1079 }
1080
1081 if (n.bc.op == ALU_OP3_MULADD) {
1082 if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
1083 convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
1084 return fold_alu_op1(n);
1085 }
1086 }
1087
1088 if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
1089 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1090 ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1091
1092 if (isc1 && v0 == v2) {
1093 cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
1094 n.src[1] = sh.get_const_value(cv1);
1095 n.bc.src[1].neg = 0;
1096 n.bc.src[1].abs = 0;
1097 n.bc.set_op(op);
1098 n.src.resize(2);
1099 return fold_alu_op2(n);
1100 } else if (isc0 && v1 == v2) {
1101 cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
1102 n.src[0] = sh.get_const_value(cv0);
1103 n.bc.src[0].neg = 0;
1104 n.bc.src[0].abs = 0;
1105 n.bc.set_op(op);
1106 n.src.resize(2);
1107 return fold_alu_op2(n);
1108 }
1109 }
1110
1111 return false;
1112 }
1113
1114 apply_alu_dst_mod(n.bc, dv);
1115 assign_source(n.dst[0], get_const(dv));
1116 return true;
1117 }
1118
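// Return the logically inverted condition code; GE and GT invert to each
// other with the operands swapped (swap_args is set for the caller).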
1119 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1120 unsigned ncc = 0;
1121
1122 switch (cc) {
1123 case AF_CC_E: ncc = AF_CC_NE; break;
1124 case AF_CC_NE: ncc = AF_CC_E; break;
1125 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1126 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1127 default:
1128 assert(!"unexpected condition code");
1129 break;
1130 }
1131 return ncc;
1132 }
1133
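// The helpers below map a (condition code, comparison type) pair to the
// corresponding SETxx / PRED_SETxx / KILLxx / CNDxx opcode.  get_setcc_op
// additionally selects the *_DX10 variants for float compares with an
// integer destination.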
1134 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1135
1136 if (int_dst && cmp_type == AF_FLOAT_CMP) {
1137 switch (cc) {
1138 case AF_CC_E: return ALU_OP2_SETE_DX10;
1139 case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1140 case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1141 case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1142 }
1143 } else {
1144
1145 switch(cmp_type) {
1146 case AF_FLOAT_CMP: {
1147 switch (cc) {
1148 case AF_CC_E: return ALU_OP2_SETE;
1149 case AF_CC_NE: return ALU_OP2_SETNE;
1150 case AF_CC_GT: return ALU_OP2_SETGT;
1151 case AF_CC_GE: return ALU_OP2_SETGE;
1152 }
1153 break;
1154 }
1155 case AF_INT_CMP: {
1156 switch (cc) {
1157 case AF_CC_E: return ALU_OP2_SETE_INT;
1158 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1159 case AF_CC_GT: return ALU_OP2_SETGT_INT;
1160 case AF_CC_GE: return ALU_OP2_SETGE_INT;
1161 }
1162 break;
1163 }
1164 case AF_UINT_CMP: {
1165 switch (cc) {
1166 case AF_CC_E: return ALU_OP2_SETE_INT;
1167 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1168 case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1169 case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1170 }
1171 break;
1172 }
1173 }
1174 }
1175
1176 assert(!"unexpected cc&cmp_type combination");
1177 return ~0u;
1178 }
1179
1180 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1181
1182 switch(cmp_type) {
1183 case AF_FLOAT_CMP: {
1184 switch (cc) {
1185 case AF_CC_E: return ALU_OP2_PRED_SETE;
1186 case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1187 case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1188 case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1189 }
1190 break;
1191 }
1192 case AF_INT_CMP: {
1193 switch (cc) {
1194 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1195 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1196 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1197 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1198 }
1199 break;
1200 }
1201 case AF_UINT_CMP: {
1202 switch (cc) {
1203 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1204 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1205 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1206 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1207 }
1208 break;
1209 }
1210 }
1211
1212 assert(!"unexpected cc&cmp_type combination");
1213 return ~0u;
1214 }
1215
1216 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1217
1218 switch(cmp_type) {
1219 case AF_FLOAT_CMP: {
1220 switch (cc) {
1221 case AF_CC_E: return ALU_OP2_KILLE;
1222 case AF_CC_NE: return ALU_OP2_KILLNE;
1223 case AF_CC_GT: return ALU_OP2_KILLGT;
1224 case AF_CC_GE: return ALU_OP2_KILLGE;
1225 }
1226 break;
1227 }
1228 case AF_INT_CMP: {
1229 switch (cc) {
1230 case AF_CC_E: return ALU_OP2_KILLE_INT;
1231 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1232 case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1233 case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1234 }
1235 break;
1236 }
1237 case AF_UINT_CMP: {
1238 switch (cc) {
1239 case AF_CC_E: return ALU_OP2_KILLE_INT;
1240 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1241 case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1242 case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1243 }
1244 break;
1245 }
1246 }
1247
1248 assert(!"unexpected cc&cmp_type combination");
1249 return ~0u;
1250 }
1251
1252 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1253
1254 switch(cmp_type) {
1255 case AF_FLOAT_CMP: {
1256 switch (cc) {
1257 case AF_CC_E: return ALU_OP3_CNDE;
1258 case AF_CC_GT: return ALU_OP3_CNDGT;
1259 case AF_CC_GE: return ALU_OP3_CNDGE;
1260 }
1261 break;
1262 }
1263 case AF_INT_CMP: {
1264 switch (cc) {
1265 case AF_CC_E: return ALU_OP3_CNDE_INT;
1266 case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1267 case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1268 }
1269 break;
1270 }
1271 }
1272
1273 assert(!"unexpected cc&cmp_type combination");
1274 return ~0u;
1275 }
1276
1277
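// Rewrite a PRED_SETxx node as the corresponding SETxx_DX10/SETxx_*INT op
// with the inverted condition (swapping sources when needed), and clear its
// exec-mask / predicate update flags.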
1278 void convert_predset_to_set(shader& sh, alu_node* a) {
1279
1280 unsigned flags = a->bc.op_ptr->flags;
1281 unsigned cc = flags & AF_CC_MASK;
1282 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1283
1284 bool swap_args = false;
1285
1286 cc = invert_setcc_condition(cc, swap_args);
1287
1288 unsigned newop = get_setcc_op(cc, cmp_type, true);
1289
1290 a->dst.resize(1);
1291 a->bc.set_op(newop);
1292
1293 if (swap_args) {
1294 std::swap(a->src[0], a->src[1]);
1295 std::swap(a->bc.src[0], a->bc.src[1]);
1296 }
1297
1298 a->bc.update_exec_mask = 0;
1299 a->bc.update_pred = 0;
1300 }
1301
1302 } // namespace r600_sb