/* Mesa source file: src/gallium/drivers/r600/sb/sb_expr.cpp */
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include <cmath>
28
29 #include "sb_shader.h"
30
31 namespace r600_sb {
32
33 value* get_select_value_for_em(shader& sh, value* em) {
34 if (!em->def)
35 return NULL;
36
37 node *predset = em->def;
38 if (!predset->is_pred_set())
39 return NULL;
40
41 alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42 convert_predset_to_set(sh, s);
43
44 predset->insert_after(s);
45
46 value* &d0 = s->dst[0];
47 d0 = sh.create_temp_value();
48 d0->def = s;
49 return d0;
50 }
51
52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53 n.src.resize(1);
54 n.src[0] = src;
55 n.bc.src[0].abs = abs;
56 n.bc.src[0].neg = neg;
57 n.bc.set_op(ALU_OP1_MOV);
58 }
59
60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61
62 value * expr_handler::get_const(const literal &l) {
63 value *v = sh.get_const_value(l);
64 if (!v->gvn_source)
65 vt.add_value(v);
66 return v;
67 }
68
// Make 'dst' share the GVN representative of 'src', i.e. record that both
// compute the same expression. Assumes src->gvn_source is already set.
void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}
72
73 bool expr_handler::equal(value *l, value *r) {
74
75 assert(l != r);
76
77 if (l->gvalue() == r->gvalue())
78 return true;
79
80 if (l->def && r->def)
81 return defs_equal(l, r);
82
83 if (l->is_rel() && r->is_rel())
84 return ivars_equal(l, r);
85
86 return false;
87 }
88
89 bool expr_handler::ivars_equal(value* l, value* r) {
90 if (l->rel->gvalue() == r->rel->gvalue()
91 && l->select == r->select) {
92
93 vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
94 vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
95
96 // FIXME: replace this with more precise aliasing test
97 return lv == rv;
98 }
99 return false;
100 }
101
102 bool expr_handler::defs_equal(value* l, value* r) {
103
104 node *d1 = l->def;
105 node *d2 = r->def;
106
107 if (d1->type != d2->type || d1->subtype != d2->subtype)
108 return false;
109
110 if (d1->is_pred_set() || d2->is_pred_set())
111 return false;
112
113 if (d1->type == NT_OP) {
114 switch (d1->subtype) {
115 case NST_ALU_INST:
116 return ops_equal(
117 static_cast<alu_node*>(d1),
118 static_cast<alu_node*>(d2));
119 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
120 // static_cast<fetch_node*>(d2);
121 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
122 // static_cast<cf_node*>(d2);
123 default:
124 break;
125 }
126 }
127 return false;
128 }
129
130 bool expr_handler::try_fold(value* v) {
131 assert(!v->gvn_source);
132
133 if (v->def)
134 try_fold(v->def);
135
136 if (v->gvn_source)
137 return true;
138
139 return false;
140 }
141
// Dispatch folding to the type-specific fold() overload via the node's
// fold_dispatch hook.
bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}
145
// Fold PHI/PSI nodes whose incoming values are all equivalent: the result
// then just aliases that single value. Returns false when the sources
// differ (nothing folded).
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		// all remaining incoming values must match the first one
		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		// PSI sources appear to come in triples, with the incoming value at
		// offset 2 of each triple (indices 2, 5, 8, ...) — note the stride-3
		// iteration below. TODO confirm against the PSI builder.
		value *s = n.src[2];
		assert(s->gvn_source);

		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}
181
// Container nodes carry no expression of their own — nothing to fold.
bool expr_handler::fold(container_node& n) {
	return false;
}
185
186 bool expr_handler::fold_setcc(alu_node &n) {
187
188 value* v0 = n.src[0]->gvalue();
189 value* v1 = n.src[1]->gvalue();
190
191 assert(v0 && v1 && n.dst[0]);
192
193 unsigned flags = n.bc.op_ptr->flags;
194 unsigned cc = flags & AF_CC_MASK;
195 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
196 unsigned dst_type = flags & AF_DST_TYPE_MASK;
197
198 bool cond_result;
199 bool have_result = false;
200
201 bool isc0 = v0->is_const();
202 bool isc1 = v1->is_const();
203
204 literal dv, cv0, cv1;
205
206 if (isc0) {
207 cv0 = v0->get_const_value();
208 apply_alu_src_mod(n.bc, 0, cv0);
209 }
210
211 if (isc1) {
212 cv1 = v1->get_const_value();
213 apply_alu_src_mod(n.bc, 1, cv1);
214 }
215
216 if (isc0 && isc1) {
217 cond_result = evaluate_condition(flags, cv0, cv1);
218 have_result = true;
219 } else if (isc1) {
220 if (cmp_type == AF_FLOAT_CMP) {
221 if (n.bc.src[0].abs && !n.bc.src[0].neg) {
222 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
223 cond_result = true;
224 have_result = true;
225 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
226 cond_result = true;
227 have_result = true;
228 }
229 } else if (n.bc.src[0].abs && n.bc.src[0].neg) {
230 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
231 cond_result = false;
232 have_result = true;
233 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
234 cond_result = false;
235 have_result = true;
236 }
237 }
238 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
239 cond_result = true;
240 have_result = true;
241 }
242 } else if (isc0) {
243 if (cmp_type == AF_FLOAT_CMP) {
244 if (n.bc.src[1].abs && !n.bc.src[1].neg) {
245 if (cv0.f <= 0.0f && cc == AF_CC_GT) {
246 cond_result = false;
247 have_result = true;
248 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
249 cond_result = false;
250 have_result = true;
251 }
252 } else if (n.bc.src[1].abs && n.bc.src[1].neg) {
253 if (cv0.f >= 0.0f && cc == AF_CC_GE) {
254 cond_result = true;
255 have_result = true;
256 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
257 cond_result = true;
258 have_result = true;
259 }
260 }
261 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
262 cond_result = false;
263 have_result = true;
264 }
265 } else if (v0 == v1) {
266 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
267 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
268 // NOTE can't handle float comparisons here because of NaNs
269 cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
270 have_result = true;
271 }
272 }
273
274 if (have_result) {
275 literal result;
276
277 if (cond_result)
278 result = dst_type != AF_FLOAT_DST ?
279 literal(0xFFFFFFFFu) : literal(1.0f);
280 else
281 result = literal(0);
282
283 convert_to_mov(n, sh.get_const_value(result));
284 return fold_alu_op1(n);
285 }
286
287 return false;
288 }
289
290 bool expr_handler::fold(alu_node& n) {
291
292 switch (n.bc.op_ptr->src_count) {
293 case 1: return fold_alu_op1(n);
294 case 2: return fold_alu_op2(n);
295 case 3: return fold_alu_op3(n);
296 default:
297 assert(0);
298 }
299 return false;
300 }
301
302 bool expr_handler::fold(fetch_node& n) {
303
304 unsigned chan = 0;
305 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
306 value* &v = *I;
307 if (v) {
308 if (n.bc.dst_sel[chan] == SEL_0)
309 assign_source(*I, get_const(0.0f));
310 else if (n.bc.dst_sel[chan] == SEL_1)
311 assign_source(*I, get_const(1.0f));
312 }
313 ++chan;
314 }
315 return false;
316 }
317
// CF instructions are not folded.
bool expr_handler::fold(cf_node& n) {
	return false;
}
321
322 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
323 literal &v) {
324 const bc_alu_src &s = bc.src[src];
325
326 if (s.abs)
327 v = fabs(v.f);
328 if (s.neg)
329 v = -v.f;
330 }
331
332 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
333 float omod_coeff[] = {2.0f, 4.0, 0.5f};
334
335 if (bc.omod)
336 v = v.f * omod_coeff[bc.omod - 1];
337 if (bc.clamp)
338 v = float_clamp(v.f);
339 }
340
341 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
342
343 assert(l.size() == r.size());
344
345 int s = l.size();
346
347 for (int k = 0; k < s; ++k) {
348 if (!l[k]->v_equal(r[k]))
349 return false;
350 }
351
352 return true;
353 }
354
355 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
356 const bc_alu &b0 = l->bc;
357 const bc_alu &b1 = r->bc;
358
359 if (b0.op != b1.op)
360 return false;
361
362 unsigned src_count = b0.op_ptr->src_count;
363
364 if (b0.index_mode != b1.index_mode)
365 return false;
366
367 if (b0.clamp != b1.clamp || b0.omod != b1.omod)
368 return false;
369
370 for (unsigned s = 0; s < src_count; ++s) {
371 const bc_alu_src &s0 = b0.src[s];
372 const bc_alu_src &s1 = b1.src[s];
373
374 if (s0.abs != s1.abs || s0.neg != s1.neg)
375 return false;
376 }
377 return args_equal(l->src, r->src);
378 }
379
380 bool expr_handler::fold_alu_op1(alu_node& n) {
381
382 assert(!n.src.empty());
383 if (n.src.empty())
384 return false;
385
386 value* v0 = n.src[0]->gvalue();
387
388 assert(v0 && n.dst[0]);
389
390 if (!v0->is_const()) {
391 // handle (MOV -(MOV -x)) => (MOV x)
392 if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
393 && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
394 alu_node *sd = static_cast<alu_node*>(v0->def);
395 if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
396 sd->bc.src[0].neg) {
397 n.src[0] = sd->src[0];
398 n.bc.src[0].neg = 0;
399 v0 = n.src[0]->gvalue();
400 }
401 }
402
403 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
404 n.bc.op == ALU_OP1_MOVA_GPR_INT)
405 && n.bc.clamp == 0 && n.bc.omod == 0
406 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
407 assign_source(n.dst[0], v0);
408 return true;
409 }
410 return false;
411 }
412
413 literal dv, cv = v0->get_const_value();
414 apply_alu_src_mod(n.bc, 0, cv);
415
416 switch (n.bc.op) {
417 case ALU_OP1_CEIL: dv = ceil(cv.f); break;
418 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
419 case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
420 case ALU_OP1_FLOOR: dv = floor(cv.f); break;
421 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
422 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
423 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
424 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
425 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
426 case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
427 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
428 case ALU_OP1_LOG_CLAMPED:
429 case ALU_OP1_LOG_IEEE:
430 if (cv.f != 0.0f)
431 dv = log2(cv.f);
432 else
433 // don't fold to NAN, let the GPU handle it for now
434 // (prevents degenerate LIT tests from failing)
435 return false;
436 break;
437 case ALU_OP1_MOV: dv = cv; break;
438 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
439 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
440 // case ALU_OP1_MOVA_GPR_INT:
441 case ALU_OP1_NOT_INT: dv = ~cv.i; break;
442 case ALU_OP1_PRED_SET_INV:
443 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
444 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
445 case ALU_OP1_RECIPSQRT_CLAMPED:
446 case ALU_OP1_RECIPSQRT_FF:
447 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
448 case ALU_OP1_RECIP_CLAMPED:
449 case ALU_OP1_RECIP_FF:
450 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
451 // case ALU_OP1_RECIP_INT:
452 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
453 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
454 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
455 case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
456 case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
457
458 default:
459 return false;
460 }
461
462 apply_alu_dst_mod(n.bc, dv);
463 assign_source(n.dst[0], get_const(dv));
464 return true;
465 }
466
// Fuse (ADD (MUL a, b), c) or (ADD c, (MUL a, b)) into MULADD[_IEEE].
// The fusion is only legal when no abs modifiers, omod or clamp are
// involved, and when the resulting MULADD would not need all three
// sources from the kcache (hardware constant-access limit — assumption
// based on the kcache checks below; TODO confirm against bank-swizzle
// rules). Returns true if the node was rewritten (and re-folded as op3).
bool expr_handler::fold_mul_add(alu_node *n) {

	bool ieee;
	value* v0 = n->src[0]->gvalue();

	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
			static_cast<alu_node*>(v0->def) : NULL;

	if (d0) {
		// note: if src0 is defined by a non-MUL ALU op we return without
		// even trying src1 below
		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d0->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
				!d0->bc.clamp && !n->bc.omod &&
				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
						!n->src[1]->is_kcache())) {

			// neg on the whole MUL result folds into neg on one factor
			bool mul_neg = n->bc.src[0].neg;

			// rewrite: src0/src1 become the MUL factors, src2 the addend
			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[1];
			n->bc.src[2] = n->bc.src[1];
			n->src[0] = d0->src[0];
			n->bc.src[0] = d0->bc.src[0];
			n->src[1] = d0->src[1];
			n->bc.src[1] = d0->bc.src[1];

			n->bc.src[0].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	value* v1 = n->src[1]->gvalue();

	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
			static_cast<alu_node*>(v1->def) : NULL;

	if (d1) {
		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
			ieee = true;
		else if (d1->is_alu_op(ALU_OP2_MUL))
			ieee = false;
		else
			return false;

		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
				!d1->bc.clamp && !n->bc.omod &&
				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
						!n->src[0]->is_kcache())) {

			bool mul_neg = n->bc.src[1].neg;

			// symmetric rewrite for (ADD c, (MUL a, b))
			n->src.resize(3);
			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
			n->src[2] = n->src[0];
			n->bc.src[2] = n->bc.src[0];
			n->src[1] = d1->src[1];
			n->bc.src[1] = d1->bc.src[1];
			n->src[0] = d1->src[0];
			n->bc.src[0] = d1->bc.src[0];

			n->bc.src[1].neg ^= mul_neg;

			fold_alu_op3(*n);
			return true;
		}
	}

	return false;
}
546
547 bool expr_handler::eval_const_op(unsigned op, literal &r,
548 literal cv0, literal cv1) {
549
550 switch (op) {
551 case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
552 case ALU_OP2_ADDC_UINT:
553 r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
554 case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
555 case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
556 case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
557 case ALU_OP2_BFM_INT:
558 r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
559 case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
560 case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
561 case ALU_OP2_MAX:
562 case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
563 case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
564 case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
565 case ALU_OP2_MIN:
566 case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
567 case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
568 case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
569 case ALU_OP2_MUL:
570 case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
571 case ALU_OP2_MULHI_INT:
572 r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
573 case ALU_OP2_MULHI_UINT:
574 r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
575 case ALU_OP2_MULLO_INT:
576 r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
577 case ALU_OP2_MULLO_UINT:
578 r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
579 case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
580 case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
581 case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
582
583 default:
584 return false;
585 }
586
587 return true;
588 }
589
// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
//
// Walks up the def chain of an associative op (ADD/MUL, or the MUL part
// of a MULADD), accumulating every constant operand into 'cr'. The
// 'last_arg' state encodes the outcome:
//   -3  initial value (no iteration ran)
//   -2  current node had no constant operand — stop, nothing folded
//   -1  the whole chain folded to the constant 'cr'
//   0/1 index (in node 'a') of the single remaining non-constant operand
// 'cur_neg' tracks a pending negation accumulated while descending
// through negated sources (only allowed for ops where it distributes).
// Returns true only when the node became fully constant and was replaced
// by a MOV; a partial fold updates the node's operands in place but still
// returns false so other folding rules get a chance to run.
bool expr_handler::fold_assoc(alu_node *n) {

	alu_node *a = n;
	literal cr;

	int last_arg = -3;

	unsigned op = n->bc.op;
	bool allow_neg = false, cur_neg = false;

	switch(op) {
	case ALU_OP2_ADD:
	case ALU_OP2_MUL:
	case ALU_OP2_MUL_IEEE:
		allow_neg = true;
		break;
	case ALU_OP3_MULADD:
		// fold only the multiply part of a MULADD
		allow_neg = true;
		op = ALU_OP2_MUL;
		break;
	case ALU_OP3_MULADD_IEEE:
		allow_neg = true;
		op = ALU_OP2_MUL_IEEE;
		break;
	default:
		if (n->bc.op_ptr->src_count != 2)
			return false;
	}

	// check if we can evaluate the op
	if (!eval_const_op(op, cr, literal(0), literal(0)))
		return false;

	while (true) {

		value *v0 = a->src[0]->gvalue();
		value *v1 = a->src[1]->gvalue();

		last_arg = -2;

		if (v1->is_const()) {
			literal arg = v1->get_const_value();
			apply_alu_src_mod(a->bc, 1, arg);
			if (cur_neg)
				arg.f = -arg.f;

			// first constant initializes the accumulator, later ones fold in
			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			// try to continue the chain through src0's defining op
			if (v0->def) {
				alu_node *d0 = static_cast<alu_node*>(v0->def);
				if ((d0->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d0->is_alu_op(ALU_OP2_MUL))) &&
						!d0->bc.omod && !d0->bc.clamp &&
						!a->bc.src[0].abs &&
						(!a->bc.src[0].neg || allow_neg)) {
					cur_neg ^= a->bc.src[0].neg;
					a = d0;
					continue;
				}
			}
			last_arg = 0;

		}

		if (v0->is_const()) {
			literal arg = v0->get_const_value();
			apply_alu_src_mod(a->bc, 0, arg);
			if (cur_neg)
				arg.f = -arg.f;

			if (last_arg == 0) {
				// both operands constant — the whole chain is constant
				eval_const_op(op, cr, cr, arg);
				last_arg = -1;
				break;
			}

			if (a == n)
				cr = arg;
			else
				eval_const_op(op, cr, cr, arg);

			// try to continue the chain through src1's defining op
			if (v1->def) {
				alu_node *d1 = static_cast<alu_node*>(v1->def);
				if ((d1->is_alu_op(op) ||
						(op == ALU_OP2_MUL_IEEE &&
								d1->is_alu_op(ALU_OP2_MUL))) &&
						!d1->bc.omod && !d1->bc.clamp &&
						!a->bc.src[1].abs &&
						(!a->bc.src[1].neg || allow_neg)) {
					cur_neg ^= a->bc.src[1].neg;
					a = d1;
					continue;
				}
			}

			last_arg = 1;
		}

		break;
	};

	if (last_arg == -1) {
		// result is const
		apply_alu_dst_mod(n->bc, cr);

		if (n->bc.op == op) {
			convert_to_mov(*n, sh.get_const_value(cr));
			fold_alu_op1(*n);
			return true;
		} else { // MULADD => ADD
			n->src[0] = n->src[2];
			n->bc.src[0] = n->bc.src[2];
			n->src[1] = sh.get_const_value(cr);
			memset(&n->bc.src[1], 0, sizeof(bc_alu_src));

			n->src.resize(2);
			n->bc.set_op(ALU_OP2_ADD);
		}
	} else if (last_arg >= 0) {
		// partial fold: keep the remaining variable operand plus the
		// accumulated constant
		n->src[0] = a->src[last_arg];
		n->bc.src[0] = a->bc.src[last_arg];
		n->bc.src[0].neg ^= cur_neg;
		n->src[1] = sh.get_const_value(cr);
		memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
	}

	return false;
}
723
// Fold a two-source ALU instruction. Tries, in order: setcc folding,
// associative-chain folding, equal-argument identities, MUL+ADD fusion,
// and finally constant evaluation / algebraic identities with one or two
// constant operands. Returns true when the destination got a gvn_source.
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
		return false;

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_SET) {
		return fold_setcc(n);
	}

	// associative chains only when unsafe math optimizations are allowed
	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();

	assert(v0 && v1);

	// handle some operations with equal args, e.g. x + x => x * 2
	if (v0 == v1) {
		if (n.bc.src[0].neg == n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
			case ALU_OP2_MAX:
				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
				if (!sh.safe_math) {
					n.src[1] = sh.get_const_value(2.0f);
					memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
					n.bc.set_op(ALU_OP2_MUL);
					return fold_alu_op2(n);
				}
				break;
			}
		}
		if (n.bc.src[0].neg != n.bc.src[1].neg &&
				n.bc.src[0].abs == n.bc.src[1].abs) {
			switch (n.bc.op) {
			case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0)
				if (!sh.safe_math) {
					convert_to_mov(n, sh.get_const_value(literal(0)));
					return fold_alu_op1(n);
				}
				break;
			}
		}
	}

	if (n.bc.op == ALU_OP2_ADD) {
		if (fold_mul_add(&n))
			return true;
	}

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();

	if (!isc0 && !isc1)
		return false;

	literal dv, cv0, cv1;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc0 && isc1) {

		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
			return false;

	} else { // one source is const

		// Algebraic identities with a 0 or 1.0 operand. Note that
		// MUL_IEEE is deliberately absent from the "x * 0 => 0" lists:
		// 0 * Inf/NaN is not 0 under IEEE semantics, while the plain MUL
		// variant returns 0 for 0 * anything.
		if (isc0 && cv0 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(0)) {
			switch (n.bc.op) {
			case ALU_OP2_ADD:
			case ALU_OP2_ADD_INT:
			case ALU_OP2_ASHR_INT:
			case ALU_OP2_LSHL_INT:
			case ALU_OP2_LSHR_INT:
			case ALU_OP2_MAX_UINT:
			case ALU_OP2_OR_INT:
			case ALU_OP2_SUB_INT:
			case ALU_OP2_XOR_INT:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			case ALU_OP2_AND_INT:
			case ALU_OP2_MIN_UINT:
			case ALU_OP2_MUL:
			case ALU_OP2_MULHI_UINT:
			case ALU_OP2_MULLO_UINT:
				convert_to_mov(n, sh.get_const_value(literal(0)));
				return fold_alu_op1(n);
			}
		} else if (isc0 && cv0 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
				return fold_alu_op1(n);
			}
		} else if (isc1 && cv1 == literal(1.0f)) {
			switch (n.bc.op) {
			case ALU_OP2_MUL:
			case ALU_OP2_MUL_IEEE:
				convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
				return fold_alu_op1(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
871
872 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
873 literal s1, literal s2) {
874
875 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
876 unsigned cc = alu_cnd_flags & AF_CC_MASK;
877
878 switch (cmp_type) {
879 case AF_FLOAT_CMP: {
880 switch (cc) {
881 case AF_CC_E : return s1.f == s2.f;
882 case AF_CC_GT: return s1.f > s2.f;
883 case AF_CC_GE: return s1.f >= s2.f;
884 case AF_CC_NE: return s1.f != s2.f;
885 case AF_CC_LT: return s1.f < s2.f;
886 case AF_CC_LE: return s1.f <= s2.f;
887 default:
888 assert(!"invalid condition code");
889 return false;
890 }
891 }
892 case AF_INT_CMP: {
893 switch (cc) {
894 case AF_CC_E : return s1.i == s2.i;
895 case AF_CC_GT: return s1.i > s2.i;
896 case AF_CC_GE: return s1.i >= s2.i;
897 case AF_CC_NE: return s1.i != s2.i;
898 case AF_CC_LT: return s1.i < s2.i;
899 case AF_CC_LE: return s1.i <= s2.i;
900 default:
901 assert(!"invalid condition code");
902 return false;
903 }
904 }
905 case AF_UINT_CMP: {
906 switch (cc) {
907 case AF_CC_E : return s1.u == s2.u;
908 case AF_CC_GT: return s1.u > s2.u;
909 case AF_CC_GE: return s1.u >= s2.u;
910 case AF_CC_NE: return s1.u != s2.u;
911 case AF_CC_LT: return s1.u < s2.u;
912 case AF_CC_LE: return s1.u <= s2.u;
913 default:
914 assert(!"invalid condition code");
915 return false;
916 }
917 }
918 default:
919 assert(!"invalid cmp_type");
920 return false;
921 }
922 }
923
// Fold a three-source ALU instruction: CMOV selection when the condition
// is known or irrelevant, the (MULADD a, x, MUL (x, b)) => (MUL x,
// ADD (a, b)) factorization, full constant evaluation, and partial
// MULADD simplifications. Returns true when the destination was given a
// gvn_source.
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
		if (fold_assoc(&n))
			return true;
	}

	value* v0 = n.src[0]->gvalue();
	value* v1 = n.src[1]->gvalue();
	value* v2 = n.src[2]->gvalue();

	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	unsigned flags = n.bc.op_ptr->flags;

	if (flags & AF_CMOV) {
		// conditional move: src0 is the condition, src1/src2 the candidates
		int src = 0;

		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
			return fold_alu_op1(n);
		}
	}

	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
			n.bc.op == ALU_OP3_MULADD_IEEE)) {

		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {

			alu_node *md = static_cast<alu_node*>(v2->def);
			value *mv0 = md->src[0]->gvalue();
			value *mv1 = md->src[1]->gvalue();

			// es0: which MULADD factor (0/1) matches a factor of the MUL
			// es1: which MUL factor (0/1) it matches
			int es0 = -1, es1;

			if (v0 == mv0) {
				es0 = 0;
				es1 = 0;
			} else if (v0 == mv1) {
				es0 = 0;
				es1 = 1;
			} else if (v1 == mv0) {
				es0 = 1;
				es1 = 0;
			} else if (v1 == mv1) {
				es0 = 1;
				es1 = 1;
			}

			if (es0 != -1) {
				// the two non-shared factors become the ADD operands
				value *va0 = es0 == 0 ? v1 : v0;
				value *va1 = es1 == 0 ? mv1 : mv0;

				alu_node *add = sh.create_alu();
				add->bc.set_op(ALU_OP2_ADD);

				add->dst.resize(1);
				add->src.resize(2);

				value *t = sh.create_temp_value();
				t->def = add;
				add->dst[0] = t;
				add->src[0] = va0;
				add->src[1] = va1;
				add->bc.src[0] = n.bc.src[!es0];
				add->bc.src[1] = md->bc.src[!es1];

				// fold the sign of src2 and of the shared factor into the
				// second ADD operand
				add->bc.src[1].neg ^= n.bc.src[2].neg ^
						(n.bc.src[es0].neg != md->bc.src[es1].neg);

				n.insert_before(add);
				vt.add_value(t);

				t = t->gvalue();

				// keep the shared factor in src0, the new sum in src1
				if (es0 == 1) {
					n.src[0] = n.src[1];
					n.bc.src[0] = n.bc.src[1];
				}

				n.src[1] = t;
				memset(&n.bc.src[1], 0, sizeof(bc_alu_src));

				n.src.resize(2);

				n.bc.set_op(op);
				return fold_alu_op2(n);
			}
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD_IEEE:
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO

		default:
			return false;
		}
	} else {
		if (isc0 && isc1) {
			switch (n.bc.op) {
			case ALU_OP3_MULADD:
			case ALU_OP3_MULADD_IEEE:
				// both factors const: MULADD => ADD of the product
				dv = cv0.f * cv1.f;
				n.bc.set_op(ALU_OP2_ADD);
				n.src[0] = sh.get_const_value(dv);
				memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
				n.src[1] = n.src[2];
				n.bc.src[1] = n.bc.src[2];
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD) {
			// (MULADD 0, x, y) => (MOV y); non-IEEE MUL treats 0 * any as 0
			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
				convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
				return fold_alu_op1(n);
			}
		}

		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;

			// (MULADD x, c, +/-x) => (MUL x, c +/- 1)
			if (isc1 && v0 == v2) {
				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
				n.src[1] = sh.get_const_value(cv1);
				n.bc.src[1].neg = 0;
				n.bc.src[1].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			} else if (isc0 && v1 == v2) {
				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
				n.src[0] = sh.get_const_value(cv0);
				n.bc.src[0].neg = 0;
				n.bc.src[0].abs = 0;
				n.bc.set_op(op);
				n.src.resize(2);
				return fold_alu_op2(n);
			}
		}

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
1120
1121 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1122 unsigned ncc = 0;
1123
1124 switch (cc) {
1125 case AF_CC_E: ncc = AF_CC_NE; break;
1126 case AF_CC_NE: ncc = AF_CC_E; break;
1127 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1128 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1129 default:
1130 assert(!"unexpected condition code");
1131 break;
1132 }
1133 return ncc;
1134 }
1135
1136 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1137
1138 if (int_dst && cmp_type == AF_FLOAT_CMP) {
1139 switch (cc) {
1140 case AF_CC_E: return ALU_OP2_SETE_DX10;
1141 case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1142 case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1143 case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1144 }
1145 } else {
1146
1147 switch(cmp_type) {
1148 case AF_FLOAT_CMP: {
1149 switch (cc) {
1150 case AF_CC_E: return ALU_OP2_SETE;
1151 case AF_CC_NE: return ALU_OP2_SETNE;
1152 case AF_CC_GT: return ALU_OP2_SETGT;
1153 case AF_CC_GE: return ALU_OP2_SETGE;
1154 }
1155 break;
1156 }
1157 case AF_INT_CMP: {
1158 switch (cc) {
1159 case AF_CC_E: return ALU_OP2_SETE_INT;
1160 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1161 case AF_CC_GT: return ALU_OP2_SETGT_INT;
1162 case AF_CC_GE: return ALU_OP2_SETGE_INT;
1163 }
1164 break;
1165 }
1166 case AF_UINT_CMP: {
1167 switch (cc) {
1168 case AF_CC_E: return ALU_OP2_SETE_INT;
1169 case AF_CC_NE: return ALU_OP2_SETNE_INT;
1170 case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1171 case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1172 }
1173 break;
1174 }
1175 }
1176 }
1177
1178 assert(!"unexpected cc&cmp_type combination");
1179 return ~0u;
1180 }
1181
1182 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1183
1184 switch(cmp_type) {
1185 case AF_FLOAT_CMP: {
1186 switch (cc) {
1187 case AF_CC_E: return ALU_OP2_PRED_SETE;
1188 case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1189 case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1190 case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1191 }
1192 break;
1193 }
1194 case AF_INT_CMP: {
1195 switch (cc) {
1196 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1197 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1198 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1199 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1200 }
1201 break;
1202 }
1203 case AF_UINT_CMP: {
1204 switch (cc) {
1205 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1206 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1207 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1208 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1209 }
1210 break;
1211 }
1212 }
1213
1214 assert(!"unexpected cc&cmp_type combination");
1215 return ~0u;
1216 }
1217
1218 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1219
1220 switch(cmp_type) {
1221 case AF_FLOAT_CMP: {
1222 switch (cc) {
1223 case AF_CC_E: return ALU_OP2_KILLE;
1224 case AF_CC_NE: return ALU_OP2_KILLNE;
1225 case AF_CC_GT: return ALU_OP2_KILLGT;
1226 case AF_CC_GE: return ALU_OP2_KILLGE;
1227 }
1228 break;
1229 }
1230 case AF_INT_CMP: {
1231 switch (cc) {
1232 case AF_CC_E: return ALU_OP2_KILLE_INT;
1233 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1234 case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1235 case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1236 }
1237 break;
1238 }
1239 case AF_UINT_CMP: {
1240 switch (cc) {
1241 case AF_CC_E: return ALU_OP2_KILLE_INT;
1242 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1243 case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1244 case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1245 }
1246 break;
1247 }
1248 }
1249
1250 assert(!"unexpected cc&cmp_type combination");
1251 return ~0u;
1252 }
1253
1254 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1255
1256 switch(cmp_type) {
1257 case AF_FLOAT_CMP: {
1258 switch (cc) {
1259 case AF_CC_E: return ALU_OP3_CNDE;
1260 case AF_CC_GT: return ALU_OP3_CNDGT;
1261 case AF_CC_GE: return ALU_OP3_CNDGE;
1262 }
1263 break;
1264 }
1265 case AF_INT_CMP: {
1266 switch (cc) {
1267 case AF_CC_E: return ALU_OP3_CNDE_INT;
1268 case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1269 case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1270 }
1271 break;
1272 }
1273 }
1274
1275 assert(!"unexpected cc&cmp_type combination");
1276 return ~0u;
1277 }
1278
1279
1280 void convert_predset_to_set(shader& sh, alu_node* a) {
1281
1282 unsigned flags = a->bc.op_ptr->flags;
1283 unsigned cc = flags & AF_CC_MASK;
1284 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1285
1286 bool swap_args = false;
1287
1288 cc = invert_setcc_condition(cc, swap_args);
1289
1290 unsigned newop = get_setcc_op(cc, cmp_type, true);
1291
1292 a->dst.resize(1);
1293 a->bc.set_op(newop);
1294
1295 if (swap_args) {
1296 std::swap(a->src[0], a->src[1]);
1297 std::swap(a->bc.src[0], a->bc.src[1]);
1298 }
1299
1300 a->bc.update_exec_mask = 0;
1301 a->bc.update_pred = 0;
1302 }
1303
1304 } // namespace r600_sb