r600g/sb: handle more cases for folding in gvn pass
[mesa.git] / src / gallium / drivers / r600 / sb / sb_expr.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include <cmath>
28
29 #include "sb_shader.h"
30
31 namespace r600_sb {
32
33 value* get_select_value_for_em(shader& sh, value* em) {
34 if (!em->def)
35 return NULL;
36
37 node *predset = em->def;
38 if (!predset->is_pred_set())
39 return NULL;
40
41 alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42 convert_predset_to_set(sh, s);
43
44 predset->insert_after(s);
45
46 value* &d0 = s->dst[0];
47 d0 = sh.create_temp_value();
48 d0->def = s;
49 return d0;
50 }
51
52 void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53 n.src.resize(1);
54 n.src[0] = src;
55 n.bc.src[0].abs = abs;
56 n.bc.src[0].neg = neg;
57 n.bc.set_op(ALU_OP1_MOV);
58 }
59
60 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61
62 value * expr_handler::get_const(const literal &l) {
63 value *v = sh.get_const_value(l);
64 if (!v->gvn_source)
65 vt.add_value(v);
66 return v;
67 }
68
69 void expr_handler::assign_source(value *dst, value *src) {
70 dst->gvn_source = src->gvn_source;
71 }
72
73 bool expr_handler::equal(value *l, value *r) {
74
75 assert(l != r);
76
77 if (l->gvalue() == r->gvalue())
78 return true;
79
80 if (l->def && r->def)
81 return defs_equal(l, r);
82
83 if (l->is_rel() && r->is_rel())
84 return ivars_equal(l, r);
85
86 return false;
87 }
88
89 bool expr_handler::ivars_equal(value* l, value* r) {
90 if (l->rel->gvalue() == r->rel->gvalue()
91 && l->select == r->select) {
92
93 vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
94 vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
95
96 // FIXME: replace this with more precise aliasing test
97 return lv == rv;
98 }
99 return false;
100 }
101
102 bool expr_handler::defs_equal(value* l, value* r) {
103
104 node *d1 = l->def;
105 node *d2 = r->def;
106
107 if (d1->type != d2->type || d1->subtype != d2->subtype)
108 return false;
109
110 if (d1->is_pred_set() || d2->is_pred_set())
111 return false;
112
113 if (d1->type == NT_OP) {
114 switch (d1->subtype) {
115 case NST_ALU_INST:
116 return ops_equal(
117 static_cast<alu_node*>(d1),
118 static_cast<alu_node*>(d2));
119 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
120 // static_cast<fetch_node*>(d2);
121 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
122 // static_cast<cf_node*>(d2);
123 default:
124 break;
125 }
126 }
127 return false;
128 }
129
130 bool expr_handler::try_fold(value* v) {
131 assert(!v->gvn_source);
132
133 if (v->def)
134 try_fold(v->def);
135
136 if (v->gvn_source)
137 return true;
138
139 return false;
140 }
141
142 bool expr_handler::try_fold(node* n) {
143 return n->fold_dispatch(this);
144 }
145
146 bool expr_handler::fold(node& n) {
147 if (n.subtype == NST_PHI) {
148
149 value *s = n.src[0];
150
151 // FIXME disabling phi folding for registers for now, otherwise we lose
152 // control flow information in some cases
153 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
154 // probably control flow transformation is required to enable it
155 if (s->is_sgpr())
156 return false;
157
158 for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
159 value *v = *I;
160 if (!s->v_equal(v))
161 return false;
162 }
163
164 assign_source(n.dst[0], s);
165 } else {
166 assert(n.subtype == NST_PSI);
167 assert(n.src.size() >= 6);
168
169 value *s = n.src[2];
170 assert(s->gvn_source);
171
172 for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
173 value *v = *(I+2);
174 if (!s->v_equal(v))
175 return false;
176 }
177 assign_source(n.dst[0], s);
178 }
179 return true;
180 }
181
182 bool expr_handler::fold(container_node& n) {
183 return false;
184 }
185
186 bool expr_handler::fold_setcc(alu_node &n) {
187
188 value* v0 = n.src[0]->gvalue();
189 value* v1 = n.src[1]->gvalue();
190
191 assert(v0 && v1 && n.dst[0]);
192
193 unsigned flags = n.bc.op_ptr->flags;
194 unsigned cc = flags & AF_CC_MASK;
195 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
196 unsigned dst_type = flags & AF_DST_TYPE_MASK;
197
198 bool cond_result;
199 bool have_result = false;
200
201 bool isc0 = v0->is_const();
202 bool isc1 = v1->is_const();
203
204 literal dv, cv0, cv1;
205
206 if (isc0) {
207 cv0 = v0->get_const_value();
208 apply_alu_src_mod(n.bc, 0, cv0);
209 }
210
211 if (isc1) {
212 cv1 = v1->get_const_value();
213 apply_alu_src_mod(n.bc, 1, cv1);
214 }
215
216 if (isc0 && isc1) {
217 cond_result = evaluate_condition(flags, cv0, cv1);
218 have_result = true;
219 } else if (isc1) {
220 if (cmp_type == AF_FLOAT_CMP) {
221 if (n.bc.src[0].abs && !n.bc.src[0].neg) {
222 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
223 cond_result = true;
224 have_result = true;
225 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
226 cond_result = true;
227 have_result = true;
228 }
229 } else if (n.bc.src[0].abs && n.bc.src[0].neg) {
230 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
231 cond_result = false;
232 have_result = true;
233 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
234 cond_result = false;
235 have_result = true;
236 }
237 }
238 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
239 cond_result = true;
240 have_result = true;
241 }
242 } else if (isc0) {
243 if (cmp_type == AF_FLOAT_CMP) {
244 if (n.bc.src[1].abs && !n.bc.src[1].neg) {
245 if (cv0.f <= 0.0f && cc == AF_CC_GT) {
246 cond_result = false;
247 have_result = true;
248 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
249 cond_result = false;
250 have_result = true;
251 }
252 } else if (n.bc.src[1].abs && n.bc.src[1].neg) {
253 if (cv0.f >= 0.0f && cc == AF_CC_GE) {
254 cond_result = true;
255 have_result = true;
256 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
257 cond_result = true;
258 have_result = true;
259 }
260 }
261 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
262 cond_result = false;
263 have_result = true;
264 }
265 } else if (v0 == v1) {
266 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
267 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
268 // NOTE can't handle float comparisons here because of NaNs
269 cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
270 have_result = true;
271 }
272 }
273
274 if (have_result) {
275 literal result;
276
277 if (cond_result)
278 result = dst_type != AF_FLOAT_DST ?
279 literal(0xFFFFFFFFu) : literal(1.0f);
280 else
281 result = literal(0);
282
283 convert_to_mov(n, sh.get_const_value(result));
284 return fold_alu_op1(n);
285 }
286
287 return false;
288 }
289
290 bool expr_handler::fold(alu_node& n) {
291
292 switch (n.bc.op_ptr->src_count) {
293 case 1: return fold_alu_op1(n);
294 case 2: return fold_alu_op2(n);
295 case 3: return fold_alu_op3(n);
296 default:
297 assert(0);
298 }
299 return false;
300 }
301
302 bool expr_handler::fold(fetch_node& n) {
303
304 unsigned chan = 0;
305 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
306 value* &v = *I;
307 if (v) {
308 if (n.bc.dst_sel[chan] == SEL_0)
309 assign_source(*I, get_const(0.0f));
310 else if (n.bc.dst_sel[chan] == SEL_1)
311 assign_source(*I, get_const(1.0f));
312 }
313 ++chan;
314 }
315 return false;
316 }
317
318 bool expr_handler::fold(cf_node& n) {
319 return false;
320 }
321
322 void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
323 literal &v) {
324 const bc_alu_src &s = bc.src[src];
325
326 if (s.abs)
327 v = fabs(v.f);
328 if (s.neg)
329 v = -v.f;
330 }
331
332 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
333 float omod_coeff[] = {2.0f, 4.0, 0.5f};
334
335 if (bc.omod)
336 v = v.f * omod_coeff[bc.omod - 1];
337 if (bc.clamp)
338 v = float_clamp(v.f);
339 }
340
341 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
342
343 assert(l.size() == r.size());
344
345 int s = l.size();
346
347 for (int k = 0; k < s; ++k) {
348 if (!l[k]->v_equal(r[k]))
349 return false;
350 }
351
352 return true;
353 }
354
355 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
356 const bc_alu &b0 = l->bc;
357 const bc_alu &b1 = r->bc;
358
359 if (b0.op != b1.op)
360 return false;
361
362 unsigned src_count = b0.op_ptr->src_count;
363
364 if (b0.index_mode != b1.index_mode)
365 return false;
366
367 if (b0.clamp != b1.clamp || b0.omod != b1.omod)
368 return false;
369
370 for (unsigned s = 0; s < src_count; ++s) {
371 const bc_alu_src &s0 = b0.src[s];
372 const bc_alu_src &s1 = b1.src[s];
373
374 if (s0.abs != s1.abs || s0.neg != s1.neg)
375 return false;
376 }
377 return args_equal(l->src, r->src);
378 }
379
380 bool expr_handler::fold_alu_op1(alu_node& n) {
381
382 assert(!n.src.empty());
383 if (n.src.empty())
384 return false;
385
386 value* v0 = n.src[0]->gvalue();
387
388 assert(v0 && n.dst[0]);
389
390 if (!v0->is_const()) {
391 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
392 n.bc.op == ALU_OP1_MOVA_GPR_INT)
393 && n.bc.clamp == 0 && n.bc.omod == 0
394 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
395 assign_source(n.dst[0], v0);
396 return true;
397 }
398 return false;
399 }
400
401 literal dv, cv = v0->get_const_value();
402 apply_alu_src_mod(n.bc, 0, cv);
403
404 switch (n.bc.op) {
405 case ALU_OP1_CEIL: dv = ceil(cv.f); break;
406 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
407 case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
408 case ALU_OP1_FLOOR: dv = floor(cv.f); break;
409 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
410 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
411 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
412 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
413 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
414 case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
415 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
416 case ALU_OP1_LOG_CLAMPED:
417 case ALU_OP1_LOG_IEEE:
418 if (cv.f != 0.0f)
419 dv = log2(cv.f);
420 else
421 // don't fold to NAN, let the GPU handle it for now
422 // (prevents degenerate LIT tests from failing)
423 return false;
424 break;
425 case ALU_OP1_MOV: dv = cv; break;
426 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
427 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
428 // case ALU_OP1_MOVA_GPR_INT:
429 case ALU_OP1_NOT_INT: dv = ~cv.i; break;
430 case ALU_OP1_PRED_SET_INV:
431 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
432 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
433 case ALU_OP1_RECIPSQRT_CLAMPED:
434 case ALU_OP1_RECIPSQRT_FF:
435 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
436 case ALU_OP1_RECIP_CLAMPED:
437 case ALU_OP1_RECIP_FF:
438 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
439 // case ALU_OP1_RECIP_INT:
440 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
441 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
442 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
443 case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
444 case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
445
446 default:
447 return false;
448 }
449
450 apply_alu_dst_mod(n.bc, dv);
451 assign_source(n.dst[0], get_const(dv));
452 return true;
453 }
454
455
456 bool expr_handler::fold_alu_op2(alu_node& n) {
457
458 if (n.src.size() < 2)
459 return false;
460
461 unsigned flags = n.bc.op_ptr->flags;
462
463 if (flags & AF_SET) {
464 return fold_setcc(n);
465 }
466
467 value* v0 = n.src[0]->gvalue();
468 value* v1 = n.src[1]->gvalue();
469
470 assert(v0 && v1 && n.dst[0]);
471
472 bool isc0 = v0->is_const();
473 bool isc1 = v1->is_const();
474
475 if (!isc0 && !isc1)
476 return false;
477
478 literal dv, cv0, cv1;
479
480 if (isc0) {
481 cv0 = v0->get_const_value();
482 apply_alu_src_mod(n.bc, 0, cv0);
483 }
484
485 if (isc1) {
486 cv1 = v1->get_const_value();
487 apply_alu_src_mod(n.bc, 1, cv1);
488 }
489
490 if (isc0 && isc1) {
491 switch (n.bc.op) {
492 case ALU_OP2_ADD: dv = cv0.f + cv1.f; break;
493 case ALU_OP2_ADDC_UINT:
494 dv = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
495 case ALU_OP2_ADD_INT: dv = cv0.i + cv1.i; break;
496 case ALU_OP2_AND_INT: dv = cv0.i & cv1.i; break;
497 case ALU_OP2_ASHR_INT: dv = cv0.i >> (cv1.i & 0x1F); break;
498 case ALU_OP2_BFM_INT:
499 dv = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
500 case ALU_OP2_LSHL_INT: dv = cv0.i << cv1.i; break;
501 case ALU_OP2_LSHR_INT: dv = cv0.u >> cv1.u; break;
502 case ALU_OP2_MAX:
503 case ALU_OP2_MAX_DX10: dv = cv0.f > cv1.f ? cv0.f : cv1.f; break;
504 case ALU_OP2_MAX_INT: dv = cv0.i > cv1.i ? cv0.i : cv1.i; break;
505 case ALU_OP2_MAX_UINT: dv = cv0.u > cv1.u ? cv0.u : cv1.u; break;
506 case ALU_OP2_MIN:
507 case ALU_OP2_MIN_DX10: dv = cv0.f < cv1.f ? cv0.f : cv1.f; break;
508 case ALU_OP2_MIN_INT: dv = cv0.i < cv1.i ? cv0.i : cv1.i; break;
509 case ALU_OP2_MIN_UINT: dv = cv0.u < cv1.u ? cv0.u : cv1.u; break;
510 case ALU_OP2_MUL:
511 case ALU_OP2_MUL_IEEE: dv = cv0.f * cv1.f; break;
512 case ALU_OP2_MULHI_INT:
513 dv = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
514 case ALU_OP2_MULHI_UINT:
515 dv = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
516 case ALU_OP2_MULLO_INT:
517 dv = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
518 case ALU_OP2_MULLO_UINT:
519 dv = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
520 case ALU_OP2_OR_INT: dv = cv0.i | cv1.i; break;
521 case ALU_OP2_SUB_INT: dv = cv0.i - cv1.i; break;
522 case ALU_OP2_XOR_INT: dv = cv0.i ^ cv1.i; break;
523
524 default:
525 return false;
526 }
527
528 } else { // one source is const
529
530 if (isc0 && cv0 == literal(0)) {
531 switch (n.bc.op) {
532 case ALU_OP2_ADD:
533 case ALU_OP2_ADD_INT:
534 case ALU_OP2_MAX_UINT:
535 case ALU_OP2_OR_INT:
536 case ALU_OP2_XOR_INT:
537 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
538 return fold_alu_op1(n);
539 case ALU_OP2_AND_INT:
540 case ALU_OP2_ASHR_INT:
541 case ALU_OP2_LSHL_INT:
542 case ALU_OP2_LSHR_INT:
543 case ALU_OP2_MIN_UINT:
544 case ALU_OP2_MUL:
545 case ALU_OP2_MULHI_UINT:
546 case ALU_OP2_MULLO_UINT:
547 convert_to_mov(n, sh.get_const_value(literal(0)));
548 return fold_alu_op1(n);
549 }
550 } else if (isc1 && cv1 == literal(0)) {
551 switch (n.bc.op) {
552 case ALU_OP2_ADD:
553 case ALU_OP2_ADD_INT:
554 case ALU_OP2_ASHR_INT:
555 case ALU_OP2_LSHL_INT:
556 case ALU_OP2_LSHR_INT:
557 case ALU_OP2_MAX_UINT:
558 case ALU_OP2_OR_INT:
559 case ALU_OP2_SUB_INT:
560 case ALU_OP2_XOR_INT:
561 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
562 return fold_alu_op1(n);
563 case ALU_OP2_AND_INT:
564 case ALU_OP2_MIN_UINT:
565 case ALU_OP2_MUL:
566 case ALU_OP2_MULHI_UINT:
567 case ALU_OP2_MULLO_UINT:
568 convert_to_mov(n, sh.get_const_value(literal(0)));
569 return fold_alu_op1(n);
570 }
571 } else if (isc0 && cv0 == literal(1.0f)) {
572 switch (n.bc.op) {
573 case ALU_OP2_MUL:
574 case ALU_OP2_MUL_IEEE:
575 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs);
576 return fold_alu_op1(n);
577 }
578 } else if (isc1 && cv1 == literal(1.0f)) {
579 switch (n.bc.op) {
580 case ALU_OP2_MUL:
581 case ALU_OP2_MUL_IEEE:
582 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs);
583 return fold_alu_op1(n);
584 }
585 }
586
587 return false;
588 }
589
590 apply_alu_dst_mod(n.bc, dv);
591 assign_source(n.dst[0], get_const(dv));
592 return true;
593 }
594
595 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
596 literal s1, literal s2) {
597
598 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
599 unsigned cc = alu_cnd_flags & AF_CC_MASK;
600
601 switch (cmp_type) {
602 case AF_FLOAT_CMP: {
603 switch (cc) {
604 case AF_CC_E : return s1.f == s2.f;
605 case AF_CC_GT: return s1.f > s2.f;
606 case AF_CC_GE: return s1.f >= s2.f;
607 case AF_CC_NE: return s1.f != s2.f;
608 case AF_CC_LT: return s1.f < s2.f;
609 case AF_CC_LE: return s1.f <= s2.f;
610 default:
611 assert(!"invalid condition code");
612 return false;
613 }
614 }
615 case AF_INT_CMP: {
616 switch (cc) {
617 case AF_CC_E : return s1.i == s2.i;
618 case AF_CC_GT: return s1.i > s2.i;
619 case AF_CC_GE: return s1.i >= s2.i;
620 case AF_CC_NE: return s1.i != s2.i;
621 case AF_CC_LT: return s1.i < s2.i;
622 case AF_CC_LE: return s1.i <= s2.i;
623 default:
624 assert(!"invalid condition code");
625 return false;
626 }
627 }
628 case AF_UINT_CMP: {
629 switch (cc) {
630 case AF_CC_E : return s1.u == s2.u;
631 case AF_CC_GT: return s1.u > s2.u;
632 case AF_CC_GE: return s1.u >= s2.u;
633 case AF_CC_NE: return s1.u != s2.u;
634 case AF_CC_LT: return s1.u < s2.u;
635 case AF_CC_LE: return s1.u <= s2.u;
636 default:
637 assert(!"invalid condition code");
638 return false;
639 }
640 }
641 default:
642 assert(!"invalid cmp_type");
643 return false;
644 }
645 }
646
647 bool expr_handler::fold_alu_op3(alu_node& n) {
648
649 if (n.src.size() < 3)
650 return false;
651
652 value* v0 = n.src[0]->gvalue();
653 value* v1 = n.src[1]->gvalue();
654 value* v2 = n.src[2]->gvalue();
655
656 assert(v0 && v1 && v2 && n.dst[0]);
657
658 bool isc0 = v0->is_const();
659 bool isc1 = v1->is_const();
660 bool isc2 = v2->is_const();
661
662 literal dv, cv0, cv1, cv2;
663
664 if (isc0) {
665 cv0 = v0->get_const_value();
666 apply_alu_src_mod(n.bc, 0, cv0);
667 }
668
669 if (isc1) {
670 cv1 = v1->get_const_value();
671 apply_alu_src_mod(n.bc, 1, cv1);
672 }
673
674 if (isc2) {
675 cv2 = v2->get_const_value();
676 apply_alu_src_mod(n.bc, 2, cv2);
677 }
678
679 unsigned flags = n.bc.op_ptr->flags;
680
681 if (flags & AF_CMOV) {
682 int src = 0;
683
684 if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
685 // result doesn't depend on condition, convert to MOV
686 src = 1;
687 } else if (isc0) {
688 // src0 is const, condition can be evaluated, convert to MOV
689 bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
690 AF_CMP_TYPE_MASK), cv0, literal(0));
691 src = cond ? 1 : 2;
692 }
693
694 if (src) {
695 // if src is selected, convert to MOV
696 convert_to_mov(n, n.src[src], n.bc.src[src].neg);
697 return fold_alu_op1(n);
698 }
699 }
700
701 if (!isc0 && !isc1 && !isc2)
702 return false;
703
704 if (isc0 && isc1 && isc2) {
705 switch (n.bc.op) {
706 case ALU_OP3_MULADD_IEEE:
707 case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
708
709 // TODO
710
711 default:
712 return false;
713 }
714 } else {
715 if (isc0 && isc1) {
716 switch (n.bc.op) {
717 case ALU_OP3_MULADD:
718 case ALU_OP3_MULADD_IEEE:
719 dv = cv0.f * cv1.f;
720 n.bc.set_op(ALU_OP2_ADD);
721 n.src[0] = sh.get_const_value(dv);
722 memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
723 n.src[1] = n.src[2];
724 n.bc.src[1] = n.bc.src[2];
725 n.src.resize(2);
726 return fold_alu_op2(n);
727 }
728 }
729
730 if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
731 switch (n.bc.op) {
732 case ALU_OP3_MULADD:
733 convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs);
734 return fold_alu_op1(n);
735 }
736 }
737 return false;
738 }
739
740 apply_alu_dst_mod(n.bc, dv);
741 assign_source(n.dst[0], get_const(dv));
742 return true;
743 }
744
745 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
746 unsigned ncc = 0;
747
748 switch (cc) {
749 case AF_CC_E: ncc = AF_CC_NE; break;
750 case AF_CC_NE: ncc = AF_CC_E; break;
751 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
752 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
753 default:
754 assert(!"unexpected condition code");
755 break;
756 }
757 return ncc;
758 }
759
760 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
761
762 if (int_dst && cmp_type == AF_FLOAT_CMP) {
763 switch (cc) {
764 case AF_CC_E: return ALU_OP2_SETE_DX10;
765 case AF_CC_NE: return ALU_OP2_SETNE_DX10;
766 case AF_CC_GT: return ALU_OP2_SETGT_DX10;
767 case AF_CC_GE: return ALU_OP2_SETGE_DX10;
768 }
769 } else {
770
771 switch(cmp_type) {
772 case AF_FLOAT_CMP: {
773 switch (cc) {
774 case AF_CC_E: return ALU_OP2_SETE;
775 case AF_CC_NE: return ALU_OP2_SETNE;
776 case AF_CC_GT: return ALU_OP2_SETGT;
777 case AF_CC_GE: return ALU_OP2_SETGE;
778 }
779 break;
780 }
781 case AF_INT_CMP: {
782 switch (cc) {
783 case AF_CC_E: return ALU_OP2_SETE_INT;
784 case AF_CC_NE: return ALU_OP2_SETNE_INT;
785 case AF_CC_GT: return ALU_OP2_SETGT_INT;
786 case AF_CC_GE: return ALU_OP2_SETGE_INT;
787 }
788 break;
789 }
790 case AF_UINT_CMP: {
791 switch (cc) {
792 case AF_CC_E: return ALU_OP2_SETE_INT;
793 case AF_CC_NE: return ALU_OP2_SETNE_INT;
794 case AF_CC_GT: return ALU_OP2_SETGT_UINT;
795 case AF_CC_GE: return ALU_OP2_SETGE_UINT;
796 }
797 break;
798 }
799 }
800 }
801
802 assert(!"unexpected cc&cmp_type combination");
803 return ~0u;
804 }
805
806 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
807
808 switch(cmp_type) {
809 case AF_FLOAT_CMP: {
810 switch (cc) {
811 case AF_CC_E: return ALU_OP2_PRED_SETE;
812 case AF_CC_NE: return ALU_OP2_PRED_SETNE;
813 case AF_CC_GT: return ALU_OP2_PRED_SETGT;
814 case AF_CC_GE: return ALU_OP2_PRED_SETGE;
815 }
816 break;
817 }
818 case AF_INT_CMP: {
819 switch (cc) {
820 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
821 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
822 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
823 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
824 }
825 break;
826 }
827 case AF_UINT_CMP: {
828 switch (cc) {
829 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
830 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
831 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
832 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
833 }
834 break;
835 }
836 }
837
838 assert(!"unexpected cc&cmp_type combination");
839 return ~0u;
840 }
841
842 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
843
844 switch(cmp_type) {
845 case AF_FLOAT_CMP: {
846 switch (cc) {
847 case AF_CC_E: return ALU_OP2_KILLE;
848 case AF_CC_NE: return ALU_OP2_KILLNE;
849 case AF_CC_GT: return ALU_OP2_KILLGT;
850 case AF_CC_GE: return ALU_OP2_KILLGE;
851 }
852 break;
853 }
854 case AF_INT_CMP: {
855 switch (cc) {
856 case AF_CC_E: return ALU_OP2_KILLE_INT;
857 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
858 case AF_CC_GT: return ALU_OP2_KILLGT_INT;
859 case AF_CC_GE: return ALU_OP2_KILLGE_INT;
860 }
861 break;
862 }
863 case AF_UINT_CMP: {
864 switch (cc) {
865 case AF_CC_E: return ALU_OP2_KILLE_INT;
866 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
867 case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
868 case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
869 }
870 break;
871 }
872 }
873
874 assert(!"unexpected cc&cmp_type combination");
875 return ~0u;
876 }
877
878 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
879
880 switch(cmp_type) {
881 case AF_FLOAT_CMP: {
882 switch (cc) {
883 case AF_CC_E: return ALU_OP3_CNDE;
884 case AF_CC_GT: return ALU_OP3_CNDGT;
885 case AF_CC_GE: return ALU_OP3_CNDGE;
886 }
887 break;
888 }
889 case AF_INT_CMP: {
890 switch (cc) {
891 case AF_CC_E: return ALU_OP3_CNDE_INT;
892 case AF_CC_GT: return ALU_OP3_CNDGT_INT;
893 case AF_CC_GE: return ALU_OP3_CNDGE_INT;
894 }
895 break;
896 }
897 }
898
899 assert(!"unexpected cc&cmp_type combination");
900 return ~0u;
901 }
902
903
904 void convert_predset_to_set(shader& sh, alu_node* a) {
905
906 unsigned flags = a->bc.op_ptr->flags;
907 unsigned cc = flags & AF_CC_MASK;
908 unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
909
910 bool swap_args = false;
911
912 cc = invert_setcc_condition(cc, swap_args);
913
914 unsigned newop = get_setcc_op(cc, cmp_type, true);
915
916 a->dst.resize(1);
917 a->bc.set_op(newop);
918
919 if (swap_args) {
920 std::swap(a->src[0], a->src[1]);
921 std::swap(a->bc.src[0], a->bc.src[1]);
922 }
923
924 a->bc.update_exec_mask = 0;
925 a->bc.update_pred = 0;
926 }
927
928 } // namespace r600_sb