r600g/sb: optimize CNDcc instructions
[mesa.git] / src / gallium / drivers / r600 / sb / sb_expr.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include <cmath>
28
29 #include "sb_shader.h"
30
31 namespace r600_sb {
32
33 value* get_select_value_for_em(shader& sh, value* em) {
34 if (!em->def)
35 return NULL;
36
37 node *predset = em->def;
38 if (!predset->is_pred_set())
39 return NULL;
40
41 alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42 convert_predset_to_set(sh, s);
43
44 predset->insert_after(s);
45
46 value* &d0 = s->dst[0];
47 d0 = sh.create_temp_value();
48 d0->def = s;
49 return d0;
50 }
51
52 expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
53
54 value * expr_handler::get_const(const literal &l) {
55 value *v = sh.get_const_value(l);
56 if (!v->gvn_source)
57 vt.add_value(v);
58 return v;
59 }
60
// Makes dst an alias of src for GVN purposes: dst's canonical (gvn)
// source becomes src's canonical source.
void expr_handler::assign_source(value *dst, value *src) {
	dst->gvn_source = src->gvn_source;
}
64
65 bool expr_handler::equal(value *l, value *r) {
66
67 assert(l != r);
68
69 if (l->gvalue() == r->gvalue())
70 return true;
71
72 if (l->def && r->def)
73 return defs_equal(l, r);
74
75 if (l->is_rel() && r->is_rel())
76 return ivars_equal(l, r);
77
78 return false;
79 }
80
81 bool expr_handler::ivars_equal(value* l, value* r) {
82 if (l->rel->gvalue() == r->rel->gvalue()
83 && l->select == r->select) {
84
85 vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
86 vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
87
88 // FIXME: replace this with more precise aliasing test
89 return lv == rv;
90 }
91 return false;
92 }
93
94 bool expr_handler::defs_equal(value* l, value* r) {
95
96 node *d1 = l->def;
97 node *d2 = r->def;
98
99 if (d1->type != d2->type || d1->subtype != d2->subtype)
100 return false;
101
102 if (d1->is_pred_set() || d2->is_pred_set())
103 return false;
104
105 if (d1->type == NT_OP) {
106 switch (d1->subtype) {
107 case NST_ALU_INST:
108 return ops_equal(
109 static_cast<alu_node*>(d1),
110 static_cast<alu_node*>(d2));
111 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
112 // static_cast<fetch_node*>(d2);
113 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
114 // static_cast<cf_node*>(d2);
115 default:
116 break;
117 }
118 }
119 return false;
120 }
121
122 bool expr_handler::try_fold(value* v) {
123 assert(!v->gvn_source);
124
125 if (v->def)
126 try_fold(v->def);
127
128 if (v->gvn_source)
129 return true;
130
131 return false;
132 }
133
// Dispatches to the fold() overload matching the node's dynamic type.
bool expr_handler::try_fold(node* n) {
	return n->fold_dispatch(this);
}
137
// Folds a copy node (phi or psi): if all incoming values are provably
// equal, the destination aliases that common source. Returns true on
// success.
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {

		value *s = n.src[0];

		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		if (s->is_sgpr())
			return false;

		// fold only if every phi input equals the first one
		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			value *v = *I;
			if (!s->v_equal(v))
				return false;
		}

		assign_source(n.dst[0], s);
	} else {
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);

		// psi sources come in triples; the value component is at
		// offset 2 within each triple — compare those
		value *s = n.src[2];
		assert(s->gvn_source);

		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			value *v = *(I+2);
			if (!s->v_equal(v))
				return false;
		}
		assign_source(n.dst[0], s);
	}
	return true;
}
173
// Container nodes are never folded.
bool expr_handler::fold(container_node& n) {
	return false;
}
177
// Folding of predicate/kill setcc instructions is not implemented yet;
// always reports failure.
bool expr_handler::fold_setcc(alu_node &n) {

	// TODO

	return false;
}
184
185 bool expr_handler::fold(alu_node& n) {
186
187 if (n.bc.op_ptr->flags & (AF_PRED | AF_KILL)) {
188 fold_setcc(n);
189 return false;
190 }
191
192 switch (n.bc.op_ptr->src_count) {
193 case 1: return fold_alu_op1(n);
194 case 2: return fold_alu_op2(n);
195 case 3: return fold_alu_op3(n);
196 default:
197 assert(0);
198 }
199 return false;
200 }
201
202 bool expr_handler::fold(fetch_node& n) {
203
204 unsigned chan = 0;
205 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
206 value* &v = *I;
207 if (v) {
208 if (n.bc.dst_sel[chan] == SEL_0)
209 assign_source(*I, get_const(0.0f));
210 else if (n.bc.dst_sel[chan] == SEL_1)
211 assign_source(*I, get_const(1.0f));
212 }
213 ++chan;
214 }
215 return false;
216 }
217
// Control-flow instructions are never folded.
bool expr_handler::fold(cf_node& n) {
	return false;
}
221
// Applies the abs/neg source modifiers of operand `src` to constant v.
// Modifiers act on the float interpretation of the bits, abs before
// neg (so abs+neg yields -|x|).
void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
		literal &v) {
	const bc_alu_src &s = bc.src[src];

	if (s.abs)
		v = fabs(v.f);
	if (s.neg)
		v = -v.f;
}
231
232 void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
233 float omod_coeff[] = {2.0f, 4.0, 0.5f};
234
235 if (bc.omod)
236 v = v.f * omod_coeff[bc.omod - 1];
237 if (bc.clamp)
238 v = float_clamp(v.f);
239 }
240
241 bool expr_handler::args_equal(const vvec &l, const vvec &r) {
242
243 assert(l.size() == r.size());
244
245 int s = l.size();
246
247 for (int k = 0; k < s; ++k) {
248 if (!l[k]->v_equal(r[k]))
249 return false;
250 }
251
252 return true;
253 }
254
255 bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
256 const bc_alu &b0 = l->bc;
257 const bc_alu &b1 = r->bc;
258
259 if (b0.op != b1.op)
260 return false;
261
262 unsigned src_count = b0.op_ptr->src_count;
263
264 if (b0.index_mode != b1.index_mode)
265 return false;
266
267 if (b0.clamp != b1.clamp || b0.omod != b1.omod)
268 return false;
269
270 for (unsigned s = 0; s < src_count; ++s) {
271 const bc_alu_src &s0 = b0.src[s];
272 const bc_alu_src &s1 = b1.src[s];
273
274 if (s0.abs != s1.abs || s0.neg != s1.neg)
275 return false;
276 }
277 return args_equal(l->src, r->src);
278 }
279
// Constant-folds a single-source ALU instruction. Also performs copy
// propagation for modifier-free moves of non-constant values. Returns
// true when dst was successfully aliased to a folded value.
bool expr_handler::fold_alu_op1(alu_node& n) {

	assert(!n.src.empty());
	if (n.src.empty())
		return false;

	value* v0 = n.src[0];

	assert(v0 && n.dst[0]);

	if (!v0->is_const()) {
		// copy propagation: a move with no clamp/omod/abs/neg just
		// aliases its source
		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
				n.bc.op == ALU_OP1_MOVA_GPR_INT)
				&& n.bc.clamp == 0 && n.bc.omod == 0
				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
			assign_source(n.dst[0], v0);
			return true;
		}
		return false;
	}

	literal dv, cv = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv);

	switch (n.bc.op) {
	case ALU_OP1_CEIL: dv = ceil(cv.f); break;
	// COS/SIN scale by 2*PI — presumably the hw operand is in
	// revolutions rather than radians; confirm against the ISA
	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
	case ALU_OP1_FLOOR: dv = floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
	case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
	case ALU_OP1_LOG_CLAMPED:
	case ALU_OP1_LOG_IEEE:
		if (cv.f != 0.0f)
			dv = log2(cv.f);
		else
			// don't fold to NAN, let the GPU handle it for now
			// (prevents degenerate LIT tests from failing)
			return false;
		break;
	case ALU_OP1_MOV: dv = cv; break;
	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
	// case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
	// case ALU_OP1_MOVA_GPR_INT:
	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
	case ALU_OP1_PRED_SET_INV:
		// maps 0 -> 1, 1 -> 0, anything else passes through
		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
	case ALU_OP1_RECIPSQRT_CLAMPED:
	case ALU_OP1_RECIPSQRT_FF:
	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
	case ALU_OP1_RECIP_CLAMPED:
	case ALU_OP1_RECIP_FF:
	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
	// case ALU_OP1_RECIP_INT:
	// case ALU_OP1_RECIP_UINT:
	// case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
	case ALU_OP1_TRUNC: dv = trunc(cv.f); break;

	default:
		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
354
355 bool expr_handler::fold_alu_op2(alu_node& n) {
356
357 if (n.src.size() < 2)
358 return false;
359
360 value* v0 = n.src[0];
361 value* v1 = n.src[1];
362
363 assert(v0 && v1 && n.dst[0]);
364
365 bool isc0 = v0->is_const();
366 bool isc1 = v1->is_const();
367
368 if (!isc0 && !isc1)
369 return false;
370
371 literal dv, cv0, cv1;
372
373 if (isc0) {
374 cv0 = v0->get_const_value();
375 apply_alu_src_mod(n.bc, 0, cv0);
376 }
377
378 if (isc1) {
379 cv1 = v1->get_const_value();
380 apply_alu_src_mod(n.bc, 1, cv1);
381 }
382
383 if (isc0 && isc1) {
384 switch (n.bc.op) {
385 case ALU_OP2_ADD: dv = cv0.f + cv1.f; break;
386 case ALU_OP2_ADDC_UINT:
387 dv = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
388 case ALU_OP2_ADD_INT: dv = cv0.i + cv1.i; break;
389 case ALU_OP2_AND_INT: dv = cv0.i & cv1.i; break;
390 case ALU_OP2_ASHR_INT: dv = cv0.i >> (cv1.i & 0x1F); break;
391 case ALU_OP2_BFM_INT:
392 dv = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
393 case ALU_OP2_LSHL_INT: dv = cv0.i << cv1.i; break;
394 case ALU_OP2_LSHR_INT: dv = cv0.u >> cv1.u; break;
395 case ALU_OP2_MAX:
396 case ALU_OP2_MAX_DX10: dv = cv0.f > cv1.f ? cv0.f : cv1.f; break;
397 case ALU_OP2_MAX_INT: dv = cv0.i > cv1.i ? cv0.i : cv1.i; break;
398 case ALU_OP2_MAX_UINT: dv = cv0.u > cv1.u ? cv0.u : cv1.u; break;
399 case ALU_OP2_MIN:
400 case ALU_OP2_MIN_DX10: dv = cv0.f < cv1.f ? cv0.f : cv1.f; break;
401 case ALU_OP2_MIN_INT: dv = cv0.i < cv1.i ? cv0.i : cv1.i; break;
402 case ALU_OP2_MIN_UINT: dv = cv0.u < cv1.u ? cv0.u : cv1.u; break;
403 case ALU_OP2_MUL:
404 case ALU_OP2_MUL_IEEE: dv = cv0.f * cv1.f; break;
405 case ALU_OP2_MULHI_INT:
406 dv = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
407 case ALU_OP2_MULHI_UINT:
408 dv = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
409 case ALU_OP2_MULLO_INT:
410 dv = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
411 case ALU_OP2_MULLO_UINT:
412 dv = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
413 case ALU_OP2_OR_INT: dv = cv0.i | cv1.i; break;
414 case ALU_OP2_SUB_INT: dv = cv0.i - cv1.i; break;
415 case ALU_OP2_XOR_INT: dv = cv0.i ^ cv1.i; break;
416
417 case ALU_OP2_SETE: dv = cv0.f == cv1.f ? 1.0f : 0.0f; break;
418
419 default:
420 return false;
421 }
422
423 } else { // one source is const
424
425 // TODO handle 1 * anything, 0 * anything, 0 + anything, etc
426
427 return false;
428 }
429
430 apply_alu_dst_mod(n.bc, dv);
431 assign_source(n.dst[0], get_const(dv));
432 return true;
433 }
434
435 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
436 literal s1, literal s2) {
437
438 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
439 unsigned cc = alu_cnd_flags & AF_CC_MASK;
440
441 switch (cmp_type) {
442 case AF_FLOAT_CMP: {
443 switch (cc) {
444 case AF_CC_E : return s1.f == s2.f;
445 case AF_CC_GT: return s1.f > s2.f;
446 case AF_CC_GE: return s1.f >= s2.f;
447 case AF_CC_NE: return s1.f != s2.f;
448 case AF_CC_LT: return s1.f < s2.f;
449 case AF_CC_LE: return s1.f <= s2.f;
450 default:
451 assert(!"invalid condition code");
452 return false;
453 }
454 }
455 case AF_INT_CMP: {
456 switch (cc) {
457 case AF_CC_E : return s1.i == s2.i;
458 case AF_CC_GT: return s1.i > s2.i;
459 case AF_CC_GE: return s1.i >= s2.i;
460 case AF_CC_NE: return s1.i != s2.i;
461 case AF_CC_LT: return s1.i < s2.i;
462 case AF_CC_LE: return s1.i <= s2.i;
463 default:
464 assert(!"invalid condition code");
465 return false;
466 }
467 }
468 case AF_UINT_CMP: {
469 switch (cc) {
470 case AF_CC_E : return s1.u == s2.u;
471 case AF_CC_GT: return s1.u > s2.u;
472 case AF_CC_GE: return s1.u >= s2.u;
473 case AF_CC_NE: return s1.u != s2.u;
474 case AF_CC_LT: return s1.u < s2.u;
475 case AF_CC_LE: return s1.u <= s2.u;
476 default:
477 assert(!"invalid condition code");
478 return false;
479 }
480 }
481 default:
482 assert(!"invalid cmp_type");
483 return false;
484 }
485 }
486
// Constant-folds a three-source ALU instruction. Conditional moves
// (AF_CMOV) whose condition is known or irrelevant are first reduced
// in place to a MOV and folded via fold_alu_op1. Returns true when
// dst was aliased to a folded value.
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
		return false;

	value* v0 = n.src[0];
	value* v1 = n.src[1];
	value* v2 = n.src[2];

	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	literal dv, cv0, cv1, cv2;

	if (isc0) {
		cv0 = v0->get_const_value();
		apply_alu_src_mod(n.bc, 0, cv0);
	}

	if (isc1) {
		cv1 = v1->get_const_value();
		apply_alu_src_mod(n.bc, 1, cv1);
	}

	if (isc2) {
		cv2 = v2->get_const_value();
		apply_alu_src_mod(n.bc, 2, cv2);
	}

	if (n.bc.op_ptr->flags & AF_CMOV) {
		int src = 0;

		// NOTE(review): only neg is compared — op3 sources appear to
		// have no abs modifier; confirm against the ISA encoding
		if (v1->gvalue() == v2->gvalue() &&
				n.bc.src[1].neg == n.bc.src[2].neg) {
			// result doesn't depend on condition, convert to MOV
			src = 1;
		} else if (isc0) {
			// src0 is const, condition can be evaluated, convert to MOV
			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
					AF_CMP_TYPE_MASK), cv0, literal(0));
			src = cond ? 1 : 2;
		}

		if (src) {
			// if src is selected, convert to MOV
			n.bc.src[0] = n.bc.src[src];
			n.src[0] = n.src[src];
			n.src.resize(1);
			n.bc.set_op(ALU_OP1_MOV);
			return fold_alu_op1(n);
		}
	}

	if (!isc0 && !isc1 && !isc2)
		return false;

	if (isc0 && isc1 && isc2) {
		switch (n.bc.op) {
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;

		// TODO other fully-constant op3 instructions

		default:
			return false;
		}

	} else {

		// TODO folding with partially constant sources

		return false;
	}

	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	return true;
}
567
568 unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
569 unsigned ncc = 0;
570
571 switch (cc) {
572 case AF_CC_E: ncc = AF_CC_NE; break;
573 case AF_CC_NE: ncc = AF_CC_E; break;
574 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
575 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
576 default:
577 assert(!"unexpected condition code");
578 break;
579 }
580 return ncc;
581 }
582
583 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
584
585 if (int_dst && cmp_type == AF_FLOAT_CMP) {
586 switch (cc) {
587 case AF_CC_E: return ALU_OP2_SETE_DX10;
588 case AF_CC_NE: return ALU_OP2_SETNE_DX10;
589 case AF_CC_GT: return ALU_OP2_SETGT_DX10;
590 case AF_CC_GE: return ALU_OP2_SETGE_DX10;
591 }
592 } else {
593
594 switch(cmp_type) {
595 case AF_FLOAT_CMP: {
596 switch (cc) {
597 case AF_CC_E: return ALU_OP2_SETE;
598 case AF_CC_NE: return ALU_OP2_SETNE;
599 case AF_CC_GT: return ALU_OP2_SETGT;
600 case AF_CC_GE: return ALU_OP2_SETGE;
601 }
602 break;
603 }
604 case AF_INT_CMP: {
605 switch (cc) {
606 case AF_CC_E: return ALU_OP2_SETE_INT;
607 case AF_CC_NE: return ALU_OP2_SETNE_INT;
608 case AF_CC_GT: return ALU_OP2_SETGT_INT;
609 case AF_CC_GE: return ALU_OP2_SETGE_INT;
610 }
611 break;
612 }
613 case AF_UINT_CMP: {
614 switch (cc) {
615 case AF_CC_E: return ALU_OP2_SETE_INT;
616 case AF_CC_NE: return ALU_OP2_SETNE_INT;
617 case AF_CC_GT: return ALU_OP2_SETGT_UINT;
618 case AF_CC_GE: return ALU_OP2_SETGE_UINT;
619 }
620 break;
621 }
622 }
623 }
624
625 assert(!"unexpected cc&cmp_type combination");
626 return ~0u;
627 }
628
629 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
630
631 switch(cmp_type) {
632 case AF_FLOAT_CMP: {
633 switch (cc) {
634 case AF_CC_E: return ALU_OP2_PRED_SETE;
635 case AF_CC_NE: return ALU_OP2_PRED_SETNE;
636 case AF_CC_GT: return ALU_OP2_PRED_SETGT;
637 case AF_CC_GE: return ALU_OP2_PRED_SETGE;
638 }
639 break;
640 }
641 case AF_INT_CMP: {
642 switch (cc) {
643 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
644 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
645 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
646 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
647 }
648 break;
649 }
650 case AF_UINT_CMP: {
651 switch (cc) {
652 case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
653 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
654 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
655 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
656 }
657 break;
658 }
659 }
660
661 assert(!"unexpected cc&cmp_type combination");
662 return ~0u;
663 }
664
665 unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
666
667 switch(cmp_type) {
668 case AF_FLOAT_CMP: {
669 switch (cc) {
670 case AF_CC_E: return ALU_OP2_KILLE;
671 case AF_CC_NE: return ALU_OP2_KILLNE;
672 case AF_CC_GT: return ALU_OP2_KILLGT;
673 case AF_CC_GE: return ALU_OP2_KILLGE;
674 }
675 break;
676 }
677 case AF_INT_CMP: {
678 switch (cc) {
679 case AF_CC_E: return ALU_OP2_KILLE_INT;
680 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
681 case AF_CC_GT: return ALU_OP2_KILLGT_INT;
682 case AF_CC_GE: return ALU_OP2_KILLGE_INT;
683 }
684 break;
685 }
686 case AF_UINT_CMP: {
687 switch (cc) {
688 case AF_CC_E: return ALU_OP2_KILLE_INT;
689 case AF_CC_NE: return ALU_OP2_KILLNE_INT;
690 case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
691 case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
692 }
693 break;
694 }
695 }
696
697 assert(!"unexpected cc&cmp_type combination");
698 return ~0u;
699 }
700
701 unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
702
703 switch(cmp_type) {
704 case AF_FLOAT_CMP: {
705 switch (cc) {
706 case AF_CC_E: return ALU_OP3_CNDE;
707 case AF_CC_GT: return ALU_OP3_CNDGT;
708 case AF_CC_GE: return ALU_OP3_CNDGE;
709 }
710 break;
711 }
712 case AF_INT_CMP: {
713 switch (cc) {
714 case AF_CC_E: return ALU_OP3_CNDE_INT;
715 case AF_CC_GT: return ALU_OP3_CNDGT_INT;
716 case AF_CC_GE: return ALU_OP3_CNDGE_INT;
717 }
718 break;
719 }
720 }
721
722 assert(!"unexpected cc&cmp_type combination");
723 return ~0u;
724 }
725
726
// Rewrites a PRED_SETcc instruction in place into the DX10-style SETcc
// computing the inverted condition (the inversion comes from
// invert_setcc_condition; callers such as get_select_value_for_em rely
// on this to build a select value for the exec mask). The exec-mask
// and predicate updates are dropped and only one gpr dst is kept.
void convert_predset_to_set(shader& sh, alu_node* a) {

	unsigned flags = a->bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;
	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;

	bool swap_args = false;

	cc = invert_setcc_condition(cc, swap_args);

	unsigned newop = get_setcc_op(cc, cmp_type, true);

	a->dst.resize(1);
	a->bc.set_op(newop);

	// inverting GE/GT requires swapping the compared operands
	if (swap_args) {
		std::swap(a->src[0], a->src[1]);
		std::swap(a->bc.src[0], a->bc.src[1]);
	}

	a->bc.update_exec_mask = 0;
	a->bc.update_pred = 0;
}
750
751 } // namespace r600_sb