2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "sb_shader.h"
// Returns a select (condition) value usable in place of the exec-mask value
// `em`: clones em's defining PRED_SET instruction, converts the clone to the
// equivalent SET instruction, inserts it after the original, and points its
// destination at a fresh temp value.
// NOTE(review): garbled extraction — statements are split across lines and the
// leading integers are original source line numbers; several original lines
// (number gaps, e.g. 34-36, 39-40, 48+) are missing here, including the
// returns. Kept byte-identical; comments only.
33 value
* get_select_value_for_em(shader
& sh
, value
* em
) {
37 node
*predset
= em
->def
;
// Bail out if em's def is not a PRED_SET (missing branch body — line gap).
38 if (!predset
->is_pred_set())
41 alu_node
*s
= sh
.clone(static_cast<alu_node
*>(predset
));
42 convert_predset_to_set(sh
, s
);
44 predset
->insert_after(s
);
// Redirect the clone's dst[0] to a newly created temp value.
46 value
* &d0
= s
->dst
[0];
47 d0
= sh
.create_temp_value();
// Rewrites the ALU node `n` in place into a MOV of `src` with the given
// neg/abs source modifiers: sets src[0].abs/.neg and switches the opcode to
// ALU_OP1_MOV.
// NOTE(review): garbled extraction — original lines 53-54 (presumably the
// n.src assignment) are missing from this view. Kept byte-identical.
52 void convert_to_mov(alu_node
&n
, value
*src
, bool neg
, bool abs
) {
55 n
.bc
.src
[0].abs
= abs
;
56 n
.bc
.src
[0].neg
= neg
;
57 n
.bc
.set_op(ALU_OP1_MOV
);
60 expr_handler::expr_handler(shader
& sh
) : sh(sh
), vt(sh
.vt
) {}
// Returns the shader's canonical constant value for literal `l`.
// NOTE(review): garbled extraction — original lines 64+ (presumably the
// gvn_source setup and the return) are missing. Kept byte-identical.
62 value
* expr_handler::get_const(const literal
&l
) {
63 value
*v
= sh
.get_const_value(l
);
// Propagates the GVN source link: dst inherits src's gvn_source so both
// values are treated as equivalent by later passes.
// NOTE(review): garbled extraction — the closing brace line is missing from
// this view. Kept byte-identical; comments only.
69 void expr_handler::assign_source(value
*dst
, value
*src
) {
70 dst
->gvn_source
= src
->gvn_source
;
// Equality test for two values: identical gvalues compare equal; otherwise
// fall back to comparing defining expressions (defs_equal), and for a pair of
// relative (indexed) values to ivars_equal.
// NOTE(review): garbled extraction — original lines 74-76, 78-80, 85+ are
// missing (asserts / early returns). Kept byte-identical; comments only.
73 bool expr_handler::equal(value
*l
, value
*r
) {
77 if (l
->gvalue() == r
->gvalue())
81 return defs_equal(l
, r
);
83 if (l
->is_rel() && r
->is_rel())
84 return ivars_equal(l
, r
);
// Equality for relative (indexed) values: same address (rel gvalue) and same
// select, then compare the maskable def/use vectors (mdef if non-empty, else
// muse) — presumably via args_equal on the missing return line.
// NOTE(review): garbled extraction — original lines 92, 95, 97+ are missing.
// Kept byte-identical; comments only.
89 bool expr_handler::ivars_equal(value
* l
, value
* r
) {
90 if (l
->rel
->gvalue() == r
->rel
->gvalue()
91 && l
->select
== r
->select
) {
93 vvec
&lv
= l
->mdef
.empty() ? l
->muse
: l
->mdef
;
94 vvec
&rv
= r
->mdef
.empty() ? r
->muse
: r
->mdef
;
96 // FIXME: replace this with more precise aliasing test
// Structural equality of the defining nodes of two values: types and subtypes
// must match, PRED_SET definitions are never merged, and ALU ops dispatch to
// ops_equal. Fetch/CF comparison is intentionally disabled (commented out).
// NOTE(review): garbled extraction — original lines 103-106, 108-109, 111-112,
// 115-116, 123+ are missing (d1/d2 setup, early returns). Kept byte-identical.
102 bool expr_handler::defs_equal(value
* l
, value
* r
) {
107 if (d1
->type
!= d2
->type
|| d1
->subtype
!= d2
->subtype
)
110 if (d1
->is_pred_set() || d2
->is_pred_set())
113 if (d1
->type
== NT_OP
) {
114 switch (d1
->subtype
) {
117 static_cast<alu_node
*>(d1
),
118 static_cast<alu_node
*>(d2
));
119 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
120 // static_cast<fetch_node*>(d2);
121 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
122 // static_cast<cf_node*>(d2);
// Attempts to fold a single value; the value must not already have a
// gvn_source assigned. The remainder of the body (original lines 132+) is
// missing from this extraction. Kept byte-identical; comments only.
130 bool expr_handler::try_fold(value
* v
) {
131 assert(!v
->gvn_source
);
142 bool expr_handler::try_fold(node
* n
) {
143 return n
->fold_dispatch(this);
// Folds PHI and PSI pseudo-nodes: if all incoming sources resolve to the same
// value, the destination's gvn_source is pointed at it via assign_source.
// NOTE(review): garbled extraction — many original lines are missing (148-150,
// 155-157, 159-163, 165, 168-169, 171, 173-176, 178+), including the loop
// bodies that compare each source against `s`. Kept byte-identical.
146 bool expr_handler::fold(node
& n
) {
147 if (n
.subtype
== NST_PHI
) {
151 // FIXME disabling phi folding for registers for now, otherwise we lose
152 // control flow information in some cases
153 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
154 // probably control flow transformation is required to enable it
// PHI: scan remaining sources starting at index 1.
158 for(vvec::iterator I
= n
.src
.begin() + 1, E
= n
.src
.end(); I
!= E
; ++I
) {
164 assign_source(n
.dst
[0], s
);
// PSI: sources come in (pred, predsel, value) triples, hence >= 6 and step 3.
166 assert(n
.subtype
== NST_PSI
);
167 assert(n
.src
.size() >= 6);
170 assert(s
->gvn_source
);
172 for(vvec::iterator I
= n
.src
.begin() + 3, E
= n
.src
.end(); I
!= E
; I
+= 3) {
177 assign_source(n
.dst
[0], s
);
// fold() overload for container nodes. The body (original lines 183+) is
// missing from this extraction. Kept byte-identical; comments only.
182 bool expr_handler::fold(container_node
& n
) {
// Constant-folds SETcc-class ALU instructions (ops with the AF_SET flag).
// Handles: both sources constant (evaluate_condition); one constant source
// combined with the other source's abs/neg modifiers making the comparison
// decidable without the value; unsigned comparisons against 0; and the
// same-value case v0 == v1 (integer compares only — NaN rules forbid the
// float shortcut). On success the node becomes a MOV of the boolean result
// encoded per dst_type, then re-folded as an op1.
// NOTE(review): garbled extraction — leading integers are original line
// numbers; numerous original lines are missing (187, 190, 192, 197-200, 203,
// 205-206, 209-211, 214-216, 218-219, 223-224, 226-228, 231-232, 234-237,
// 239-242, 246-247, 249-251, 254-255, 257-260, 262-264, 270-277, 280-282,
// 285+), including the result assignments in each branch. Kept byte-identical.
186 bool expr_handler::fold_setcc(alu_node
&n
) {
188 value
* v0
= n
.src
[0]->gvalue();
189 value
* v1
= n
.src
[1]->gvalue();
191 assert(v0
&& v1
&& n
.dst
[0]);
// Decompose the op's flags into condition code, compare type and dst type.
193 unsigned flags
= n
.bc
.op_ptr
->flags
;
194 unsigned cc
= flags
& AF_CC_MASK
;
195 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
196 unsigned dst_type
= flags
& AF_DST_TYPE_MASK
;
199 bool have_result
= false;
201 bool isc0
= v0
->is_const();
202 bool isc1
= v1
->is_const();
204 literal dv
, cv0
, cv1
;
// Fetch constant operands and apply their abs/neg source modifiers.
207 cv0
= v0
->get_const_value();
208 apply_alu_src_mod(n
.bc
, 0, cv0
);
212 cv1
= v1
->get_const_value();
213 apply_alu_src_mod(n
.bc
, 1, cv1
);
// Both constant: the condition can be evaluated outright.
217 cond_result
= evaluate_condition(flags
, cv0
, cv1
);
// Only src1 constant: |src0| (abs, no neg) is known non-negative, which
// decides some float comparisons against a constant of known sign.
220 if (cmp_type
== AF_FLOAT_CMP
) {
221 if (n
.bc
.src
[0].abs
&& !n
.bc
.src
[0].neg
) {
222 if (cv1
.f
< 0.0f
&& (cc
== AF_CC_GT
|| cc
== AF_CC_NE
)) {
225 } else if (cv1
.f
<= 0.0f
&& cc
== AF_CC_GE
) {
// -|src0| is known non-positive; mirror cases.
229 } else if (n
.bc
.src
[0].abs
&& n
.bc
.src
[0].neg
) {
230 if (cv1
.f
> 0.0f
&& (cc
== AF_CC_GE
|| cc
== AF_CC_E
)) {
233 } else if (cv1
.f
>= 0.0f
&& cc
== AF_CC_GT
) {
// Unsigned: anything >= 0 is trivially true.
238 } else if (cmp_type
== AF_UINT_CMP
&& cv1
.u
== 0 && cc
== AF_CC_GE
) {
// Only src0 constant: symmetric reasoning on src1's modifiers.
243 if (cmp_type
== AF_FLOAT_CMP
) {
244 if (n
.bc
.src
[1].abs
&& !n
.bc
.src
[1].neg
) {
245 if (cv0
.f
<= 0.0f
&& cc
== AF_CC_GT
) {
248 } else if (cv0
.f
< 0.0f
&& (cc
== AF_CC_GE
|| cc
== AF_CC_E
)) {
252 } else if (n
.bc
.src
[1].abs
&& n
.bc
.src
[1].neg
) {
253 if (cv0
.f
>= 0.0f
&& cc
== AF_CC_GE
) {
256 } else if (cv0
.f
> 0.0f
&& (cc
== AF_CC_GT
|| cc
== AF_CC_NE
)) {
261 } else if (cmp_type
== AF_UINT_CMP
&& cv0
.u
== 0 && cc
== AF_CC_GT
) {
// Same value on both sides with identical modifiers: reflexive compare.
265 } else if (v0
== v1
) {
266 bc_alu_src
&s0
= n
.bc
.src
[0], &s1
= n
.bc
.src
[1];
267 if (s0
.abs
== s1
.abs
&& s0
.neg
== s1
.neg
&& cmp_type
!= AF_FLOAT_CMP
) {
268 // NOTE can't handle float comparisons here because of NaNs
269 cond_result
= (cc
== AF_CC_E
|| cc
== AF_CC_GE
);
// Encode "true" per destination type: all-ones for int dst, 1.0f for float.
278 result
= dst_type
!= AF_FLOAT_DST
?
279 literal(0xFFFFFFFFu
) : literal(1.0f
);
283 convert_to_mov(n
, sh
.get_const_value(result
));
284 return fold_alu_op1(n
);
// fold() overload for ALU nodes: dispatch on operand count to the matching
// op1/op2/op3 constant folder.
// NOTE(review): garbled extraction — original lines 291, 296+ (default case /
// closing) are missing. Kept byte-identical; comments only.
290 bool expr_handler::fold(alu_node
& n
) {
292 switch (n
.bc
.op_ptr
->src_count
) {
293 case 1: return fold_alu_op1(n
);
294 case 2: return fold_alu_op2(n
);
295 case 3: return fold_alu_op3(n
);
// fold() overload for fetch nodes: destination slots whose dst_sel is the
// hardwired SEL_0 / SEL_1 are folded to the constants 0.0f / 1.0f.
// NOTE(review): garbled extraction — original lines 303-304, 306-307, 312+
// are missing (incl. the `chan` setup used below). Kept byte-identical.
302 bool expr_handler::fold(fetch_node
& n
) {
305 for (vvec::iterator I
= n
.dst
.begin(), E
= n
.dst
.end(); I
!= E
; ++I
) {
308 if (n
.bc
.dst_sel
[chan
] == SEL_0
)
309 assign_source(*I
, get_const(0.0f
));
310 else if (n
.bc
.dst_sel
[chan
] == SEL_1
)
311 assign_source(*I
, get_const(1.0f
));
// fold() overload for control-flow nodes. The body (original lines 319+) is
// missing from this extraction. Kept byte-identical; comments only.
318 bool expr_handler::fold(cf_node
& n
) {
// Applies the abs/neg source modifiers of operand `src` to a literal value.
// NOTE(review): garbled extraction — original lines 323, 325+ (the literal
// parameter and the modifier application) are missing. Kept byte-identical.
322 void expr_handler::apply_alu_src_mod(const bc_alu
&bc
, unsigned src
,
324 const bc_alu_src
&s
= bc
.src
[src
];
// Applies destination modifiers to a folded literal: scales by the output
// modifier coefficient (omod 1..3 -> x2, x4, x0.5) and clamps via
// float_clamp.
// NOTE(review): "4.0" lacks the `f` suffix that its neighbors 2.0f/0.5f carry
// — harmless (double converted to float) but likely `4.0f` upstream; confirm.
// NOTE(review): garbled extraction — original lines 334-335, 337, 339 are
// missing (the `if (bc.omod)` / `if (bc.clamp)` guards). Kept byte-identical.
332 void expr_handler::apply_alu_dst_mod(const bc_alu
&bc
, literal
&v
) {
333 float omod_coeff
[] = {2.0f
, 4.0, 0.5f
};
336 v
= v
.f
* omod_coeff
[bc
.omod
- 1];
338 v
= float_clamp(v
.f
);
// Element-wise equality of two value vectors of the same size using
// value::v_equal.
// NOTE(review): garbled extraction — original lines 342, 344-346, 349+ are
// missing (the size variable `s` and the returns). Kept byte-identical.
341 bool expr_handler::args_equal(const vvec
&l
, const vvec
&r
) {
343 assert(l
.size() == r
.size());
347 for (int k
= 0; k
< s
; ++k
) {
348 if (!l
[k
]->v_equal(r
[k
]))
// Structural equality of two ALU instructions: bytecode fields (index_mode,
// clamp, omod, per-source abs/neg modifiers) must match, then source values
// are compared with args_equal.
// NOTE(review): garbled extraction — original lines 358-361, 363, 365-366,
// 368-369, 373, 375-376 are missing (op equality check, early returns).
// Kept byte-identical; comments only.
355 bool expr_handler::ops_equal(const alu_node
*l
, const alu_node
* r
) {
356 const bc_alu
&b0
= l
->bc
;
357 const bc_alu
&b1
= r
->bc
;
362 unsigned src_count
= b0
.op_ptr
->src_count
;
364 if (b0
.index_mode
!= b1
.index_mode
)
367 if (b0
.clamp
!= b1
.clamp
|| b0
.omod
!= b1
.omod
)
// Per-source modifier comparison.
370 for (unsigned s
= 0; s
< src_count
; ++s
) {
371 const bc_alu_src
&s0
= b0
.src
[s
];
372 const bc_alu_src
&s1
= b1
.src
[s
];
374 if (s0
.abs
!= s1
.abs
|| s0
.neg
!= s1
.neg
)
377 return args_equal(l
->src
, r
->src
);
// Constant-folds single-source ALU ops. Non-constant sources still fold for
// plain MOV/MOVA variants without clamp/omod/abs/neg (pure copy -> propagate
// gvn_source). Constant sources are evaluated on the CPU for the listed
// opcodes, then dst modifiers are applied and the result becomes a constant.
// NOTE(review): garbled extraction — leading integers are original line
// numbers; many original lines are missing (381, 383-385, 387, 389, 396-400,
// 403-404, 418-420, 423-424, 445-449, 452+), including the switch header,
// the LOG/NaN handling, the default case and the final return.
// Kept byte-identical; comments only.
380 bool expr_handler::fold_alu_op1(alu_node
& n
) {
382 assert(!n
.src
.empty());
386 value
* v0
= n
.src
[0]->gvalue();
388 assert(v0
&& n
.dst
[0]);
// Non-constant source: only a pure, unmodified MOV-like op can be folded,
// by linking dst to the source value.
390 if (!v0
->is_const()) {
391 if ((n
.bc
.op
== ALU_OP1_MOV
|| n
.bc
.op
== ALU_OP1_MOVA_INT
||
392 n
.bc
.op
== ALU_OP1_MOVA_GPR_INT
)
393 && n
.bc
.clamp
== 0 && n
.bc
.omod
== 0
394 && n
.bc
.src
[0].abs
== 0 && n
.bc
.src
[0].neg
== 0) {
395 assign_source(n
.dst
[0], v0
);
// Constant source: fetch it and apply the abs/neg source modifiers.
401 literal dv
, cv
= v0
->get_const_value();
402 apply_alu_src_mod(n
.bc
, 0, cv
);
405 case ALU_OP1_CEIL
: dv
= ceil(cv
.f
); break;
406 case ALU_OP1_COS
: dv
= cos(cv
.f
* 2.0f
* M_PI
); break;
407 case ALU_OP1_EXP_IEEE
: dv
= exp2(cv
.f
); break;
408 case ALU_OP1_FLOOR
: dv
= floor(cv
.f
); break;
409 case ALU_OP1_FLT_TO_INT
: dv
= (int)cv
.f
; break; // FIXME: round modes ????
410 case ALU_OP1_FLT_TO_INT_FLOOR
: dv
= (int32_t)floor(cv
.f
); break;
411 case ALU_OP1_FLT_TO_INT_RPI
: dv
= (int32_t)floor(cv
.f
+ 0.5f
); break;
412 case ALU_OP1_FLT_TO_INT_TRUNC
: dv
= (int32_t)trunc(cv
.f
); break;
413 case ALU_OP1_FLT_TO_UINT
: dv
= (uint32_t)cv
.f
; break;
414 case ALU_OP1_FRACT
: dv
= cv
.f
- floor(cv
.f
); break;
415 case ALU_OP1_INT_TO_FLT
: dv
= (float)cv
.i
; break;
416 case ALU_OP1_LOG_CLAMPED
:
417 case ALU_OP1_LOG_IEEE
:
421 // don't fold to NAN, let the GPU handle it for now
422 // (prevents degenerate LIT tests from failing)
425 case ALU_OP1_MOV
: dv
= cv
; break;
426 case ALU_OP1_MOVA_INT
: dv
= cv
; break; // FIXME ???
427 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
428 // case ALU_OP1_MOVA_GPR_INT:
429 case ALU_OP1_NOT_INT
: dv
= ~cv
.i
; break;
430 case ALU_OP1_PRED_SET_INV
:
431 dv
= cv
.f
== 0.0f
? 1.0f
: (cv
.f
== 1.0f
? 0.0f
: cv
.f
); break;
432 case ALU_OP1_PRED_SET_RESTORE
: dv
= cv
; break;
433 case ALU_OP1_RECIPSQRT_CLAMPED
:
434 case ALU_OP1_RECIPSQRT_FF
:
435 case ALU_OP1_RECIPSQRT_IEEE
: dv
= 1.0f
/ sqrt(cv
.f
); break;
436 case ALU_OP1_RECIP_CLAMPED
:
437 case ALU_OP1_RECIP_FF
:
438 case ALU_OP1_RECIP_IEEE
: dv
= 1.0f
/ cv
.f
; break;
439 // case ALU_OP1_RECIP_INT:
440 case ALU_OP1_RECIP_UINT
: dv
.u
= (1ull << 32) / cv
.u
; break;
441 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
442 case ALU_OP1_SIN
: dv
= sin(cv
.f
* 2.0f
* M_PI
); break;
443 case ALU_OP1_SQRT_IEEE
: dv
= sqrt(cv
.f
); break;
444 case ALU_OP1_TRUNC
: dv
= trunc(cv
.f
); break;
// Apply clamp/omod to the computed literal and publish it as a constant.
450 apply_alu_dst_mod(n
.bc
, dv
);
451 assign_source(n
.dst
[0], get_const(dv
));
// Constant-folds two-source ALU ops. SETcc-class ops (AF_SET) are delegated
// to fold_setcc. With both sources constant the op is evaluated on the CPU;
// with one constant source, algebraic identities are applied: x+0, x^0,
// max(x,0u) etc. fold to a MOV of the other operand; x&0, x*0-style cases
// (AND/shift/MIN_UINT/MULHI/MULLO with 0) fold to the constant 0; and
// MUL_IEEE by 1.0f folds to a MOV of the other operand.
// NOTE(review): garbled extraction — leading integers are original line
// numbers; many original lines are missing (457, 459-460, 462, 465-466, 469,
// 471, 474-477, 479-480, 483-485, 488-491, 502, 506, 510, 523-527, 529,
// 531-532, 535, 544, 551-552, 558, 565, 570, 572-573, 577, 579-580, 584-589,
// 592+), including the switch headers, NOP/MUL cases, default branches and
// the final return. Kept byte-identical; comments only.
456 bool expr_handler::fold_alu_op2(alu_node
& n
) {
458 if (n
.src
.size() < 2)
461 unsigned flags
= n
.bc
.op_ptr
->flags
;
// Comparison-producing ops have dedicated folding logic.
463 if (flags
& AF_SET
) {
464 return fold_setcc(n
);
467 value
* v0
= n
.src
[0]->gvalue();
468 value
* v1
= n
.src
[1]->gvalue();
470 assert(v0
&& v1
&& n
.dst
[0]);
472 bool isc0
= v0
->is_const();
473 bool isc1
= v1
->is_const();
478 literal dv
, cv0
, cv1
;
// Fetch constant operands with their abs/neg modifiers applied.
481 cv0
= v0
->get_const_value();
482 apply_alu_src_mod(n
.bc
, 0, cv0
);
486 cv1
= v1
->get_const_value();
487 apply_alu_src_mod(n
.bc
, 1, cv1
);
// Both sources constant: evaluate the op directly.
492 case ALU_OP2_ADD
: dv
= cv0
.f
+ cv1
.f
; break;
493 case ALU_OP2_ADDC_UINT
:
494 dv
= (uint32_t)(((uint64_t)cv0
.u
+ cv1
.u
)>>32); break;
495 case ALU_OP2_ADD_INT
: dv
= cv0
.i
+ cv1
.i
; break;
496 case ALU_OP2_AND_INT
: dv
= cv0
.i
& cv1
.i
; break;
497 case ALU_OP2_ASHR_INT
: dv
= cv0
.i
>> (cv1
.i
& 0x1F); break;
498 case ALU_OP2_BFM_INT
:
499 dv
= (((1 << (cv0
.i
& 0x1F)) - 1) << (cv1
.i
& 0x1F)); break;
500 case ALU_OP2_LSHL_INT
: dv
= cv0
.i
<< cv1
.i
; break;
501 case ALU_OP2_LSHR_INT
: dv
= cv0
.u
>> cv1
.u
; break;
503 case ALU_OP2_MAX_DX10
: dv
= cv0
.f
> cv1
.f
? cv0
.f
: cv1
.f
; break;
504 case ALU_OP2_MAX_INT
: dv
= cv0
.i
> cv1
.i
? cv0
.i
: cv1
.i
; break;
505 case ALU_OP2_MAX_UINT
: dv
= cv0
.u
> cv1
.u
? cv0
.u
: cv1
.u
; break;
507 case ALU_OP2_MIN_DX10
: dv
= cv0
.f
< cv1
.f
? cv0
.f
: cv1
.f
; break;
508 case ALU_OP2_MIN_INT
: dv
= cv0
.i
< cv1
.i
? cv0
.i
: cv1
.i
; break;
509 case ALU_OP2_MIN_UINT
: dv
= cv0
.u
< cv1
.u
? cv0
.u
: cv1
.u
; break;
511 case ALU_OP2_MUL_IEEE
: dv
= cv0
.f
* cv1
.f
; break;
512 case ALU_OP2_MULHI_INT
:
513 dv
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
)>>32); break;
514 case ALU_OP2_MULHI_UINT
:
515 dv
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
)>>32); break;
516 case ALU_OP2_MULLO_INT
:
517 dv
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
518 case ALU_OP2_MULLO_UINT
:
519 dv
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
520 case ALU_OP2_OR_INT
: dv
= cv0
.i
| cv1
.i
; break;
521 case ALU_OP2_SUB_INT
: dv
= cv0
.i
- cv1
.i
; break;
522 case ALU_OP2_XOR_INT
: dv
= cv0
.i
^ cv1
.i
; break;
528 } else { // one source is const
// src0 == 0: additive/identity ops pass src1 through...
530 if (isc0
&& cv0
== literal(0)) {
533 case ALU_OP2_ADD_INT
:
534 case ALU_OP2_MAX_UINT
:
536 case ALU_OP2_XOR_INT
:
537 convert_to_mov(n
, n
.src
[1], n
.bc
.src
[1].neg
, n
.bc
.src
[1].abs
);
538 return fold_alu_op1(n
);
// ...while absorbing ops yield the constant 0.
539 case ALU_OP2_AND_INT
:
540 case ALU_OP2_ASHR_INT
:
541 case ALU_OP2_LSHL_INT
:
542 case ALU_OP2_LSHR_INT
:
543 case ALU_OP2_MIN_UINT
:
545 case ALU_OP2_MULHI_UINT
:
546 case ALU_OP2_MULLO_UINT
:
547 convert_to_mov(n
, sh
.get_const_value(literal(0)));
548 return fold_alu_op1(n
);
// src1 == 0: symmetric cases (shifts/SUB keep src0, AND/MIN/MUL* absorb).
550 } else if (isc1
&& cv1
== literal(0)) {
553 case ALU_OP2_ADD_INT
:
554 case ALU_OP2_ASHR_INT
:
555 case ALU_OP2_LSHL_INT
:
556 case ALU_OP2_LSHR_INT
:
557 case ALU_OP2_MAX_UINT
:
559 case ALU_OP2_SUB_INT
:
560 case ALU_OP2_XOR_INT
:
561 convert_to_mov(n
, n
.src
[0], n
.bc
.src
[0].neg
, n
.bc
.src
[0].abs
);
562 return fold_alu_op1(n
);
563 case ALU_OP2_AND_INT
:
564 case ALU_OP2_MIN_UINT
:
566 case ALU_OP2_MULHI_UINT
:
567 case ALU_OP2_MULLO_UINT
:
568 convert_to_mov(n
, sh
.get_const_value(literal(0)));
569 return fold_alu_op1(n
);
// Multiplicative identity: 1.0f * x == x (MUL_IEEE only).
571 } else if (isc0
&& cv0
== literal(1.0f
)) {
574 case ALU_OP2_MUL_IEEE
:
575 convert_to_mov(n
, n
.src
[1], n
.bc
.src
[1].neg
, n
.bc
.src
[1].abs
);
576 return fold_alu_op1(n
);
578 } else if (isc1
&& cv1
== literal(1.0f
)) {
581 case ALU_OP2_MUL_IEEE
:
582 convert_to_mov(n
, n
.src
[0], n
.bc
.src
[0].neg
, n
.bc
.src
[0].abs
);
583 return fold_alu_op1(n
);
// Publish the computed constant after applying destination modifiers.
590 apply_alu_dst_mod(n
.bc
, dv
);
591 assign_source(n
.dst
[0], get_const(dv
));
// Evaluates an ALU condition code over two literals, interpreting them as
// float / signed int / unsigned int per the AF_CMP_TYPE bits of
// alu_cnd_flags.
// NOTE(review): garbled extraction — leading integers are original line
// numbers; the switch statement headers and several lines (597, 600-603, 610,
// 612-616, 623, 625-629, 636, 638-641, 643+) are missing from this view.
// Kept byte-identical; comments only.
595 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags
,
596 literal s1
, literal s2
) {
598 unsigned cmp_type
= alu_cnd_flags
& AF_CMP_TYPE_MASK
;
599 unsigned cc
= alu_cnd_flags
& AF_CC_MASK
;
// Float comparison branch.
604 case AF_CC_E
: return s1
.f
== s2
.f
;
605 case AF_CC_GT
: return s1
.f
> s2
.f
;
606 case AF_CC_GE
: return s1
.f
>= s2
.f
;
607 case AF_CC_NE
: return s1
.f
!= s2
.f
;
608 case AF_CC_LT
: return s1
.f
< s2
.f
;
609 case AF_CC_LE
: return s1
.f
<= s2
.f
;
611 assert(!"invalid condition code");
// Signed integer comparison branch.
617 case AF_CC_E
: return s1
.i
== s2
.i
;
618 case AF_CC_GT
: return s1
.i
> s2
.i
;
619 case AF_CC_GE
: return s1
.i
>= s2
.i
;
620 case AF_CC_NE
: return s1
.i
!= s2
.i
;
621 case AF_CC_LT
: return s1
.i
< s2
.i
;
622 case AF_CC_LE
: return s1
.i
<= s2
.i
;
624 assert(!"invalid condition code");
// Unsigned integer comparison branch.
630 case AF_CC_E
: return s1
.u
== s2
.u
;
631 case AF_CC_GT
: return s1
.u
> s2
.u
;
632 case AF_CC_GE
: return s1
.u
>= s2
.u
;
633 case AF_CC_NE
: return s1
.u
!= s2
.u
;
634 case AF_CC_LT
: return s1
.u
< s2
.u
;
635 case AF_CC_LE
: return s1
.u
<= s2
.u
;
637 assert(!"invalid condition code");
642 assert(!"invalid cmp_type");
// Constant-folds three-source ALU ops. CMOV-class ops (AF_CMOV): if both
// selectable sources are the same value with equal neg modifiers, or src0 is
// constant so the condition is decidable, the node becomes a MOV of the
// selected source. MULADD(_IEEE): all-constant operands evaluate fully;
// constant mul part with a non-const addend rewrites the node into a 2-src
// ADD and re-folds; a 0 multiplicand folds to a MOV of src2.
// NOTE(review): garbled extraction — leading integers are original line
// numbers; many original lines are missing (648, 650-651, 655, 657, 661,
// 663-664, 667-669, 672-674, 677-678, 680, 682-683, 686-687, 691-694,
// 698-700, 702-703, 705, 708-717, 719, 723, 725, 727-729, 731-732, 735-739,
// 742+), including switch headers and the `src` selection logic.
// Kept byte-identical; comments only.
647 bool expr_handler::fold_alu_op3(alu_node
& n
) {
649 if (n
.src
.size() < 3)
652 value
* v0
= n
.src
[0]->gvalue();
653 value
* v1
= n
.src
[1]->gvalue();
654 value
* v2
= n
.src
[2]->gvalue();
656 assert(v0
&& v1
&& v2
&& n
.dst
[0]);
658 bool isc0
= v0
->is_const();
659 bool isc1
= v1
->is_const();
660 bool isc2
= v2
->is_const();
662 literal dv
, cv0
, cv1
, cv2
;
// Fetch constant operands with their abs/neg modifiers applied.
665 cv0
= v0
->get_const_value();
666 apply_alu_src_mod(n
.bc
, 0, cv0
);
670 cv1
= v1
->get_const_value();
671 apply_alu_src_mod(n
.bc
, 1, cv1
);
675 cv2
= v2
->get_const_value();
676 apply_alu_src_mod(n
.bc
, 2, cv2
);
679 unsigned flags
= n
.bc
.op_ptr
->flags
;
// Conditional-move ops: try to resolve the selection statically.
681 if (flags
& AF_CMOV
) {
684 if (v1
== v2
&& n
.bc
.src
[1].neg
== n
.bc
.src
[2].neg
) {
685 // result doesn't depend on condition, convert to MOV
688 // src0 is const, condition can be evaluated, convert to MOV
689 bool cond
= evaluate_condition(n
.bc
.op_ptr
->flags
& (AF_CC_MASK
|
690 AF_CMP_TYPE_MASK
), cv0
, literal(0));
695 // if src is selected, convert to MOV
696 convert_to_mov(n
, n
.src
[src
], n
.bc
.src
[src
].neg
);
697 return fold_alu_op1(n
);
701 if (!isc0
&& !isc1
&& !isc2
)
// All three constant: evaluate MULADD directly.
704 if (isc0
&& isc1
&& isc2
) {
706 case ALU_OP3_MULADD_IEEE
:
707 case ALU_OP3_MULADD
: dv
= cv0
.f
* cv1
.f
+ cv2
.f
; break;
// Constant product, non-constant addend: rewrite into ADD(dv, src2) and
// re-fold as a 2-source op.
718 case ALU_OP3_MULADD_IEEE
:
720 n
.bc
.set_op(ALU_OP2_ADD
);
721 n
.src
[0] = sh
.get_const_value(dv
);
722 memset(&n
.bc
.src
[0], 0, sizeof(bc_alu_src
));
724 n
.bc
.src
[1] = n
.bc
.src
[2];
726 return fold_alu_op2(n
);
// A 0 multiplicand reduces MULADD to a MOV of the addend src2.
730 if ((isc0
&& cv0
== literal(0)) || (isc1
&& cv1
== literal(0))) {
733 convert_to_mov(n
, n
.src
[2], n
.bc
.src
[2].neg
, n
.bc
.src
[2].abs
);
734 return fold_alu_op1(n
);
// Publish the computed constant after applying destination modifiers.
740 apply_alu_dst_mod(n
.bc
, dv
);
741 assign_source(n
.dst
[0], get_const(dv
));
// Returns the inverted condition code; GE/GT inversions additionally require
// the caller to swap the operands (swap_args set to true), since the hardware
// has no LT/LE SET ops.
// NOTE(review): garbled extraction — original lines 746-748 (ncc declaration,
// switch header) and 755+ (return) are missing. Kept byte-identical.
745 unsigned invert_setcc_condition(unsigned cc
, bool &swap_args
) {
749 case AF_CC_E
: ncc
= AF_CC_NE
; break;
750 case AF_CC_NE
: ncc
= AF_CC_E
; break;
751 case AF_CC_GE
: ncc
= AF_CC_GT
; swap_args
= true; break;
752 case AF_CC_GT
: ncc
= AF_CC_GE
; swap_args
= true; break;
754 assert(!"unexpected condition code");
// Maps (condition code, compare type, dst kind) to the concrete SETcc opcode:
// float compare with integer dst -> *_DX10 variants, float/float -> plain
// SET*, signed int -> *_INT, unsigned -> *_UINT (E/NE share the _INT forms).
// NOTE(review): garbled extraction — switch headers and branch structure
// lines (761, 763, 768-773, 778-782, 787-791, 796-801, 803+) are missing.
// Kept byte-identical; comments only.
760 unsigned get_setcc_op(unsigned cc
, unsigned cmp_type
, bool int_dst
) {
762 if (int_dst
&& cmp_type
== AF_FLOAT_CMP
) {
764 case AF_CC_E
: return ALU_OP2_SETE_DX10
;
765 case AF_CC_NE
: return ALU_OP2_SETNE_DX10
;
766 case AF_CC_GT
: return ALU_OP2_SETGT_DX10
;
767 case AF_CC_GE
: return ALU_OP2_SETGE_DX10
;
774 case AF_CC_E
: return ALU_OP2_SETE
;
775 case AF_CC_NE
: return ALU_OP2_SETNE
;
776 case AF_CC_GT
: return ALU_OP2_SETGT
;
777 case AF_CC_GE
: return ALU_OP2_SETGE
;
783 case AF_CC_E
: return ALU_OP2_SETE_INT
;
784 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
785 case AF_CC_GT
: return ALU_OP2_SETGT_INT
;
786 case AF_CC_GE
: return ALU_OP2_SETGE_INT
;
792 case AF_CC_E
: return ALU_OP2_SETE_INT
;
793 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
794 case AF_CC_GT
: return ALU_OP2_SETGT_UINT
;
795 case AF_CC_GE
: return ALU_OP2_SETGE_UINT
;
802 assert(!"unexpected cc&cmp_type combination");
// Maps (condition code, compare type) to the matching PRED_SETcc opcode;
// float -> PRED_SET*, signed -> *_INT, unsigned -> *_UINT (E/NE share _INT).
// NOTE(review): garbled extraction — switch headers and branch lines
// (807-810, 815-819, 824-828, 833-837, 839+) are missing. Kept byte-identical.
806 unsigned get_predsetcc_op(unsigned cc
, unsigned cmp_type
) {
811 case AF_CC_E
: return ALU_OP2_PRED_SETE
;
812 case AF_CC_NE
: return ALU_OP2_PRED_SETNE
;
813 case AF_CC_GT
: return ALU_OP2_PRED_SETGT
;
814 case AF_CC_GE
: return ALU_OP2_PRED_SETGE
;
820 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
821 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
822 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_INT
;
823 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_INT
;
829 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
830 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
831 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_UINT
;
832 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_UINT
;
838 assert(!"unexpected cc&cmp_type combination");
// Maps (condition code, compare type) to the matching KILLcc opcode; same
// float/_INT/_UINT pattern as the SET/PRED_SET tables above.
// NOTE(review): garbled extraction — switch headers and branch lines
// (843-846, 851-855, 860-864, 869-873, 875+) are missing. Kept byte-identical.
842 unsigned get_killcc_op(unsigned cc
, unsigned cmp_type
) {
847 case AF_CC_E
: return ALU_OP2_KILLE
;
848 case AF_CC_NE
: return ALU_OP2_KILLNE
;
849 case AF_CC_GT
: return ALU_OP2_KILLGT
;
850 case AF_CC_GE
: return ALU_OP2_KILLGE
;
856 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
857 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
858 case AF_CC_GT
: return ALU_OP2_KILLGT_INT
;
859 case AF_CC_GE
: return ALU_OP2_KILLGE_INT
;
865 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
866 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
867 case AF_CC_GT
: return ALU_OP2_KILLGT_UINT
;
868 case AF_CC_GE
: return ALU_OP2_KILLGE_UINT
;
874 assert(!"unexpected cc&cmp_type combination");
// Maps (condition code, compare type) to the matching 3-source CNDcc opcode
// (only E/GT/GE exist for conditional moves; no NE variants here).
// NOTE(review): garbled extraction — switch headers and branch lines
// (879-882, 886-890, 894-898, 900+) are missing. Kept byte-identical.
878 unsigned get_cndcc_op(unsigned cc
, unsigned cmp_type
) {
883 case AF_CC_E
: return ALU_OP3_CNDE
;
884 case AF_CC_GT
: return ALU_OP3_CNDGT
;
885 case AF_CC_GE
: return ALU_OP3_CNDGE
;
891 case AF_CC_E
: return ALU_OP3_CNDE_INT
;
892 case AF_CC_GT
: return ALU_OP3_CNDGT_INT
;
893 case AF_CC_GE
: return ALU_OP3_CNDGE_INT
;
899 assert(!"unexpected cc&cmp_type combination");
// Rewrites a PRED_SETcc ALU node into the equivalent SETcc instruction with
// an integer (DX10-style) destination: inverts the condition code (swapping
// operands when the inversion requires it) and clears the exec-mask/predicate
// update bits so the node no longer acts as a predicate producer.
// NOTE(review): garbled extraction — original lines 905, 909, 911, 913,
// 915-919 (incl. the set_op(newop) call and the swap_args guard), 922-923,
// 926 are missing from this view. Kept byte-identical; comments only.
904 void convert_predset_to_set(shader
& sh
, alu_node
* a
) {
906 unsigned flags
= a
->bc
.op_ptr
->flags
;
907 unsigned cc
= flags
& AF_CC_MASK
;
908 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
910 bool swap_args
= false;
912 cc
= invert_setcc_condition(cc
, swap_args
);
// int_dst=true: the select value must be an integer (DX10-style) result.
914 unsigned newop
= get_setcc_op(cc
, cmp_type
, true);
// When the inversion demanded it, swap both the IR sources and the
// bytecode source descriptors to keep them in sync.
920 std::swap(a
->src
[0], a
->src
[1]);
921 std::swap(a
->bc
.src
[0], a
->bc
.src
[1]);
// The converted node is a plain SET: it must not touch exec mask/predicate.
924 a
->bc
.update_exec_mask
= 0;
925 a
->bc
.update_pred
= 0;
928 } // namespace r600_sb