/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
29 #include "sb_shader.h"
33 value
* get_select_value_for_em(shader
& sh
, value
* em
) {
37 node
*predset
= em
->def
;
38 if (!predset
->is_pred_set())
41 alu_node
*s
= sh
.clone(static_cast<alu_node
*>(predset
));
42 convert_predset_to_set(sh
, s
);
44 predset
->insert_after(s
);
46 value
* &d0
= s
->dst
[0];
47 d0
= sh
.create_temp_value();
52 void convert_to_mov(alu_node
&n
, value
*src
, bool neg
, bool abs
) {
55 n
.bc
.src
[0].abs
= abs
;
56 n
.bc
.src
[0].neg
= neg
;
57 n
.bc
.set_op(ALU_OP1_MOV
);
60 expr_handler::expr_handler(shader
& sh
) : sh(sh
), vt(sh
.vt
) {}
62 value
* expr_handler::get_const(const literal
&l
) {
63 value
*v
= sh
.get_const_value(l
);
69 void expr_handler::assign_source(value
*dst
, value
*src
) {
70 dst
->gvn_source
= src
->gvn_source
;
73 bool expr_handler::equal(value
*l
, value
*r
) {
77 if (l
->gvalue() == r
->gvalue())
81 return defs_equal(l
, r
);
83 if (l
->is_rel() && r
->is_rel())
84 return ivars_equal(l
, r
);
89 bool expr_handler::ivars_equal(value
* l
, value
* r
) {
90 if (l
->rel
->gvalue() == r
->rel
->gvalue()
91 && l
->select
== r
->select
) {
93 vvec
&lv
= l
->mdef
.empty() ? l
->muse
: l
->mdef
;
94 vvec
&rv
= r
->mdef
.empty() ? r
->muse
: r
->mdef
;
96 // FIXME: replace this with more precise aliasing test
102 bool expr_handler::defs_equal(value
* l
, value
* r
) {
107 if (d1
->type
!= d2
->type
|| d1
->subtype
!= d2
->subtype
)
110 if (d1
->is_pred_set() || d2
->is_pred_set())
113 if (d1
->type
== NT_OP
) {
114 switch (d1
->subtype
) {
117 static_cast<alu_node
*>(d1
),
118 static_cast<alu_node
*>(d2
));
119 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
120 // static_cast<fetch_node*>(d2);
121 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
122 // static_cast<cf_node*>(d2);
130 bool expr_handler::try_fold(value
* v
) {
131 assert(!v
->gvn_source
);
142 bool expr_handler::try_fold(node
* n
) {
143 return n
->fold_dispatch(this);
146 bool expr_handler::fold(node
& n
) {
147 if (n
.subtype
== NST_PHI
) {
151 // FIXME disabling phi folding for registers for now, otherwise we lose
152 // control flow information in some cases
153 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
154 // probably control flow transformation is required to enable it
158 for(vvec::iterator I
= n
.src
.begin() + 1, E
= n
.src
.end(); I
!= E
; ++I
) {
164 assign_source(n
.dst
[0], s
);
166 assert(n
.subtype
== NST_PSI
);
167 assert(n
.src
.size() >= 6);
170 assert(s
->gvn_source
);
172 for(vvec::iterator I
= n
.src
.begin() + 3, E
= n
.src
.end(); I
!= E
; I
+= 3) {
177 assign_source(n
.dst
[0], s
);
182 bool expr_handler::fold(container_node
& n
) {
186 bool expr_handler::fold_setcc(alu_node
&n
) {
188 value
* v0
= n
.src
[0]->gvalue();
189 value
* v1
= n
.src
[1]->gvalue();
191 assert(v0
&& v1
&& n
.dst
[0]);
193 unsigned flags
= n
.bc
.op_ptr
->flags
;
194 unsigned cc
= flags
& AF_CC_MASK
;
195 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
196 unsigned dst_type
= flags
& AF_DST_TYPE_MASK
;
199 bool have_result
= false;
201 bool isc0
= v0
->is_const();
202 bool isc1
= v1
->is_const();
204 literal dv
, cv0
, cv1
;
207 cv0
= v0
->get_const_value();
208 apply_alu_src_mod(n
.bc
, 0, cv0
);
212 cv1
= v1
->get_const_value();
213 apply_alu_src_mod(n
.bc
, 1, cv1
);
217 cond_result
= evaluate_condition(flags
, cv0
, cv1
);
220 if (cmp_type
== AF_FLOAT_CMP
) {
221 if (n
.bc
.src
[0].abs
&& !n
.bc
.src
[0].neg
) {
222 if (cv1
.f
< 0.0f
&& (cc
== AF_CC_GT
|| cc
== AF_CC_NE
)) {
225 } else if (cv1
.f
<= 0.0f
&& cc
== AF_CC_GE
) {
229 } else if (n
.bc
.src
[0].abs
&& n
.bc
.src
[0].neg
) {
230 if (cv1
.f
> 0.0f
&& (cc
== AF_CC_GE
|| cc
== AF_CC_E
)) {
233 } else if (cv1
.f
>= 0.0f
&& cc
== AF_CC_GT
) {
238 } else if (cmp_type
== AF_UINT_CMP
&& cv1
.u
== 0 && cc
== AF_CC_GE
) {
243 if (cmp_type
== AF_FLOAT_CMP
) {
244 if (n
.bc
.src
[1].abs
&& !n
.bc
.src
[1].neg
) {
245 if (cv0
.f
<= 0.0f
&& cc
== AF_CC_GT
) {
248 } else if (cv0
.f
< 0.0f
&& (cc
== AF_CC_GE
|| cc
== AF_CC_E
)) {
252 } else if (n
.bc
.src
[1].abs
&& n
.bc
.src
[1].neg
) {
253 if (cv0
.f
>= 0.0f
&& cc
== AF_CC_GE
) {
256 } else if (cv0
.f
> 0.0f
&& (cc
== AF_CC_GT
|| cc
== AF_CC_NE
)) {
261 } else if (cmp_type
== AF_UINT_CMP
&& cv0
.u
== 0 && cc
== AF_CC_GT
) {
265 } else if (v0
== v1
) {
266 bc_alu_src
&s0
= n
.bc
.src
[0], &s1
= n
.bc
.src
[1];
267 if (s0
.abs
== s1
.abs
&& s0
.neg
== s1
.neg
&& cmp_type
!= AF_FLOAT_CMP
) {
268 // NOTE can't handle float comparisons here because of NaNs
269 cond_result
= (cc
== AF_CC_E
|| cc
== AF_CC_GE
);
278 result
= dst_type
!= AF_FLOAT_DST
?
279 literal(0xFFFFFFFFu
) : literal(1.0f
);
283 convert_to_mov(n
, sh
.get_const_value(result
));
284 return fold_alu_op1(n
);
290 bool expr_handler::fold(alu_node
& n
) {
292 switch (n
.bc
.op_ptr
->src_count
) {
293 case 1: return fold_alu_op1(n
);
294 case 2: return fold_alu_op2(n
);
295 case 3: return fold_alu_op3(n
);
302 bool expr_handler::fold(fetch_node
& n
) {
305 for (vvec::iterator I
= n
.dst
.begin(), E
= n
.dst
.end(); I
!= E
; ++I
) {
308 if (n
.bc
.dst_sel
[chan
] == SEL_0
)
309 assign_source(*I
, get_const(0.0f
));
310 else if (n
.bc
.dst_sel
[chan
] == SEL_1
)
311 assign_source(*I
, get_const(1.0f
));
318 bool expr_handler::fold(cf_node
& n
) {
322 void expr_handler::apply_alu_src_mod(const bc_alu
&bc
, unsigned src
,
324 const bc_alu_src
&s
= bc
.src
[src
];
332 void expr_handler::apply_alu_dst_mod(const bc_alu
&bc
, literal
&v
) {
333 float omod_coeff
[] = {2.0f
, 4.0, 0.5f
};
336 v
= v
.f
* omod_coeff
[bc
.omod
- 1];
338 v
= float_clamp(v
.f
);
341 bool expr_handler::args_equal(const vvec
&l
, const vvec
&r
) {
343 assert(l
.size() == r
.size());
347 for (int k
= 0; k
< s
; ++k
) {
348 if (!l
[k
]->v_equal(r
[k
]))
355 bool expr_handler::ops_equal(const alu_node
*l
, const alu_node
* r
) {
356 const bc_alu
&b0
= l
->bc
;
357 const bc_alu
&b1
= r
->bc
;
362 unsigned src_count
= b0
.op_ptr
->src_count
;
364 if (b0
.index_mode
!= b1
.index_mode
)
367 if (b0
.clamp
!= b1
.clamp
|| b0
.omod
!= b1
.omod
)
370 for (unsigned s
= 0; s
< src_count
; ++s
) {
371 const bc_alu_src
&s0
= b0
.src
[s
];
372 const bc_alu_src
&s1
= b1
.src
[s
];
374 if (s0
.abs
!= s1
.abs
|| s0
.neg
!= s1
.neg
)
377 return args_equal(l
->src
, r
->src
);
380 bool expr_handler::fold_alu_op1(alu_node
& n
) {
382 assert(!n
.src
.empty());
386 value
* v0
= n
.src
[0]->gvalue();
388 assert(v0
&& n
.dst
[0]);
390 if (!v0
->is_const()) {
391 // handle (MOV -(MOV -x)) => (MOV x)
392 if (n
.bc
.op
== ALU_OP1_MOV
&& n
.bc
.src
[0].neg
&& !n
.bc
.src
[1].abs
393 && v0
->def
&& v0
->def
->is_alu_op(ALU_OP1_MOV
)) {
394 alu_node
*sd
= static_cast<alu_node
*>(v0
->def
);
395 if (!sd
->bc
.clamp
&& !sd
->bc
.omod
&& !sd
->bc
.src
[0].abs
&&
397 n
.src
[0] = sd
->src
[0];
399 v0
= n
.src
[0]->gvalue();
403 if ((n
.bc
.op
== ALU_OP1_MOV
|| n
.bc
.op
== ALU_OP1_MOVA_INT
||
404 n
.bc
.op
== ALU_OP1_MOVA_GPR_INT
)
405 && n
.bc
.clamp
== 0 && n
.bc
.omod
== 0
406 && n
.bc
.src
[0].abs
== 0 && n
.bc
.src
[0].neg
== 0 &&
407 n
.src
.size() == 1 /* RIM/SIM can be appended as additional values */) {
408 assign_source(n
.dst
[0], v0
);
414 literal dv
, cv
= v0
->get_const_value();
415 apply_alu_src_mod(n
.bc
, 0, cv
);
418 case ALU_OP1_CEIL
: dv
= ceil(cv
.f
); break;
419 case ALU_OP1_COS
: dv
= cos(cv
.f
* 2.0f
* M_PI
); break;
420 case ALU_OP1_EXP_IEEE
: dv
= exp2(cv
.f
); break;
421 case ALU_OP1_FLOOR
: dv
= floor(cv
.f
); break;
422 case ALU_OP1_FLT_TO_INT
: dv
= (int)cv
.f
; break; // FIXME: round modes ????
423 case ALU_OP1_FLT_TO_INT_FLOOR
: dv
= (int32_t)floor(cv
.f
); break;
424 case ALU_OP1_FLT_TO_INT_RPI
: dv
= (int32_t)floor(cv
.f
+ 0.5f
); break;
425 case ALU_OP1_FLT_TO_INT_TRUNC
: dv
= (int32_t)trunc(cv
.f
); break;
426 case ALU_OP1_FLT_TO_UINT
: dv
= (uint32_t)cv
.f
; break;
427 case ALU_OP1_FRACT
: dv
= cv
.f
- floor(cv
.f
); break;
428 case ALU_OP1_INT_TO_FLT
: dv
= (float)cv
.i
; break;
429 case ALU_OP1_LOG_CLAMPED
:
430 case ALU_OP1_LOG_IEEE
:
434 // don't fold to NAN, let the GPU handle it for now
435 // (prevents degenerate LIT tests from failing)
438 case ALU_OP1_MOV
: dv
= cv
; break;
439 case ALU_OP1_MOVA_INT
: dv
= cv
; break; // FIXME ???
440 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
441 // case ALU_OP1_MOVA_GPR_INT:
442 case ALU_OP1_NOT_INT
: dv
= ~cv
.i
; break;
443 case ALU_OP1_PRED_SET_INV
:
444 dv
= cv
.f
== 0.0f
? 1.0f
: (cv
.f
== 1.0f
? 0.0f
: cv
.f
); break;
445 case ALU_OP1_PRED_SET_RESTORE
: dv
= cv
; break;
446 case ALU_OP1_RECIPSQRT_CLAMPED
:
447 case ALU_OP1_RECIPSQRT_FF
:
448 case ALU_OP1_RECIPSQRT_IEEE
: dv
= 1.0f
/ sqrt(cv
.f
); break;
449 case ALU_OP1_RECIP_CLAMPED
:
450 case ALU_OP1_RECIP_FF
:
451 case ALU_OP1_RECIP_IEEE
: dv
= 1.0f
/ cv
.f
; break;
452 // case ALU_OP1_RECIP_INT:
453 case ALU_OP1_RECIP_UINT
: dv
.u
= (1ull << 32) / cv
.u
; break;
454 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
455 case ALU_OP1_SIN
: dv
= sin(cv
.f
* 2.0f
* M_PI
); break;
456 case ALU_OP1_SQRT_IEEE
: dv
= sqrt(cv
.f
); break;
457 case ALU_OP1_TRUNC
: dv
= trunc(cv
.f
); break;
463 apply_alu_dst_mod(n
.bc
, dv
);
464 assign_source(n
.dst
[0], get_const(dv
));
468 bool expr_handler::fold_mul_add(alu_node
*n
) {
471 value
* v0
= n
->src
[0]->gvalue();
473 alu_node
*d0
= (v0
->def
&& v0
->def
->is_alu_inst()) ?
474 static_cast<alu_node
*>(v0
->def
) : NULL
;
477 if (d0
->is_alu_op(ALU_OP2_MUL_IEEE
))
479 else if (d0
->is_alu_op(ALU_OP2_MUL
))
484 if (!d0
->bc
.src
[0].abs
&& !d0
->bc
.src
[1].abs
&&
485 !n
->bc
.src
[1].abs
&& !n
->bc
.src
[0].abs
&& !d0
->bc
.omod
&&
486 !d0
->bc
.clamp
&& !n
->bc
.omod
&&
487 (!d0
->src
[0]->is_kcache() || !d0
->src
[1]->is_kcache() ||
488 !n
->src
[1]->is_kcache())) {
490 bool mul_neg
= n
->bc
.src
[0].neg
;
493 n
->bc
.set_op(ieee
? ALU_OP3_MULADD_IEEE
: ALU_OP3_MULADD
);
494 n
->src
[2] = n
->src
[1];
495 n
->bc
.src
[2] = n
->bc
.src
[1];
496 n
->src
[0] = d0
->src
[0];
497 n
->bc
.src
[0] = d0
->bc
.src
[0];
498 n
->src
[1] = d0
->src
[1];
499 n
->bc
.src
[1] = d0
->bc
.src
[1];
501 n
->bc
.src
[0].neg
^= mul_neg
;
508 value
* v1
= n
->src
[1]->gvalue();
510 alu_node
*d1
= (v1
->def
&& v1
->def
->is_alu_inst()) ?
511 static_cast<alu_node
*>(v1
->def
) : NULL
;
514 if (d1
->is_alu_op(ALU_OP2_MUL_IEEE
))
516 else if (d1
->is_alu_op(ALU_OP2_MUL
))
521 if (!d1
->bc
.src
[1].abs
&& !d1
->bc
.src
[0].abs
&&
522 !n
->bc
.src
[0].abs
&& !n
->bc
.src
[1].abs
&& !d1
->bc
.omod
&&
523 !d1
->bc
.clamp
&& !n
->bc
.omod
&&
524 (!d1
->src
[0]->is_kcache() || !d1
->src
[1]->is_kcache() ||
525 !n
->src
[0]->is_kcache())) {
527 bool mul_neg
= n
->bc
.src
[1].neg
;
530 n
->bc
.set_op(ieee
? ALU_OP3_MULADD_IEEE
: ALU_OP3_MULADD
);
531 n
->src
[2] = n
->src
[0];
532 n
->bc
.src
[2] = n
->bc
.src
[0];
533 n
->src
[1] = d1
->src
[1];
534 n
->bc
.src
[1] = d1
->bc
.src
[1];
535 n
->src
[0] = d1
->src
[0];
536 n
->bc
.src
[0] = d1
->bc
.src
[0];
538 n
->bc
.src
[1].neg
^= mul_neg
;
548 bool expr_handler::eval_const_op(unsigned op
, literal
&r
,
549 literal cv0
, literal cv1
) {
552 case ALU_OP2_ADD
: r
= cv0
.f
+ cv1
.f
; break;
553 case ALU_OP2_ADDC_UINT
:
554 r
= (uint32_t)(((uint64_t)cv0
.u
+ cv1
.u
)>>32); break;
555 case ALU_OP2_ADD_INT
: r
= cv0
.i
+ cv1
.i
; break;
556 case ALU_OP2_AND_INT
: r
= cv0
.i
& cv1
.i
; break;
557 case ALU_OP2_ASHR_INT
: r
= cv0
.i
>> (cv1
.i
& 0x1F); break;
558 case ALU_OP2_BFM_INT
:
559 r
= (((1 << (cv0
.i
& 0x1F)) - 1) << (cv1
.i
& 0x1F)); break;
560 case ALU_OP2_LSHL_INT
: r
= cv0
.i
<< cv1
.i
; break;
561 case ALU_OP2_LSHR_INT
: r
= cv0
.u
>> cv1
.u
; break;
563 case ALU_OP2_MAX_DX10
: r
= cv0
.f
> cv1
.f
? cv0
.f
: cv1
.f
; break;
564 case ALU_OP2_MAX_INT
: r
= cv0
.i
> cv1
.i
? cv0
.i
: cv1
.i
; break;
565 case ALU_OP2_MAX_UINT
: r
= cv0
.u
> cv1
.u
? cv0
.u
: cv1
.u
; break;
567 case ALU_OP2_MIN_DX10
: r
= cv0
.f
< cv1
.f
? cv0
.f
: cv1
.f
; break;
568 case ALU_OP2_MIN_INT
: r
= cv0
.i
< cv1
.i
? cv0
.i
: cv1
.i
; break;
569 case ALU_OP2_MIN_UINT
: r
= cv0
.u
< cv1
.u
? cv0
.u
: cv1
.u
; break;
571 case ALU_OP2_MUL_IEEE
: r
= cv0
.f
* cv1
.f
; break;
572 case ALU_OP2_MULHI_INT
:
573 r
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
)>>32); break;
574 case ALU_OP2_MULHI_UINT
:
575 r
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
)>>32); break;
576 case ALU_OP2_MULLO_INT
:
577 r
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
578 case ALU_OP2_MULLO_UINT
:
579 r
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
580 case ALU_OP2_OR_INT
: r
= cv0
.i
| cv1
.i
; break;
581 case ALU_OP2_SUB_INT
: r
= cv0
.i
- cv1
.i
; break;
582 case ALU_OP2_XOR_INT
: r
= cv0
.i
^ cv1
.i
; break;
591 // fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
592 bool expr_handler::fold_assoc(alu_node
*n
) {
599 unsigned op
= n
->bc
.op
;
600 bool allow_neg
= false, cur_neg
= false;
601 bool distribute_neg
= false;
605 distribute_neg
= true;
609 case ALU_OP2_MUL_IEEE
:
616 case ALU_OP3_MULADD_IEEE
:
618 op
= ALU_OP2_MUL_IEEE
;
621 if (n
->bc
.op_ptr
->src_count
!= 2)
625 // check if we can evaluate the op
626 if (!eval_const_op(op
, cr
, literal(0), literal(0)))
631 value
*v0
= a
->src
[0]->gvalue();
632 value
*v1
= a
->src
[1]->gvalue();
636 if (v1
->is_const()) {
637 literal arg
= v1
->get_const_value();
638 apply_alu_src_mod(a
->bc
, 1, arg
);
639 if (cur_neg
&& distribute_neg
)
645 eval_const_op(op
, cr
, cr
, arg
);
648 alu_node
*d0
= static_cast<alu_node
*>(v0
->def
);
649 if ((d0
->is_alu_op(op
) ||
650 (op
== ALU_OP2_MUL_IEEE
&&
651 d0
->is_alu_op(ALU_OP2_MUL
))) &&
652 !d0
->bc
.omod
&& !d0
->bc
.clamp
&&
654 (!a
->bc
.src
[0].neg
|| allow_neg
)) {
655 cur_neg
^= a
->bc
.src
[0].neg
;
664 if (v0
->is_const()) {
665 literal arg
= v0
->get_const_value();
666 apply_alu_src_mod(a
->bc
, 0, arg
);
667 if (cur_neg
&& distribute_neg
)
671 eval_const_op(op
, cr
, cr
, arg
);
679 eval_const_op(op
, cr
, cr
, arg
);
682 alu_node
*d1
= static_cast<alu_node
*>(v1
->def
);
683 if ((d1
->is_alu_op(op
) ||
684 (op
== ALU_OP2_MUL_IEEE
&&
685 d1
->is_alu_op(ALU_OP2_MUL
))) &&
686 !d1
->bc
.omod
&& !d1
->bc
.clamp
&&
688 (!a
->bc
.src
[1].neg
|| allow_neg
)) {
689 cur_neg
^= a
->bc
.src
[1].neg
;
701 if (last_arg
== -1) {
703 apply_alu_dst_mod(n
->bc
, cr
);
705 if (n
->bc
.op
== op
) {
706 convert_to_mov(*n
, sh
.get_const_value(cr
));
709 } else { // MULADD => ADD
710 n
->src
[0] = n
->src
[2];
711 n
->bc
.src
[0] = n
->bc
.src
[2];
712 n
->src
[1] = sh
.get_const_value(cr
);
713 memset(&n
->bc
.src
[1], 0, sizeof(bc_alu_src
));
716 n
->bc
.set_op(ALU_OP2_ADD
);
718 } else if (last_arg
>= 0) {
719 n
->src
[0] = a
->src
[last_arg
];
720 n
->bc
.src
[0] = a
->bc
.src
[last_arg
];
721 n
->bc
.src
[0].neg
^= cur_neg
;
722 n
->src
[1] = sh
.get_const_value(cr
);
723 memset(&n
->bc
.src
[1], 0, sizeof(bc_alu_src
));
729 bool expr_handler::fold_alu_op2(alu_node
& n
) {
731 if (n
.src
.size() < 2)
734 unsigned flags
= n
.bc
.op_ptr
->flags
;
736 if (flags
& AF_SET
) {
737 return fold_setcc(n
);
740 if (!sh
.safe_math
&& (flags
& AF_M_ASSOC
)) {
745 value
* v0
= n
.src
[0]->gvalue();
746 value
* v1
= n
.src
[1]->gvalue();
750 // handle some operations with equal args, e.g. x + x => x * 2
752 if (n
.bc
.src
[0].neg
== n
.bc
.src
[1].neg
&&
753 n
.bc
.src
[0].abs
== n
.bc
.src
[1].abs
) {
755 case ALU_OP2_MIN
: // (MIN x, x) => (MOV x)
757 convert_to_mov(n
, v0
, n
.bc
.src
[0].neg
, n
.bc
.src
[0].abs
);
758 return fold_alu_op1(n
);
759 case ALU_OP2_ADD
: // (ADD x, x) => (MUL x, 2)
761 n
.src
[1] = sh
.get_const_value(2.0f
);
762 memset(&n
.bc
.src
[1], 0, sizeof(bc_alu_src
));
763 n
.bc
.set_op(ALU_OP2_MUL
);
764 return fold_alu_op2(n
);
769 if (n
.bc
.src
[0].neg
!= n
.bc
.src
[1].neg
&&
770 n
.bc
.src
[0].abs
== n
.bc
.src
[1].abs
) {
772 case ALU_OP2_ADD
: // (ADD x, -x) => (MOV 0)
774 convert_to_mov(n
, sh
.get_const_value(literal(0)));
775 return fold_alu_op1(n
);
782 if (n
.bc
.op
== ALU_OP2_ADD
) {
783 if (fold_mul_add(&n
))
787 bool isc0
= v0
->is_const();
788 bool isc1
= v1
->is_const();
793 literal dv
, cv0
, cv1
;
796 cv0
= v0
->get_const_value();
797 apply_alu_src_mod(n
.bc
, 0, cv0
);
801 cv1
= v1
->get_const_value();
802 apply_alu_src_mod(n
.bc
, 1, cv1
);
807 if (!eval_const_op(n
.bc
.op
, dv
, cv0
, cv1
))
810 } else { // one source is const
812 if (isc0
&& cv0
== literal(0)) {
815 case ALU_OP2_ADD_INT
:
816 case ALU_OP2_MAX_UINT
:
818 case ALU_OP2_XOR_INT
:
819 convert_to_mov(n
, n
.src
[1], n
.bc
.src
[1].neg
, n
.bc
.src
[1].abs
);
820 return fold_alu_op1(n
);
821 case ALU_OP2_AND_INT
:
822 case ALU_OP2_ASHR_INT
:
823 case ALU_OP2_LSHL_INT
:
824 case ALU_OP2_LSHR_INT
:
825 case ALU_OP2_MIN_UINT
:
827 case ALU_OP2_MULHI_UINT
:
828 case ALU_OP2_MULLO_UINT
:
829 convert_to_mov(n
, sh
.get_const_value(literal(0)));
830 return fold_alu_op1(n
);
832 } else if (isc1
&& cv1
== literal(0)) {
835 case ALU_OP2_ADD_INT
:
836 case ALU_OP2_ASHR_INT
:
837 case ALU_OP2_LSHL_INT
:
838 case ALU_OP2_LSHR_INT
:
839 case ALU_OP2_MAX_UINT
:
841 case ALU_OP2_SUB_INT
:
842 case ALU_OP2_XOR_INT
:
843 convert_to_mov(n
, n
.src
[0], n
.bc
.src
[0].neg
, n
.bc
.src
[0].abs
);
844 return fold_alu_op1(n
);
845 case ALU_OP2_AND_INT
:
846 case ALU_OP2_MIN_UINT
:
848 case ALU_OP2_MULHI_UINT
:
849 case ALU_OP2_MULLO_UINT
:
850 convert_to_mov(n
, sh
.get_const_value(literal(0)));
851 return fold_alu_op1(n
);
853 } else if (isc0
&& cv0
== literal(1.0f
)) {
856 case ALU_OP2_MUL_IEEE
:
857 convert_to_mov(n
, n
.src
[1], n
.bc
.src
[1].neg
, n
.bc
.src
[1].abs
);
858 return fold_alu_op1(n
);
860 } else if (isc1
&& cv1
== literal(1.0f
)) {
863 case ALU_OP2_MUL_IEEE
:
864 convert_to_mov(n
, n
.src
[0], n
.bc
.src
[0].neg
, n
.bc
.src
[0].abs
);
865 return fold_alu_op1(n
);
872 apply_alu_dst_mod(n
.bc
, dv
);
873 assign_source(n
.dst
[0], get_const(dv
));
877 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags
,
878 literal s1
, literal s2
) {
880 unsigned cmp_type
= alu_cnd_flags
& AF_CMP_TYPE_MASK
;
881 unsigned cc
= alu_cnd_flags
& AF_CC_MASK
;
886 case AF_CC_E
: return s1
.f
== s2
.f
;
887 case AF_CC_GT
: return s1
.f
> s2
.f
;
888 case AF_CC_GE
: return s1
.f
>= s2
.f
;
889 case AF_CC_NE
: return s1
.f
!= s2
.f
;
890 case AF_CC_LT
: return s1
.f
< s2
.f
;
891 case AF_CC_LE
: return s1
.f
<= s2
.f
;
893 assert(!"invalid condition code");
899 case AF_CC_E
: return s1
.i
== s2
.i
;
900 case AF_CC_GT
: return s1
.i
> s2
.i
;
901 case AF_CC_GE
: return s1
.i
>= s2
.i
;
902 case AF_CC_NE
: return s1
.i
!= s2
.i
;
903 case AF_CC_LT
: return s1
.i
< s2
.i
;
904 case AF_CC_LE
: return s1
.i
<= s2
.i
;
906 assert(!"invalid condition code");
912 case AF_CC_E
: return s1
.u
== s2
.u
;
913 case AF_CC_GT
: return s1
.u
> s2
.u
;
914 case AF_CC_GE
: return s1
.u
>= s2
.u
;
915 case AF_CC_NE
: return s1
.u
!= s2
.u
;
916 case AF_CC_LT
: return s1
.u
< s2
.u
;
917 case AF_CC_LE
: return s1
.u
<= s2
.u
;
919 assert(!"invalid condition code");
924 assert(!"invalid cmp_type");
929 bool expr_handler::fold_alu_op3(alu_node
& n
) {
931 if (n
.src
.size() < 3)
934 if (!sh
.safe_math
&& (n
.bc
.op_ptr
->flags
& AF_M_ASSOC
)) {
939 value
* v0
= n
.src
[0]->gvalue();
940 value
* v1
= n
.src
[1]->gvalue();
941 value
* v2
= n
.src
[2]->gvalue();
943 assert(v0
&& v1
&& v2
&& n
.dst
[0]);
945 bool isc0
= v0
->is_const();
946 bool isc1
= v1
->is_const();
947 bool isc2
= v2
->is_const();
949 literal dv
, cv0
, cv1
, cv2
;
952 cv0
= v0
->get_const_value();
953 apply_alu_src_mod(n
.bc
, 0, cv0
);
957 cv1
= v1
->get_const_value();
958 apply_alu_src_mod(n
.bc
, 1, cv1
);
962 cv2
= v2
->get_const_value();
963 apply_alu_src_mod(n
.bc
, 2, cv2
);
966 unsigned flags
= n
.bc
.op_ptr
->flags
;
968 if (flags
& AF_CMOV
) {
971 if (v1
== v2
&& n
.bc
.src
[1].neg
== n
.bc
.src
[2].neg
) {
972 // result doesn't depend on condition, convert to MOV
975 // src0 is const, condition can be evaluated, convert to MOV
976 bool cond
= evaluate_condition(n
.bc
.op_ptr
->flags
& (AF_CC_MASK
|
977 AF_CMP_TYPE_MASK
), cv0
, literal(0));
982 // if src is selected, convert to MOV
983 convert_to_mov(n
, n
.src
[src
], n
.bc
.src
[src
].neg
);
984 return fold_alu_op1(n
);
988 // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
989 if (!sh
.safe_math
&& (n
.bc
.op
== ALU_OP3_MULADD
||
990 n
.bc
.op
== ALU_OP3_MULADD_IEEE
)) {
992 unsigned op
= n
.bc
.op
== ALU_OP3_MULADD_IEEE
?
993 ALU_OP2_MUL_IEEE
: ALU_OP2_MUL
;
995 if (!isc2
&& v2
->def
&& v2
->def
->is_alu_op(op
)) {
997 alu_node
*md
= static_cast<alu_node
*>(v2
->def
);
998 value
*mv0
= md
->src
[0]->gvalue();
999 value
*mv1
= md
->src
[1]->gvalue();
1006 } else if (v0
== mv1
) {
1009 } else if (v1
== mv0
) {
1012 } else if (v1
== mv1
) {
1018 value
*va0
= es0
== 0 ? v1
: v0
;
1019 value
*va1
= es1
== 0 ? mv1
: mv0
;
1021 alu_node
*add
= sh
.create_alu();
1022 add
->bc
.set_op(ALU_OP2_ADD
);
1027 value
*t
= sh
.create_temp_value();
1032 add
->bc
.src
[0] = n
.bc
.src
[!es0
];
1033 add
->bc
.src
[1] = md
->bc
.src
[!es1
];
1035 add
->bc
.src
[1].neg
^= n
.bc
.src
[2].neg
^
1036 (n
.bc
.src
[es0
].neg
!= md
->bc
.src
[es1
].neg
);
1038 n
.insert_before(add
);
1044 n
.src
[0] = n
.src
[1];
1045 n
.bc
.src
[0] = n
.bc
.src
[1];
1049 memset(&n
.bc
.src
[1], 0, sizeof(bc_alu_src
));
1054 return fold_alu_op2(n
);
1059 if (!isc0
&& !isc1
&& !isc2
)
1062 if (isc0
&& isc1
&& isc2
) {
1064 case ALU_OP3_MULADD_IEEE
:
1065 case ALU_OP3_MULADD
: dv
= cv0
.f
* cv1
.f
+ cv2
.f
; break;
1075 case ALU_OP3_MULADD
:
1076 case ALU_OP3_MULADD_IEEE
:
1078 n
.bc
.set_op(ALU_OP2_ADD
);
1079 n
.src
[0] = sh
.get_const_value(dv
);
1080 memset(&n
.bc
.src
[0], 0, sizeof(bc_alu_src
));
1081 n
.src
[1] = n
.src
[2];
1082 n
.bc
.src
[1] = n
.bc
.src
[2];
1084 return fold_alu_op2(n
);
1088 if (n
.bc
.op
== ALU_OP3_MULADD
) {
1089 if ((isc0
&& cv0
== literal(0)) || (isc1
&& cv1
== literal(0))) {
1090 convert_to_mov(n
, n
.src
[2], n
.bc
.src
[2].neg
, n
.bc
.src
[2].abs
);
1091 return fold_alu_op1(n
);
1095 if (n
.bc
.op
== ALU_OP3_MULADD
|| n
.bc
.op
== ALU_OP3_MULADD_IEEE
) {
1096 unsigned op
= n
.bc
.op
== ALU_OP3_MULADD_IEEE
?
1097 ALU_OP2_MUL_IEEE
: ALU_OP2_MUL
;
1099 if (isc1
&& v0
== v2
) {
1100 cv1
.f
+= (n
.bc
.src
[2].neg
!= n
.bc
.src
[0].neg
? -1.0f
: 1.0f
);
1101 n
.src
[1] = sh
.get_const_value(cv1
);
1102 n
.bc
.src
[1].neg
= 0;
1103 n
.bc
.src
[1].abs
= 0;
1106 return fold_alu_op2(n
);
1107 } else if (isc0
&& v1
== v2
) {
1108 cv0
.f
+= (n
.bc
.src
[2].neg
!= n
.bc
.src
[1].neg
? -1.0f
: 1.0f
);
1109 n
.src
[0] = sh
.get_const_value(cv0
);
1110 n
.bc
.src
[0].neg
= 0;
1111 n
.bc
.src
[0].abs
= 0;
1114 return fold_alu_op2(n
);
1121 apply_alu_dst_mod(n
.bc
, dv
);
1122 assign_source(n
.dst
[0], get_const(dv
));
1126 unsigned invert_setcc_condition(unsigned cc
, bool &swap_args
) {
1130 case AF_CC_E
: ncc
= AF_CC_NE
; break;
1131 case AF_CC_NE
: ncc
= AF_CC_E
; break;
1132 case AF_CC_GE
: ncc
= AF_CC_GT
; swap_args
= true; break;
1133 case AF_CC_GT
: ncc
= AF_CC_GE
; swap_args
= true; break;
1135 assert(!"unexpected condition code");
1141 unsigned get_setcc_op(unsigned cc
, unsigned cmp_type
, bool int_dst
) {
1143 if (int_dst
&& cmp_type
== AF_FLOAT_CMP
) {
1145 case AF_CC_E
: return ALU_OP2_SETE_DX10
;
1146 case AF_CC_NE
: return ALU_OP2_SETNE_DX10
;
1147 case AF_CC_GT
: return ALU_OP2_SETGT_DX10
;
1148 case AF_CC_GE
: return ALU_OP2_SETGE_DX10
;
1153 case AF_FLOAT_CMP
: {
1155 case AF_CC_E
: return ALU_OP2_SETE
;
1156 case AF_CC_NE
: return ALU_OP2_SETNE
;
1157 case AF_CC_GT
: return ALU_OP2_SETGT
;
1158 case AF_CC_GE
: return ALU_OP2_SETGE
;
1164 case AF_CC_E
: return ALU_OP2_SETE_INT
;
1165 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
1166 case AF_CC_GT
: return ALU_OP2_SETGT_INT
;
1167 case AF_CC_GE
: return ALU_OP2_SETGE_INT
;
1173 case AF_CC_E
: return ALU_OP2_SETE_INT
;
1174 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
1175 case AF_CC_GT
: return ALU_OP2_SETGT_UINT
;
1176 case AF_CC_GE
: return ALU_OP2_SETGE_UINT
;
1183 assert(!"unexpected cc&cmp_type combination");
1187 unsigned get_predsetcc_op(unsigned cc
, unsigned cmp_type
) {
1190 case AF_FLOAT_CMP
: {
1192 case AF_CC_E
: return ALU_OP2_PRED_SETE
;
1193 case AF_CC_NE
: return ALU_OP2_PRED_SETNE
;
1194 case AF_CC_GT
: return ALU_OP2_PRED_SETGT
;
1195 case AF_CC_GE
: return ALU_OP2_PRED_SETGE
;
1201 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
1202 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
1203 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_INT
;
1204 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_INT
;
1210 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
1211 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
1212 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_UINT
;
1213 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_UINT
;
1219 assert(!"unexpected cc&cmp_type combination");
1223 unsigned get_killcc_op(unsigned cc
, unsigned cmp_type
) {
1226 case AF_FLOAT_CMP
: {
1228 case AF_CC_E
: return ALU_OP2_KILLE
;
1229 case AF_CC_NE
: return ALU_OP2_KILLNE
;
1230 case AF_CC_GT
: return ALU_OP2_KILLGT
;
1231 case AF_CC_GE
: return ALU_OP2_KILLGE
;
1237 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
1238 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
1239 case AF_CC_GT
: return ALU_OP2_KILLGT_INT
;
1240 case AF_CC_GE
: return ALU_OP2_KILLGE_INT
;
1246 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
1247 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
1248 case AF_CC_GT
: return ALU_OP2_KILLGT_UINT
;
1249 case AF_CC_GE
: return ALU_OP2_KILLGE_UINT
;
1255 assert(!"unexpected cc&cmp_type combination");
1259 unsigned get_cndcc_op(unsigned cc
, unsigned cmp_type
) {
1262 case AF_FLOAT_CMP
: {
1264 case AF_CC_E
: return ALU_OP3_CNDE
;
1265 case AF_CC_GT
: return ALU_OP3_CNDGT
;
1266 case AF_CC_GE
: return ALU_OP3_CNDGE
;
1272 case AF_CC_E
: return ALU_OP3_CNDE_INT
;
1273 case AF_CC_GT
: return ALU_OP3_CNDGT_INT
;
1274 case AF_CC_GE
: return ALU_OP3_CNDGE_INT
;
1280 assert(!"unexpected cc&cmp_type combination");
1285 void convert_predset_to_set(shader
& sh
, alu_node
* a
) {
1287 unsigned flags
= a
->bc
.op_ptr
->flags
;
1288 unsigned cc
= flags
& AF_CC_MASK
;
1289 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
1291 bool swap_args
= false;
1293 cc
= invert_setcc_condition(cc
, swap_args
);
1295 unsigned newop
= get_setcc_op(cc
, cmp_type
, true);
1298 a
->bc
.set_op(newop
);
1301 std::swap(a
->src
[0], a
->src
[1]);
1302 std::swap(a
->bc
.src
[0], a
->bc
.src
[1]);
1305 a
->bc
.update_exec_mask
= 0;
1306 a
->bc
.update_pred
= 0;
1309 } // namespace r600_sb