2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "sb_shader.h"
33 value
* get_select_value_for_em(shader
& sh
, value
* em
) {
37 node
*predset
= em
->def
;
38 if (!predset
->is_pred_set())
41 alu_node
*s
= sh
.clone(static_cast<alu_node
*>(predset
));
42 convert_predset_to_set(sh
, s
);
44 predset
->insert_after(s
);
46 value
* &d0
= s
->dst
[0];
47 d0
= sh
.create_temp_value();
52 expr_handler::expr_handler(shader
& sh
) : sh(sh
), vt(sh
.vt
) {}
54 value
* expr_handler::get_const(const literal
&l
) {
55 value
*v
= sh
.get_const_value(l
);
61 void expr_handler::assign_source(value
*dst
, value
*src
) {
62 dst
->gvn_source
= src
->gvn_source
;
65 bool expr_handler::equal(value
*l
, value
*r
) {
69 if (l
->gvalue() == r
->gvalue())
73 return defs_equal(l
, r
);
75 if (l
->is_rel() && r
->is_rel())
76 return ivars_equal(l
, r
);
81 bool expr_handler::ivars_equal(value
* l
, value
* r
) {
82 if (l
->rel
->gvalue() == r
->rel
->gvalue()
83 && l
->select
== r
->select
) {
85 vvec
&lv
= l
->mdef
.empty() ? l
->muse
: l
->mdef
;
86 vvec
&rv
= r
->mdef
.empty() ? r
->muse
: r
->mdef
;
88 // FIXME: replace this with more precise aliasing test
94 bool expr_handler::defs_equal(value
* l
, value
* r
) {
99 if (d1
->type
!= d2
->type
|| d1
->subtype
!= d2
->subtype
)
102 if (d1
->is_pred_set() || d2
->is_pred_set())
105 if (d1
->type
== NT_OP
) {
106 switch (d1
->subtype
) {
109 static_cast<alu_node
*>(d1
),
110 static_cast<alu_node
*>(d2
));
111 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
112 // static_cast<fetch_node*>(d2);
113 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
114 // static_cast<cf_node*>(d2);
122 bool expr_handler::try_fold(value
* v
) {
123 assert(!v
->gvn_source
);
134 bool expr_handler::try_fold(node
* n
) {
135 return n
->fold_dispatch(this);
138 bool expr_handler::fold(node
& n
) {
139 if (n
.subtype
== NST_PHI
) {
143 // FIXME disabling phi folding for registers for now, otherwise we lose
144 // control flow information in some cases
145 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
146 // probably control flow transformation is required to enable it
150 for(vvec::iterator I
= n
.src
.begin() + 1, E
= n
.src
.end(); I
!= E
; ++I
) {
156 assign_source(n
.dst
[0], s
);
158 assert(n
.subtype
== NST_PSI
);
159 assert(n
.src
.size() >= 6);
162 assert(s
->gvn_source
);
164 for(vvec::iterator I
= n
.src
.begin() + 3, E
= n
.src
.end(); I
!= E
; I
+= 3) {
169 assign_source(n
.dst
[0], s
);
174 bool expr_handler::fold(container_node
& n
) {
178 bool expr_handler::fold_setcc(alu_node
&n
) {
185 bool expr_handler::fold(alu_node
& n
) {
187 if (n
.bc
.op_ptr
->flags
& (AF_PRED
| AF_KILL
)) {
192 switch (n
.bc
.op_ptr
->src_count
) {
193 case 1: return fold_alu_op1(n
);
194 case 2: return fold_alu_op2(n
);
195 case 3: return fold_alu_op3(n
);
202 bool expr_handler::fold(fetch_node
& n
) {
205 for (vvec::iterator I
= n
.dst
.begin(), E
= n
.dst
.end(); I
!= E
; ++I
) {
208 if (n
.bc
.dst_sel
[chan
] == SEL_0
)
209 assign_source(*I
, get_const(0.0f
));
210 else if (n
.bc
.dst_sel
[chan
] == SEL_1
)
211 assign_source(*I
, get_const(1.0f
));
218 bool expr_handler::fold(cf_node
& n
) {
222 void expr_handler::apply_alu_src_mod(const bc_alu
&bc
, unsigned src
,
224 const bc_alu_src
&s
= bc
.src
[src
];
232 void expr_handler::apply_alu_dst_mod(const bc_alu
&bc
, literal
&v
) {
233 float omod_coeff
[] = {2.0f
, 4.0, 0.5f
};
236 v
= v
.f
* omod_coeff
[bc
.omod
- 1];
238 v
= float_clamp(v
.f
);
241 bool expr_handler::args_equal(const vvec
&l
, const vvec
&r
) {
243 assert(l
.size() == r
.size());
247 for (int k
= 0; k
< s
; ++k
) {
248 if (!l
[k
]->v_equal(r
[k
]))
255 bool expr_handler::ops_equal(const alu_node
*l
, const alu_node
* r
) {
256 const bc_alu
&b0
= l
->bc
;
257 const bc_alu
&b1
= r
->bc
;
262 unsigned src_count
= b0
.op_ptr
->src_count
;
264 if (b0
.index_mode
!= b1
.index_mode
)
267 if (b0
.clamp
!= b1
.clamp
|| b0
.omod
!= b1
.omod
)
270 for (unsigned s
= 0; s
< src_count
; ++s
) {
271 const bc_alu_src
&s0
= b0
.src
[s
];
272 const bc_alu_src
&s1
= b1
.src
[s
];
274 if (s0
.abs
!= s1
.abs
|| s0
.neg
!= s1
.neg
)
277 return args_equal(l
->src
, r
->src
);
280 bool expr_handler::fold_alu_op1(alu_node
& n
) {
282 assert(!n
.src
.empty());
286 value
* v0
= n
.src
[0];
288 assert(v0
&& n
.dst
[0]);
290 if (!v0
->is_const()) {
291 if ((n
.bc
.op
== ALU_OP1_MOV
|| n
.bc
.op
== ALU_OP1_MOVA_INT
||
292 n
.bc
.op
== ALU_OP1_MOVA_GPR_INT
)
293 && n
.bc
.clamp
== 0 && n
.bc
.omod
== 0
294 && n
.bc
.src
[0].abs
== 0 && n
.bc
.src
[0].neg
== 0) {
295 assign_source(n
.dst
[0], v0
);
301 literal dv
, cv
= v0
->get_const_value();
302 apply_alu_src_mod(n
.bc
, 0, cv
);
305 case ALU_OP1_CEIL
: dv
= ceil(cv
.f
); break;
306 case ALU_OP1_COS
: dv
= cos(cv
.f
* 2.0f
* M_PI
); break;
307 case ALU_OP1_EXP_IEEE
: dv
= exp2(cv
.f
); break;
308 case ALU_OP1_FLOOR
: dv
= floor(cv
.f
); break;
309 case ALU_OP1_FLT_TO_INT
: dv
= (int)cv
.f
; break; // FIXME: round modes ????
310 case ALU_OP1_FLT_TO_INT_FLOOR
: dv
= (int32_t)floor(cv
.f
); break;
311 case ALU_OP1_FLT_TO_INT_RPI
: dv
= (int32_t)floor(cv
.f
+ 0.5f
); break;
312 case ALU_OP1_FLT_TO_INT_TRUNC
: dv
= (int32_t)trunc(cv
.f
); break;
313 case ALU_OP1_FLT_TO_UINT
: dv
= (uint32_t)cv
.f
; break;
314 case ALU_OP1_FRACT
: dv
= cv
.f
- floor(cv
.f
); break;
315 case ALU_OP1_INT_TO_FLT
: dv
= (float)cv
.i
; break;
316 case ALU_OP1_LOG_CLAMPED
:
317 case ALU_OP1_LOG_IEEE
:
321 // don't fold to NAN, let the GPU handle it for now
322 // (prevents degenerate LIT tests from failing)
325 case ALU_OP1_MOV
: dv
= cv
; break;
326 case ALU_OP1_MOVA_INT
: dv
= cv
; break; // FIXME ???
327 // case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
328 // case ALU_OP1_MOVA_GPR_INT:
329 case ALU_OP1_NOT_INT
: dv
= ~cv
.i
; break;
330 case ALU_OP1_PRED_SET_INV
:
331 dv
= cv
.f
== 0.0f
? 1.0f
: (cv
.f
== 1.0f
? 0.0f
: cv
.f
); break;
332 case ALU_OP1_PRED_SET_RESTORE
: dv
= cv
; break;
333 case ALU_OP1_RECIPSQRT_CLAMPED
:
334 case ALU_OP1_RECIPSQRT_FF
:
335 case ALU_OP1_RECIPSQRT_IEEE
: dv
= 1.0f
/ sqrt(cv
.f
); break;
336 case ALU_OP1_RECIP_CLAMPED
:
337 case ALU_OP1_RECIP_FF
:
338 case ALU_OP1_RECIP_IEEE
: dv
= 1.0f
/ cv
.f
; break;
339 // case ALU_OP1_RECIP_INT:
340 // case ALU_OP1_RECIP_UINT:
341 // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
342 case ALU_OP1_SIN
: dv
= sin(cv
.f
* 2.0f
* M_PI
); break;
343 case ALU_OP1_SQRT_IEEE
: dv
= sqrt(cv
.f
); break;
344 case ALU_OP1_TRUNC
: dv
= trunc(cv
.f
); break;
350 apply_alu_dst_mod(n
.bc
, dv
);
351 assign_source(n
.dst
[0], get_const(dv
));
355 bool expr_handler::fold_alu_op2(alu_node
& n
) {
357 if (n
.src
.size() < 2)
360 value
* v0
= n
.src
[0];
361 value
* v1
= n
.src
[1];
363 assert(v0
&& v1
&& n
.dst
[0]);
365 bool isc0
= v0
->is_const();
366 bool isc1
= v1
->is_const();
371 literal dv
, cv0
, cv1
;
374 cv0
= v0
->get_const_value();
375 apply_alu_src_mod(n
.bc
, 0, cv0
);
379 cv1
= v1
->get_const_value();
380 apply_alu_src_mod(n
.bc
, 1, cv1
);
385 case ALU_OP2_ADD
: dv
= cv0
.f
+ cv1
.f
; break;
386 case ALU_OP2_ADDC_UINT
:
387 dv
= (uint32_t)(((uint64_t)cv0
.u
+ cv1
.u
)>>32); break;
388 case ALU_OP2_ADD_INT
: dv
= cv0
.i
+ cv1
.i
; break;
389 case ALU_OP2_AND_INT
: dv
= cv0
.i
& cv1
.i
; break;
390 case ALU_OP2_ASHR_INT
: dv
= cv0
.i
>> (cv1
.i
& 0x1F); break;
391 case ALU_OP2_BFM_INT
:
392 dv
= (((1 << (cv0
.i
& 0x1F)) - 1) << (cv1
.i
& 0x1F)); break;
393 case ALU_OP2_LSHL_INT
: dv
= cv0
.i
<< cv1
.i
; break;
394 case ALU_OP2_LSHR_INT
: dv
= cv0
.u
>> cv1
.u
; break;
396 case ALU_OP2_MAX_DX10
: dv
= cv0
.f
> cv1
.f
? cv0
.f
: cv1
.f
; break;
397 case ALU_OP2_MAX_INT
: dv
= cv0
.i
> cv1
.i
? cv0
.i
: cv1
.i
; break;
398 case ALU_OP2_MAX_UINT
: dv
= cv0
.u
> cv1
.u
? cv0
.u
: cv1
.u
; break;
400 case ALU_OP2_MIN_DX10
: dv
= cv0
.f
< cv1
.f
? cv0
.f
: cv1
.f
; break;
401 case ALU_OP2_MIN_INT
: dv
= cv0
.i
< cv1
.i
? cv0
.i
: cv1
.i
; break;
402 case ALU_OP2_MIN_UINT
: dv
= cv0
.u
< cv1
.u
? cv0
.u
: cv1
.u
; break;
404 case ALU_OP2_MUL_IEEE
: dv
= cv0
.f
* cv1
.f
; break;
405 case ALU_OP2_MULHI_INT
:
406 dv
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
)>>32); break;
407 case ALU_OP2_MULHI_UINT
:
408 dv
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
)>>32); break;
409 case ALU_OP2_MULLO_INT
:
410 dv
= (int32_t)(((int64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
411 case ALU_OP2_MULLO_UINT
:
412 dv
= (uint32_t)(((uint64_t)cv0
.u
* cv1
.u
) & 0xFFFFFFFF); break;
413 case ALU_OP2_OR_INT
: dv
= cv0
.i
| cv1
.i
; break;
414 case ALU_OP2_SUB_INT
: dv
= cv0
.i
- cv1
.i
; break;
415 case ALU_OP2_XOR_INT
: dv
= cv0
.i
^ cv1
.i
; break;
417 case ALU_OP2_SETE
: dv
= cv0
.f
== cv1
.f
? 1.0f
: 0.0f
; break;
423 } else { // one source is const
425 // TODO handle 1 * anything, 0 * anything, 0 + anything, etc
430 apply_alu_dst_mod(n
.bc
, dv
);
431 assign_source(n
.dst
[0], get_const(dv
));
435 bool expr_handler::evaluate_condition(unsigned alu_cnd_flags
,
436 literal s1
, literal s2
) {
438 unsigned cmp_type
= alu_cnd_flags
& AF_CMP_TYPE_MASK
;
439 unsigned cc
= alu_cnd_flags
& AF_CC_MASK
;
444 case AF_CC_E
: return s1
.f
== s2
.f
;
445 case AF_CC_GT
: return s1
.f
> s2
.f
;
446 case AF_CC_GE
: return s1
.f
>= s2
.f
;
447 case AF_CC_NE
: return s1
.f
!= s2
.f
;
448 case AF_CC_LT
: return s1
.f
< s2
.f
;
449 case AF_CC_LE
: return s1
.f
<= s2
.f
;
451 assert(!"invalid condition code");
457 case AF_CC_E
: return s1
.i
== s2
.i
;
458 case AF_CC_GT
: return s1
.i
> s2
.i
;
459 case AF_CC_GE
: return s1
.i
>= s2
.i
;
460 case AF_CC_NE
: return s1
.i
!= s2
.i
;
461 case AF_CC_LT
: return s1
.i
< s2
.i
;
462 case AF_CC_LE
: return s1
.i
<= s2
.i
;
464 assert(!"invalid condition code");
470 case AF_CC_E
: return s1
.u
== s2
.u
;
471 case AF_CC_GT
: return s1
.u
> s2
.u
;
472 case AF_CC_GE
: return s1
.u
>= s2
.u
;
473 case AF_CC_NE
: return s1
.u
!= s2
.u
;
474 case AF_CC_LT
: return s1
.u
< s2
.u
;
475 case AF_CC_LE
: return s1
.u
<= s2
.u
;
477 assert(!"invalid condition code");
482 assert(!"invalid cmp_type");
487 bool expr_handler::fold_alu_op3(alu_node
& n
) {
489 if (n
.src
.size() < 3)
492 value
* v0
= n
.src
[0];
493 value
* v1
= n
.src
[1];
494 value
* v2
= n
.src
[2];
496 assert(v0
&& v1
&& v2
&& n
.dst
[0]);
498 bool isc0
= v0
->is_const();
499 bool isc1
= v1
->is_const();
500 bool isc2
= v2
->is_const();
502 literal dv
, cv0
, cv1
, cv2
;
505 cv0
= v0
->get_const_value();
506 apply_alu_src_mod(n
.bc
, 0, cv0
);
510 cv1
= v1
->get_const_value();
511 apply_alu_src_mod(n
.bc
, 1, cv1
);
515 cv2
= v2
->get_const_value();
516 apply_alu_src_mod(n
.bc
, 2, cv2
);
519 if (n
.bc
.op_ptr
->flags
& AF_CMOV
) {
522 if (v1
->gvalue() == v2
->gvalue() &&
523 n
.bc
.src
[1].neg
== n
.bc
.src
[2].neg
) {
524 // result doesn't depend on condition, convert to MOV
527 // src0 is const, condition can be evaluated, convert to MOV
528 bool cond
= evaluate_condition(n
.bc
.op_ptr
->flags
& (AF_CC_MASK
|
529 AF_CMP_TYPE_MASK
), cv0
, literal(0));
534 // if src is selected, convert to MOV
535 n
.bc
.src
[0] = n
.bc
.src
[src
];
536 n
.src
[0] = n
.src
[src
];
538 n
.bc
.set_op(ALU_OP1_MOV
);
539 return fold_alu_op1(n
);
543 if (!isc0
&& !isc1
&& !isc2
)
546 if (isc0
&& isc1
&& isc2
) {
548 case ALU_OP3_MULADD
: dv
= cv0
.f
* cv1
.f
+ cv2
.f
; break;
563 apply_alu_dst_mod(n
.bc
, dv
);
564 assign_source(n
.dst
[0], get_const(dv
));
568 unsigned invert_setcc_condition(unsigned cc
, bool &swap_args
) {
572 case AF_CC_E
: ncc
= AF_CC_NE
; break;
573 case AF_CC_NE
: ncc
= AF_CC_E
; break;
574 case AF_CC_GE
: ncc
= AF_CC_GT
; swap_args
= true; break;
575 case AF_CC_GT
: ncc
= AF_CC_GE
; swap_args
= true; break;
577 assert(!"unexpected condition code");
583 unsigned get_setcc_op(unsigned cc
, unsigned cmp_type
, bool int_dst
) {
585 if (int_dst
&& cmp_type
== AF_FLOAT_CMP
) {
587 case AF_CC_E
: return ALU_OP2_SETE_DX10
;
588 case AF_CC_NE
: return ALU_OP2_SETNE_DX10
;
589 case AF_CC_GT
: return ALU_OP2_SETGT_DX10
;
590 case AF_CC_GE
: return ALU_OP2_SETGE_DX10
;
597 case AF_CC_E
: return ALU_OP2_SETE
;
598 case AF_CC_NE
: return ALU_OP2_SETNE
;
599 case AF_CC_GT
: return ALU_OP2_SETGT
;
600 case AF_CC_GE
: return ALU_OP2_SETGE
;
606 case AF_CC_E
: return ALU_OP2_SETE_INT
;
607 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
608 case AF_CC_GT
: return ALU_OP2_SETGT_INT
;
609 case AF_CC_GE
: return ALU_OP2_SETGE_INT
;
615 case AF_CC_E
: return ALU_OP2_SETE_INT
;
616 case AF_CC_NE
: return ALU_OP2_SETNE_INT
;
617 case AF_CC_GT
: return ALU_OP2_SETGT_UINT
;
618 case AF_CC_GE
: return ALU_OP2_SETGE_UINT
;
625 assert(!"unexpected cc&cmp_type combination");
629 unsigned get_predsetcc_op(unsigned cc
, unsigned cmp_type
) {
634 case AF_CC_E
: return ALU_OP2_PRED_SETE
;
635 case AF_CC_NE
: return ALU_OP2_PRED_SETNE
;
636 case AF_CC_GT
: return ALU_OP2_PRED_SETGT
;
637 case AF_CC_GE
: return ALU_OP2_PRED_SETGE
;
643 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
644 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
645 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_INT
;
646 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_INT
;
652 case AF_CC_E
: return ALU_OP2_PRED_SETE_INT
;
653 case AF_CC_NE
: return ALU_OP2_PRED_SETNE_INT
;
654 case AF_CC_GT
: return ALU_OP2_PRED_SETGT_UINT
;
655 case AF_CC_GE
: return ALU_OP2_PRED_SETGE_UINT
;
661 assert(!"unexpected cc&cmp_type combination");
665 unsigned get_killcc_op(unsigned cc
, unsigned cmp_type
) {
670 case AF_CC_E
: return ALU_OP2_KILLE
;
671 case AF_CC_NE
: return ALU_OP2_KILLNE
;
672 case AF_CC_GT
: return ALU_OP2_KILLGT
;
673 case AF_CC_GE
: return ALU_OP2_KILLGE
;
679 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
680 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
681 case AF_CC_GT
: return ALU_OP2_KILLGT_INT
;
682 case AF_CC_GE
: return ALU_OP2_KILLGE_INT
;
688 case AF_CC_E
: return ALU_OP2_KILLE_INT
;
689 case AF_CC_NE
: return ALU_OP2_KILLNE_INT
;
690 case AF_CC_GT
: return ALU_OP2_KILLGT_UINT
;
691 case AF_CC_GE
: return ALU_OP2_KILLGE_UINT
;
697 assert(!"unexpected cc&cmp_type combination");
701 unsigned get_cndcc_op(unsigned cc
, unsigned cmp_type
) {
706 case AF_CC_E
: return ALU_OP3_CNDE
;
707 case AF_CC_GT
: return ALU_OP3_CNDGT
;
708 case AF_CC_GE
: return ALU_OP3_CNDGE
;
714 case AF_CC_E
: return ALU_OP3_CNDE_INT
;
715 case AF_CC_GT
: return ALU_OP3_CNDGT_INT
;
716 case AF_CC_GE
: return ALU_OP3_CNDGE_INT
;
722 assert(!"unexpected cc&cmp_type combination");
727 void convert_predset_to_set(shader
& sh
, alu_node
* a
) {
729 unsigned flags
= a
->bc
.op_ptr
->flags
;
730 unsigned cc
= flags
& AF_CC_MASK
;
731 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
733 bool swap_args
= false;
735 cc
= invert_setcc_condition(cc
, swap_args
);
737 unsigned newop
= get_setcc_op(cc
, cmp_type
, true);
743 std::swap(a
->src
[0], a
->src
[1]);
744 std::swap(a
->bc
.src
[0], a
->bc
.src
[1]);
747 a
->bc
.update_exec_mask
= 0;
748 a
->bc
.update_pred
= 0;
751 } // namespace r600_sb