/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
29 #include "sb_shader.h"
33 value
* get_select_value_for_em(shader
& sh
, value
* em
) {
37 node
*predset
= em
->def
;
38 if (!predset
->is_pred_set())
41 alu_node
*s
= sh
.clone(static_cast<alu_node
*>(predset
));
42 convert_predset_to_set(sh
, s
);
44 predset
->insert_after(s
);
46 value
* &d0
= s
->dst
[0];
47 d0
= sh
.create_temp_value();
// Construct an expression handler bound to shader sh; also caches a
// reference to sh.vt (presumably the shader's value table — confirm in
// sb_shader.h) for later lookups.
expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
54 value
* expr_handler::get_const(const literal
&l
) {
55 value
*v
= sh
.get_const_value(l
);
61 void expr_handler::assign_source(value
*dst
, value
*src
) {
62 dst
->gvn_source
= src
->gvn_source
;
// Equality test between two values, used by value numbering.
// NOTE(review): this block is truncated in the extracted source — lines are
// missing between the visible fragments (including the branch taken when
// the gvalues match, the guard before defs_equal, and the function tail),
// so the control flow shown below is NOT complete.
bool expr_handler::equal(value *l, value *r) {
	if (l->gvalue() == r->gvalue())
	// [source gap]
	return defs_equal(l, r);
	// [source gap]
	if (l->is_rel() && r->is_rel())
		return ivars_equal(l, r);
	// [source gap: function tail and closing brace missing]
81 bool expr_handler::ivars_equal(value
* l
, value
* r
) {
82 if (l
->rel
->gvalue() == r
->rel
->gvalue()
83 && l
->select
== r
->select
) {
85 vvec
&lv
= l
->mdef
.empty() ? l
->muse
: l
->mdef
;
86 vvec
&rv
= r
->mdef
.empty() ? r
->muse
: r
->mdef
;
88 // FIXME: replace this with more precise aliasing test
94 bool expr_handler::defs_equal(value
* l
, value
* r
) {
99 if (d1
->type
!= d2
->type
|| d1
->subtype
!= d2
->subtype
)
102 if (d1
->is_pred_set() || d2
->is_pred_set())
105 if (d1
->type
== NT_OP
) {
106 switch (d1
->subtype
) {
109 static_cast<alu_node
*>(d1
),
110 static_cast<alu_node
*>(d2
));
111 // case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
112 // static_cast<fetch_node*>(d2);
113 // case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
114 // static_cast<cf_node*>(d2);
// Attempt constant folding for a single value.
// NOTE(review): the body is truncated in the extracted source — only the
// precondition assert is visible; the folding logic, return value and
// closing brace are missing.
bool expr_handler::try_fold(value* v) {
	// Caller must not have assigned a canonical GVN source yet.
	assert(!v->gvn_source);
134 bool expr_handler::try_fold(node
* n
) {
135 return n
->fold_dispatch(this);
// Fold PHI/PSI nodes whose incoming sources all share one canonical value.
// NOTE(review): heavily truncated in the extracted source — the definition
// of 's' (the candidate common source), both loop bodies, the branch
// structure and the function tail are missing; gaps are marked below.
bool expr_handler::fold(node& n) {
	if (n.subtype == NST_PHI) {
		// [source gap]
		// FIXME disabling phi folding for registers for now, otherwise we lose
		// control flow information in some cases
		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
		// probably control flow transformation is required to enable it
		// [source gap]
		// Compare remaining phi sources against the first.
		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
			// [source gap: loop body and closing brace missing]
		assign_source(n.dst[0], s);
		// [source gap]
		assert(n.subtype == NST_PSI);
		assert(n.src.size() >= 6);
		// [source gap]
		assert(s->gvn_source);
		// [source gap]
		// PSI sources are iterated with stride 3 — presumably grouped in
		// triples; confirm against the PSI node layout in sb_ir.h.
		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
			// [source gap: loop body and closing brace missing]
		assign_source(n.dst[0], s);
		// [source gap: function tail and closing brace missing]
174 bool expr_handler::fold(container_node
& n
) {
// Fold a SETcc-style ALU comparison.
// NOTE(review): the entire body of this function is missing from the
// extracted source — only the signature is visible.
bool expr_handler::fold_setcc(alu_node &n) {
// Fold an ALU instruction by dispatching on its source-operand count.
// NOTE(review): truncated in the extracted source — the body of the
// PRED/KILL branch, the switch epilogue and the function tail are missing.
bool expr_handler::fold(alu_node& n) {
	// Predicate-setting / kill instructions are special-cased first.
	if (n.bc.op_ptr->flags & (AF_PRED | AF_KILL)) {
		// [source gap: branch body missing]
	switch (n.bc.op_ptr->src_count) {
	case 1: return fold_alu_op1(n);
	case 2: return fold_alu_op2(n);
	case 3: return fold_alu_op3(n);
	// [source gap: default case, function tail and closing braces missing]
// Fold a fetch instruction's destination channels: a channel whose
// destination select is SEL_0 / SEL_1 is the known constant 0.0f / 1.0f.
// NOTE(review): truncated in the extracted source — the definition of
// 'chan', the loop epilogue and the function tail are missing.
bool expr_handler::fold(fetch_node& n) {
	// [source gap]
	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
		// [source gap: 'chan' is defined in lines missing here]
		if (n.bc.dst_sel[chan] == SEL_0)
			assign_source(*I, get_const(0.0f));
		else if (n.bc.dst_sel[chan] == SEL_1)
			assign_source(*I, get_const(1.0f));
	// [source gap: loop/function tail and closing braces missing]
218 bool expr_handler::fold(cf_node
& n
) {
222 void expr_handler::apply_alu_src_mod(const bc_alu
&bc
, unsigned src
,
224 const bc_alu_src
&s
= bc
.src
[src
];
232 void expr_handler::apply_alu_dst_mod(const bc_alu
&bc
, literal
&v
) {
233 float omod_coeff
[] = {2.0f
, 4.0, 0.5f
};
236 v
= v
.f
* omod_coeff
[bc
.omod
- 1];
238 v
= float_clamp(v
.f
);
241 bool expr_handler::args_equal(const vvec
&l
, const vvec
&r
) {
243 assert(l
.size() == r
.size());
247 for (int k
= 0; k
< s
; ++k
) {
248 if (!l
[k
]->v_equal(r
[k
]))
255 bool expr_handler::ops_equal(const alu_node
*l
, const alu_node
* r
) {
256 const bc_alu
&b0
= l
->bc
;
257 const bc_alu
&b1
= r
->bc
;
262 unsigned src_count
= b0
.op_ptr
->src_count
;
264 if (b0
.index_mode
!= b1
.index_mode
)
267 if (b0
.clamp
!= b1
.clamp
|| b0
.omod
!= b1
.omod
)
270 for (unsigned s
= 0; s
< src_count
; ++s
) {
271 const bc_alu_src
&s0
= b0
.src
[s
];
272 const bc_alu_src
&s1
= b1
.src
[s
];
274 if (s0
.abs
!= s1
.abs
|| s0
.neg
!= s1
.neg
)
277 return args_equal(l
->src
, r
->src
);
// Constant-fold a one-source ALU instruction: evaluate the op on the
// constant source (after input modifiers), apply output modifiers, and
// alias the destination to the resulting constant.
// NOTE(review): truncated in the extracted source — the switch opening
// (on n.bc.op), the default branch, all returns and the function tail are
// missing; gaps are marked below.
bool expr_handler::fold_alu_op1(alu_node& n) {

	assert(!n.src.empty());
	// [source gap]
	value* v0 = n.src[0];
	// [source gap]
	assert(v0 && n.dst[0]);
	// [source gap]
	if (!v0->is_const()) {
		// A plain MOV/MOVA with no clamp/omod/abs/neg just copies its
		// source, so the destination may alias it directly.
		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
				n.bc.op == ALU_OP1_MOVA_GPR_INT)
				&& n.bc.clamp == 0 && n.bc.omod == 0
				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
			assign_source(n.dst[0], v0);
	// [source gap]
	literal dv, cv = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv);
	// [source gap: switch statement opening missing]
	case ALU_OP1_CEIL: dv = ceil(cv.f); break;
	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
	case ALU_OP1_FLOOR: dv = floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
	case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
	case ALU_OP1_LOG_CLAMPED:
	case ALU_OP1_LOG_IEEE:
	// [source gap: LOG case body missing]
	// don't fold to NAN, let the GPU handle it for now
	// (prevents degenerate LIT tests from failing)
	// [source gap]
	case ALU_OP1_MOV: dv = cv; break;
	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
	// case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
	// case ALU_OP1_MOVA_GPR_INT:
	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
	case ALU_OP1_PRED_SET_INV:
		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
	case ALU_OP1_RECIPSQRT_CLAMPED:
	case ALU_OP1_RECIPSQRT_FF:
	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
	case ALU_OP1_RECIP_CLAMPED:
	case ALU_OP1_RECIP_FF:
	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
	// case ALU_OP1_RECIP_INT:
	// case ALU_OP1_RECIP_UINT:
	// case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
	case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
	case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
	// [source gap: default case and switch close missing]
	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	// [source gap: function tail and closing brace missing]
// Constant-fold a two-source ALU instruction when both sources (or, in
// the partial branch, one source) are constants.
// NOTE(review): truncated in the extracted source — the guard bodies, the
// switch opening, several SET* cases, the default branch and the function
// tail are missing; gaps are marked below.
bool expr_handler::fold_alu_op2(alu_node& n) {

	if (n.src.size() < 2)
	// [source gap: guard body missing]
	value* v0 = n.src[0];
	value* v1 = n.src[1];

	assert(v0 && v1 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	// [source gap]
	literal dv, cv0, cv1;
	// [source gap: guard on isc0 missing]
	cv0 = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv0);
	// [source gap: guard on isc1 missing]
	cv1 = v1->get_const_value();
	apply_alu_src_mod(n.bc, 1, cv1);
	// [source gap: both-const branch and switch opening missing]
	case ALU_OP2_ADD: dv = cv0.f + cv1.f; break;
	case ALU_OP2_ADDC_UINT:
		dv = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
	case ALU_OP2_ADD_INT: dv = cv0.i + cv1.i; break;
	case ALU_OP2_AND_INT: dv = cv0.i & cv1.i; break;
	case ALU_OP2_ASHR_INT: dv = cv0.i >> (cv1.i & 0x1F); break;
	case ALU_OP2_BFM_INT:
		dv = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
	case ALU_OP2_LSHL_INT: dv = cv0.i << cv1.i; break;
	case ALU_OP2_LSHR_INT: dv = cv0.u >> cv1.u; break;
	// [source gap]
	case ALU_OP2_MAX_DX10: dv = cv0.f > cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MAX_INT: dv = cv0.i > cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MAX_UINT: dv = cv0.u > cv1.u ? cv0.u : cv1.u; break;
	// [source gap]
	case ALU_OP2_MIN_DX10: dv = cv0.f < cv1.f ? cv0.f : cv1.f; break;
	case ALU_OP2_MIN_INT: dv = cv0.i < cv1.i ? cv0.i : cv1.i; break;
	case ALU_OP2_MIN_UINT: dv = cv0.u < cv1.u ? cv0.u : cv1.u; break;
	// [source gap]
	case ALU_OP2_MUL_IEEE: dv = cv0.f * cv1.f; break;
	case ALU_OP2_MULHI_INT:
		dv = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULHI_UINT:
		dv = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
	case ALU_OP2_MULLO_INT:
		dv = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_MULLO_UINT:
		dv = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
	case ALU_OP2_OR_INT: dv = cv0.i | cv1.i; break;
	case ALU_OP2_SUB_INT: dv = cv0.i - cv1.i; break;
	case ALU_OP2_XOR_INT: dv = cv0.i ^ cv1.i; break;
	// [source gap]
	case ALU_OP2_SETE: dv = cv0.f == cv1.f ? 1.0f : 0.0f; break;
	// [source gap: remaining SET* cases, default and switch close missing]
	} else { // one source is const
		// TODO handle 1 * anything, 0 * anything, 0 + anything, etc
	// [source gap]
	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	// [source gap: function tail and closing brace missing]
// Constant-fold a three-source ALU instruction.
// NOTE(review): truncated in the extracted source — the guard bodies, the
// per-source const guards, the switch opening, all cases except MULADD,
// and the function tail are missing; gaps are marked below.
bool expr_handler::fold_alu_op3(alu_node& n) {

	if (n.src.size() < 3)
	// [source gap: guard body missing]

	// TODO handle CNDxx by some common path

	value* v0 = n.src[0];
	value* v1 = n.src[1];
	value* v2 = n.src[2];

	assert(v0 && v1 && v2 && n.dst[0]);

	bool isc0 = v0->is_const();
	bool isc1 = v1->is_const();
	bool isc2 = v2->is_const();

	// Nothing to fold when no source is constant.
	if (!isc0 && !isc1 && !isc2)
	// [source gap: guard body missing]

	literal dv, cv0, cv1, cv2;
	// [source gap: guard on isc0 missing]
	cv0 = v0->get_const_value();
	apply_alu_src_mod(n.bc, 0, cv0);
	// [source gap: guard on isc1 missing]
	cv1 = v1->get_const_value();
	apply_alu_src_mod(n.bc, 1, cv1);
	// [source gap: guard on isc2 missing]
	cv2 = v2->get_const_value();
	apply_alu_src_mod(n.bc, 2, cv2);
	// [source gap]
	if (isc0 && isc1 && isc2) {
		// [source gap: switch opening missing]
		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
	// [source gap: remaining cases, partial-const handling missing]
	apply_alu_dst_mod(n.bc, dv);
	assign_source(n.dst[0], get_const(dv));
	// [source gap: function tail and closing brace missing]
494 unsigned invert_setcc_condition(unsigned cc
, bool &swap_args
) {
498 case AF_CC_E
: ncc
= AF_CC_NE
; break;
499 case AF_CC_NE
: ncc
= AF_CC_E
; break;
500 case AF_CC_GE
: ncc
= AF_CC_GT
; swap_args
= true; break;
501 case AF_CC_GT
: ncc
= AF_CC_GE
; swap_args
= true; break;
503 assert(!"unexpected condition code");
// Map a condition code + comparison type to the matching SETcc ALU
// opcode; int_dst selects the DX10-style variants for float compares.
// NOTE(review): truncated in the extracted source — the switch statements
// that group these case labels (and the nesting separating the plain
// float / int / uint groups) are missing, so the case lists below are
// fragments, not one flat switch.
unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {

	if (int_dst && cmp_type == AF_FLOAT_CMP) {
		// [source gap: switch opening missing]
		case AF_CC_E: return ALU_OP2_SETE_DX10;
		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
		// [source gap]
		case AF_CC_E: return ALU_OP2_SETE;
		case AF_CC_NE: return ALU_OP2_SETNE;
		case AF_CC_GT: return ALU_OP2_SETGT;
		case AF_CC_GE: return ALU_OP2_SETGE;
		// [source gap]
		case AF_CC_E: return ALU_OP2_SETE_INT;
		case AF_CC_NE: return ALU_OP2_SETNE_INT;
		case AF_CC_GT: return ALU_OP2_SETGT_INT;
		case AF_CC_GE: return ALU_OP2_SETGE_INT;
		// [source gap]
		case AF_CC_GT: return ALU_OP2_SETGT_UINT;
		case AF_CC_GE: return ALU_OP2_SETGE_UINT;
	// [source gap]
	assert(!"unexpected cc&cmp_type combination");
	// [source gap: function tail and closing brace missing]
// Map a condition code + comparison type to the matching PRED_SETcc ALU
// opcode.
// NOTE(review): truncated in the extracted source — the switch statements
// that group these case labels by cmp_type are missing, so the case lists
// below are fragments, not one flat switch.
unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
	// [source gap: switch openings missing]
	case AF_CC_E: return ALU_OP2_PRED_SETE;
	case AF_CC_NE: return ALU_OP2_PRED_SETNE;
	case AF_CC_GT: return ALU_OP2_PRED_SETGT;
	case AF_CC_GE: return ALU_OP2_PRED_SETGE;
	// [source gap]
	case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
	case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
	case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
	case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
	// [source gap]
	case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
	case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
	// [source gap]
	assert(!"unexpected cc&cmp_type combination");
	// [source gap: function tail and closing brace missing]
587 void convert_predset_to_set(shader
& sh
, alu_node
* a
) {
589 unsigned flags
= a
->bc
.op_ptr
->flags
;
590 unsigned cc
= flags
& AF_CC_MASK
;
591 unsigned cmp_type
= flags
& AF_CMP_TYPE_MASK
;
593 bool swap_args
= false;
595 cc
= invert_setcc_condition(cc
, swap_args
);
597 unsigned newop
= get_setcc_opcode(cc
, cmp_type
, true);
603 std::swap(a
->src
[0], a
->src
[1]);
604 std::swap(a
->bc
.src
[0], a
->bc
.src
[1]);
607 a
->bc
.update_exec_mask
= 0;
608 a
->bc
.update_pred
= 0;
611 } // namespace r600_sb