/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#define FBC_DUMP(q) do { q } while (0)

#include "sb_shader.h"
41 void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node
*b4
) {
43 alu_group_node
*g
= sh
.create_alu_group();
44 alu_node
*a
= sh
.create_alu();
46 a
->bc
.set_op(ALU_OP0_NOP
);
53 int bc_finalizer::run() {
57 regions_vec
&rv
= sh
.get_regions();
58 for (regions_vec::reverse_iterator I
= rv
.rbegin(), E
= rv
.rend(); I
!= E
;
64 bool loop
= r
->is_loop();
76 // workaround for some problems on r6xx/7xx
77 // add ALU NOP to each vertex shader
78 if (!ctx
.is_egcm() && (sh
.target
== TARGET_VS
|| sh
.target
== TARGET_ES
)) {
79 cf_node
*c
= sh
.create_clause(NST_ALU_CLAUSE
);
81 alu_group_node
*g
= sh
.create_alu_group();
83 alu_node
*a
= sh
.create_alu();
84 a
->bc
.set_op(ALU_OP0_NOP
);
90 sh
.root
->push_back(c
);
92 c
= sh
.create_cf(CF_OP_NOP
);
93 sh
.root
->push_back(c
);
98 if (!ctx
.is_cayman() && last_cf
->bc
.op_ptr
->flags
& CF_ALU
) {
99 last_cf
= sh
.create_cf(CF_OP_NOP
);
100 sh
.root
->push_back(last_cf
);
103 if (ctx
.is_cayman()) {
105 cf_node
*c
= sh
.create_cf(CF_OP_CF_END
);
106 sh
.root
->push_back(c
);
108 last_cf
->insert_after(sh
.create_cf(CF_OP_CF_END
));
110 last_cf
->bc
.end_of_program
= 1;
112 for (unsigned t
= EXP_PIXEL
; t
< EXP_TYPE_COUNT
; ++t
) {
113 cf_node
*le
= last_export
[t
];
115 le
->bc
.set_op(CF_OP_EXPORT_DONE
);
123 void bc_finalizer::finalize_loop(region_node
* r
) {
127 cf_node
*loop_start
= sh
.create_cf(CF_OP_LOOP_START_DX10
);
128 cf_node
*loop_end
= sh
.create_cf(CF_OP_LOOP_END
);
130 // Update last_cf, but don't overwrite it if it's outside the current loop nest since
131 // it may point to a cf that is later in program order.
132 // The single parent level check is sufficient since finalize_loop() is processed in
133 // reverse order from innermost to outermost loop nest level.
134 if (!last_cf
|| last_cf
->get_parent_region() == r
) {
138 loop_start
->jump_after(loop_end
);
139 loop_end
->jump_after(loop_start
);
141 for (depart_vec::iterator I
= r
->departs
.begin(), E
= r
->departs
.end();
143 depart_node
*dep
= *I
;
144 cf_node
*loop_break
= sh
.create_cf(CF_OP_LOOP_BREAK
);
145 loop_break
->jump(loop_end
);
146 dep
->push_back(loop_break
);
150 // FIXME produces unnecessary LOOP_CONTINUE
151 for (repeat_vec::iterator I
= r
->repeats
.begin(), E
= r
->repeats
.end();
153 repeat_node
*rep
= *I
;
154 if (!(rep
->parent
== r
&& rep
->prev
== NULL
)) {
155 cf_node
*loop_cont
= sh
.create_cf(CF_OP_LOOP_CONTINUE
);
156 loop_cont
->jump(loop_end
);
157 rep
->push_back(loop_cont
);
162 r
->push_front(loop_start
);
163 r
->push_back(loop_end
);
166 void bc_finalizer::finalize_if(region_node
* r
) {
170 // expecting the following control flow structure here:
173 // - depart/repeat 1 (it may be depart/repeat for some outer region)
177 // - depart/repeat 2 (possibly for outer region)
179 // - some optional code
182 // - optional <else> code> ...
186 container_node
*repdep1
= static_cast<container_node
*>(r
->first
);
187 assert(repdep1
->is_depart() || repdep1
->is_repeat());
189 if_node
*n_if
= static_cast<if_node
*>(repdep1
->first
);
194 assert(n_if
->is_if());
196 container_node
*repdep2
= static_cast<container_node
*>(n_if
->first
);
197 assert(repdep2
->is_depart() || repdep2
->is_repeat());
199 cf_node
*if_jump
= sh
.create_cf(CF_OP_JUMP
);
200 cf_node
*if_pop
= sh
.create_cf(CF_OP_POP
);
202 if (!last_cf
|| last_cf
->get_parent_region() == r
) {
205 if_pop
->bc
.pop_count
= 1;
206 if_pop
->jump_after(if_pop
);
208 r
->push_front(if_jump
);
209 r
->push_back(if_pop
);
211 /* the depart/repeat 1 is actually part of the "else" code.
212 * if it's a depart for an outer loop region it will want to
213 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
214 * to emit the else clause.
216 bool has_else
= n_if
->next
;
218 if (repdep1
->is_depart()) {
219 depart_node
*dep1
= static_cast<depart_node
*>(repdep1
);
220 if (dep1
->target
!= r
&& dep1
->target
->is_loop())
224 if (repdep1
->is_repeat()) {
225 repeat_node
*rep1
= static_cast<repeat_node
*>(repdep1
);
226 if (rep1
->target
!= r
&& rep1
->target
->is_loop())
231 cf_node
*nelse
= sh
.create_cf(CF_OP_ELSE
);
232 n_if
->insert_after(nelse
);
233 if_jump
->jump(nelse
);
234 nelse
->jump_after(if_pop
);
235 nelse
->bc
.pop_count
= 1;
238 if_jump
->jump_after(if_pop
);
239 if_jump
->bc
.pop_count
= 1;
245 for (depart_vec::iterator I
= r
->departs
.begin(), E
= r
->departs
.end();
250 assert(r
->repeats
.empty());
253 void bc_finalizer::run_on(container_node
* c
) {
254 node
*prev_node
= NULL
;
255 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
258 if (n
->is_alu_group()) {
259 finalize_alu_group(static_cast<alu_group_node
*>(n
), prev_node
);
261 if (n
->is_alu_clause()) {
262 cf_node
*c
= static_cast<cf_node
*>(n
);
264 if (c
->bc
.op
== CF_OP_ALU_PUSH_BEFORE
&& ctx
.is_egcm()) {
265 if (ctx
.stack_workaround_8xx
) {
266 region_node
*r
= c
->get_parent_region();
269 unsigned elems
= get_stack_depth(r
, loops
, ifs
);
270 unsigned dmod1
= elems
% ctx
.stack_entry_size
;
271 unsigned dmod2
= (elems
+ 1) % ctx
.stack_entry_size
;
273 if (elems
&& (!dmod1
|| !dmod2
))
274 c
->flags
|= NF_ALU_STACK_WORKAROUND
;
276 } else if (ctx
.stack_workaround_9xx
) {
277 region_node
*r
= c
->get_parent_region();
280 get_stack_depth(r
, loops
, ifs
);
282 c
->flags
|= NF_ALU_STACK_WORKAROUND
;
287 } else if (n
->is_fetch_inst()) {
288 finalize_fetch(static_cast<fetch_node
*>(n
));
289 } else if (n
->is_cf_inst()) {
290 finalize_cf(static_cast<cf_node
*>(n
));
292 if (n
->is_container())
293 run_on(static_cast<container_node
*>(n
));
299 void bc_finalizer::finalize_alu_group(alu_group_node
* g
, node
*prev_node
) {
301 alu_node
*last
= NULL
;
302 alu_group_node
*prev_g
= NULL
;
303 bool add_nop
= false;
304 if (prev_node
&& prev_node
->is_alu_group()) {
305 prev_g
= static_cast<alu_group_node
*>(prev_node
);
308 for (node_iterator I
= g
->begin(), E
= g
->end(); I
!= E
; ++I
) {
309 alu_node
*n
= static_cast<alu_node
*>(*I
);
310 unsigned slot
= n
->bc
.slot
;
311 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
313 if (d
&& d
->is_special_reg()) {
314 assert((n
->bc
.op_ptr
->flags
& AF_MOVA
) || d
->is_geometry_emit() || d
->is_lds_oq() || d
->is_lds_access() || d
->is_scratch());
318 sel_chan fdst
= d
? d
->get_final_gpr() : sel_chan(0, 0);
321 assert(fdst
.chan() == slot
|| slot
== SLOT_TRANS
);
324 if (!(n
->bc
.op_ptr
->flags
& AF_MOVA
&& ctx
.is_cayman()))
325 n
->bc
.dst_gpr
= fdst
.sel();
326 n
->bc
.dst_chan
= d
? fdst
.chan() : slot
< SLOT_TRANS
? slot
: 0;
329 if (d
&& d
->is_rel() && d
->rel
&& !d
->rel
->is_const()) {
331 update_ngpr(d
->array
->gpr
.sel() + d
->array
->array_size
-1);
336 n
->bc
.write_mask
= d
!= NULL
;
339 if (n
->bc
.op_ptr
->flags
& AF_PRED
) {
340 n
->bc
.update_pred
= (n
->dst
[1] != NULL
);
341 n
->bc
.update_exec_mask
= (n
->dst
[2] != NULL
);
344 // FIXME handle predication here
345 n
->bc
.pred_sel
= PRED_SEL_OFF
;
347 update_ngpr(n
->bc
.dst_gpr
);
349 add_nop
|= finalize_alu_src(g
, n
, prev_g
);
355 if (sh
.get_ctx().r6xx_gpr_index_workaround
) {
356 insert_rv6xx_load_ar_workaround(g
);
362 bool bc_finalizer::finalize_alu_src(alu_group_node
* g
, alu_node
* a
, alu_group_node
*prev
) {
364 bool add_nop
= false;
366 sblog
<< "finalize_alu_src: ";
373 for (vvec::iterator I
= sv
.begin(), E
= sv
.end(); I
!= E
; ++I
, ++si
) {
377 bc_alu_src
&src
= a
->bc
.src
[si
];
385 sc
= v
->get_final_gpr();
387 src
.chan
= sc
.chan();
388 if (!v
->rel
->is_const()) {
390 update_ngpr(v
->array
->gpr
.sel() + v
->array
->array_size
-1);
391 if (prev
&& !add_nop
) {
392 for (node_iterator pI
= prev
->begin(), pE
= prev
->end(); pI
!= pE
; ++pI
) {
393 alu_node
*pn
= static_cast<alu_node
*>(*pI
);
394 if (pn
->bc
.dst_gpr
== src
.sel
) {
405 gpr
= v
->get_final_gpr();
407 src
.chan
= gpr
.chan();
408 update_ngpr(src
.sel
);
411 src
.sel
= v
->gpr
.sel();
412 src
.chan
= v
->gpr
.chan();
413 update_ngpr(src
.sel
);
417 literal lv
= v
->literal_value
;
420 if (lv
== literal(0))
422 else if (lv
== literal(0.5f
))
423 src
.sel
= ALU_SRC_0_5
;
424 else if (lv
== literal(1.0f
))
426 else if (lv
== literal(1))
427 src
.sel
= ALU_SRC_1_INT
;
428 else if (lv
== literal(-1))
429 src
.sel
= ALU_SRC_M_1_INT
;
431 src
.sel
= ALU_SRC_LITERAL
;
432 src
.chan
= g
->literal_chan(lv
);
438 cf_node
*clause
= static_cast<cf_node
*>(g
->parent
);
439 assert(clause
->is_alu_clause());
440 sel_chan k
= translate_kcache(clause
, v
);
442 assert(k
&& "kcache translation failed");
448 case VLK_SPECIAL_REG
:
449 if (v
->select
.sel() == SV_LDS_OQA
) {
450 src
.sel
= ALU_SRC_LDS_OQ_A_POP
;
452 } else if (v
->select
.sel() == SV_LDS_OQB
) {
453 src
.sel
= ALU_SRC_LDS_OQ_B_POP
;
461 case VLK_SPECIAL_CONST
:
462 src
.sel
= v
->select
.sel();
463 src
.chan
= v
->select
.chan();
466 assert(!"unknown value kind");
469 if (prev
&& !add_nop
) {
470 for (node_iterator pI
= prev
->begin(), pE
= prev
->end(); pI
!= pE
; ++pI
) {
471 alu_node
*pn
= static_cast<alu_node
*>(*pI
);
472 if (pn
->bc
.dst_rel
) {
473 if (pn
->bc
.dst_gpr
== src
.sel
) {
483 a
->bc
.src
[si
++].sel
= 0;
488 void bc_finalizer::copy_fetch_src(fetch_node
&dst
, fetch_node
&src
, unsigned arg_start
)
492 for (unsigned chan
= 0; chan
< 4; ++chan
) {
494 dst
.bc
.dst_sel
[chan
] = SEL_MASK
;
496 unsigned sel
= SEL_MASK
;
498 value
*v
= src
.src
[arg_start
+ chan
];
500 if (!v
|| v
->is_undef()) {
502 } else if (v
->is_const()) {
503 literal l
= v
->literal_value
;
506 else if (l
== literal(1.0f
))
509 sblog
<< "invalid fetch constant operand " << chan
<< " ";
515 } else if (v
->is_any_gpr()) {
516 unsigned vreg
= v
->gpr
.sel();
517 unsigned vchan
= v
->gpr
.chan();
521 else if ((unsigned)reg
!= vreg
) {
522 sblog
<< "invalid fetch source operand " << chan
<< " ";
531 sblog
<< "invalid fetch source operand " << chan
<< " ";
537 dst
.bc
.src_sel
[chan
] = sel
;
543 dst
.bc
.src_gpr
= reg
>= 0 ? reg
: 0;
546 void bc_finalizer::emit_set_grad(fetch_node
* f
) {
548 assert(f
->src
.size() == 12 || f
->src
.size() == 13);
549 unsigned ops
[2] = { FETCH_OP_SET_GRADIENTS_V
, FETCH_OP_SET_GRADIENTS_H
};
551 unsigned arg_start
= 0;
553 for (unsigned op
= 0; op
< 2; ++op
) {
554 fetch_node
*n
= sh
.create_fetch();
555 n
->bc
.set_op(ops
[op
]);
559 copy_fetch_src(*n
, *f
, arg_start
);
566 void bc_finalizer::emit_set_texture_offsets(fetch_node
&f
) {
567 assert(f
.src
.size() == 8);
569 fetch_node
*n
= sh
.create_fetch();
571 n
->bc
.set_op(FETCH_OP_SET_TEXTURE_OFFSETS
);
573 copy_fetch_src(*n
, f
, 4);
578 void bc_finalizer::finalize_fetch(fetch_node
* f
) {
584 unsigned src_count
= 4;
586 unsigned flags
= f
->bc
.op_ptr
->flags
;
588 if (flags
& FF_VTX
) {
590 } else if (flags
& FF_GDS
) {
592 } else if (flags
& FF_USEGRAD
) {
594 } else if (flags
& FF_USE_TEXTURE_OFFSETS
) {
595 emit_set_texture_offsets(*f
);
598 for (unsigned chan
= 0; chan
< src_count
; ++chan
) {
600 unsigned sel
= f
->bc
.src_sel
[chan
];
605 value
*v
= f
->src
[chan
];
609 } else if (v
->is_const()) {
610 literal l
= v
->literal_value
;
613 else if (l
== literal(1.0f
))
616 sblog
<< "invalid fetch constant operand " << chan
<< " ";
622 } else if (v
->is_any_gpr()) {
623 unsigned vreg
= v
->gpr
.sel();
624 unsigned vchan
= v
->gpr
.chan();
628 else if ((unsigned)reg
!= vreg
) {
629 sblog
<< "invalid fetch source operand " << chan
<< " ";
638 sblog
<< "invalid fetch source operand " << chan
<< " ";
644 f
->bc
.src_sel
[chan
] = sel
;
650 f
->bc
.src_gpr
= reg
>= 0 ? reg
: 0;
656 unsigned dst_swz
[4] = {SEL_MASK
, SEL_MASK
, SEL_MASK
, SEL_MASK
};
658 for (unsigned chan
= 0; chan
< 4; ++chan
) {
660 unsigned sel
= f
->bc
.dst_sel
[chan
];
665 value
*v
= f
->dst
[chan
];
669 if (v
->is_any_gpr()) {
670 unsigned vreg
= v
->gpr
.sel();
671 unsigned vchan
= v
->gpr
.chan();
675 else if ((unsigned)reg
!= vreg
) {
676 sblog
<< "invalid fetch dst operand " << chan
<< " ";
682 dst_swz
[vchan
] = sel
;
685 sblog
<< "invalid fetch dst operand " << chan
<< " ";
693 for (unsigned i
= 0; i
< 4; ++i
)
694 f
->bc
.dst_sel
[i
] = dst_swz
[i
];
696 if ((flags
& FF_GDS
) && reg
== -1) {
697 f
->bc
.dst_sel
[0] = SEL_MASK
;
706 f
->bc
.dst_gpr
= reg
>= 0 ? reg
: 0;
709 void bc_finalizer::finalize_cf(cf_node
* c
) {
711 unsigned flags
= c
->bc
.op_ptr
->flags
;
713 c
->bc
.end_of_program
= 0;
716 if (flags
& CF_EXP
) {
717 c
->bc
.set_op(CF_OP_EXPORT
);
718 last_export
[c
->bc
.type
] = c
;
722 for (unsigned chan
= 0; chan
< 4; ++chan
) {
724 unsigned sel
= c
->bc
.sel
[chan
];
729 value
*v
= c
->src
[chan
];
733 } else if (v
->is_const()) {
734 literal l
= v
->literal_value
;
737 else if (l
== literal(1.0f
))
740 sblog
<< "invalid export constant operand " << chan
<< " ";
746 } else if (v
->is_any_gpr()) {
747 unsigned vreg
= v
->gpr
.sel();
748 unsigned vchan
= v
->gpr
.chan();
752 else if ((unsigned)reg
!= vreg
) {
753 sblog
<< "invalid export source operand " << chan
<< " ";
762 sblog
<< "invalid export source operand " << chan
<< " ";
768 c
->bc
.sel
[chan
] = sel
;
774 c
->bc
.rw_gpr
= reg
>= 0 ? reg
: 0;
776 } else if (flags
& CF_MEM
) {
782 for (unsigned chan
= 0; chan
< 4; ++chan
) {
784 if (ctx
.hw_class
== HW_CLASS_R600
&& c
->bc
.op
== CF_OP_MEM_SCRATCH
&&
785 (c
->bc
.type
== 2 || c
->bc
.type
== 3))
790 if (!v
|| v
->is_undef())
793 if (!v
->is_any_gpr() || v
->gpr
.chan() != chan
) {
794 sblog
<< "invalid source operand " << chan
<< " ";
799 unsigned vreg
= v
->gpr
.sel();
802 else if ((unsigned)reg
!= vreg
) {
803 sblog
<< "invalid source operand " << chan
<< " ";
815 c
->bc
.rw_gpr
= reg
>= 0 ? reg
: 0;
816 c
->bc
.comp_mask
= mask
;
818 if (((flags
& CF_RAT
) || (!(flags
& CF_STRM
))) && (c
->bc
.type
& 1)) {
822 for (unsigned chan
= 0; chan
< 4; ++chan
) {
823 value
*v
= c
->src
[4 + chan
];
824 if (!v
|| v
->is_undef())
827 if (!v
->is_any_gpr() || v
->gpr
.chan() != chan
) {
828 sblog
<< "invalid source operand " << chan
<< " ";
833 unsigned vreg
= v
->gpr
.sel();
836 else if ((unsigned)reg
!= vreg
) {
837 sblog
<< "invalid source operand " << chan
<< " ";
849 c
->bc
.index_gpr
= reg
>= 0 ? reg
: 0;
851 } else if (flags
& CF_CALL
) {
852 update_nstack(c
->get_parent_region(), ctx
.wavefront_size
== 16 ? 2 : 1);
856 sel_chan
bc_finalizer::translate_kcache(cf_node
* alu
, value
* v
) {
857 unsigned sel
= v
->select
.kcache_sel();
858 unsigned bank
= v
->select
.kcache_bank();
859 unsigned chan
= v
->select
.chan();
860 static const unsigned kc_base
[] = {128, 160, 256, 288};
864 unsigned line
= sel
>> 4;
866 for (unsigned k
= 0; k
< 4; ++k
) {
867 bc_kcache
&kc
= alu
->bc
.kc
[k
];
869 if (kc
.mode
== KC_LOCK_NONE
)
872 if (kc
.bank
== bank
&& (kc
.addr
== line
||
873 (kc
.mode
== KC_LOCK_2
&& kc
.addr
+ 1 == line
))) {
875 sel
= kc_base
[k
] + (sel
- (kc
.addr
<< 4));
877 return sel_chan(sel
, chan
);
881 assert(!"kcache translation error");
885 void bc_finalizer::update_ngpr(unsigned gpr
) {
886 if (gpr
< MAX_GPR
- ctx
.alu_temp_gprs
&& gpr
>= ngpr
)
890 unsigned bc_finalizer::get_stack_depth(node
*n
, unsigned &loops
,
891 unsigned &ifs
, unsigned add
) {
892 unsigned stack_elements
= add
;
893 bool has_non_wqm_push
= (add
!= 0);
894 region_node
*r
= n
->is_region() ?
895 static_cast<region_node
*>(n
) : n
->get_parent_region();
905 has_non_wqm_push
= true;
907 r
= r
->get_parent_region();
909 stack_elements
+= (loops
* ctx
.stack_entry_size
) + ifs
;
911 // reserve additional elements in some cases
912 switch (ctx
.hw_class
) {
915 // If any non-WQM push is invoked, 2 elements should be reserved.
916 if (has_non_wqm_push
)
919 case HW_CLASS_CAYMAN
:
920 // If any stack operation is invoked, 2 elements should be reserved
924 case HW_CLASS_EVERGREEN
:
925 // According to the docs we need to reserve 1 element for each of the
927 // 1) non-WQM push is used with WQM/LOOP frames on stack
928 // 2) ALU_ELSE_AFTER is used at the point of max stack usage
930 // It was found that the conditions above are not sufficient, there are
931 // other cases where we also need to reserve stack space, that's why
932 // we always reserve 1 stack element if we have non-WQM push on stack.
933 // Condition 2 is ignored for now because we don't use this instruction.
934 if (has_non_wqm_push
)
937 case HW_CLASS_UNKNOWN
:
940 return stack_elements
;
943 void bc_finalizer::update_nstack(region_node
* r
, unsigned add
) {
946 unsigned elems
= r
? get_stack_depth(r
, loops
, ifs
, add
) : add
;
948 // XXX all chips expect this value to be computed using 4 as entry size,
949 // not the real entry size
950 unsigned stack_entries
= (elems
+ 3) >> 2;
952 if (nstack
< stack_entries
)
953 nstack
= stack_entries
;
956 void bc_finalizer::cf_peephole() {
957 if (ctx
.stack_workaround_8xx
|| ctx
.stack_workaround_9xx
) {
958 for (node_iterator N
, I
= sh
.root
->begin(), E
= sh
.root
->end(); I
!= E
;
961 cf_node
*c
= static_cast<cf_node
*>(*I
);
963 if (c
->bc
.op
== CF_OP_ALU_PUSH_BEFORE
&&
964 (c
->flags
& NF_ALU_STACK_WORKAROUND
)) {
965 cf_node
*push
= sh
.create_cf(CF_OP_PUSH
);
966 c
->insert_before(push
);
968 c
->bc
.set_op(CF_OP_ALU
);
973 for (node_iterator N
, I
= sh
.root
->begin(), E
= sh
.root
->end(); I
!= E
;
977 cf_node
*c
= static_cast<cf_node
*>(*I
);
979 if (c
->jump_after_target
) {
980 if (c
->jump_target
->next
== NULL
) {
981 c
->jump_target
->insert_after(sh
.create_cf(CF_OP_NOP
));
982 if (last_cf
== c
->jump_target
)
983 last_cf
= static_cast<cf_node
*>(c
->jump_target
->next
);
985 c
->jump_target
= static_cast<cf_node
*>(c
->jump_target
->next
);
986 c
->jump_after_target
= false;
989 if (c
->is_cf_op(CF_OP_POP
)) {
991 if (p
->is_alu_clause()) {
992 cf_node
*a
= static_cast<cf_node
*>(p
);
994 if (a
->bc
.op
== CF_OP_ALU
) {
995 a
->bc
.set_op(CF_OP_ALU_POP_AFTER
);
999 } else if (c
->is_cf_op(CF_OP_JUMP
) && c
->jump_target
== c
->next
) {
1000 // if JUMP is immediately followed by its jump target,
1001 // then JUMP is useless and we can eliminate it
1007 } // namespace r600_sb