/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
30 #define BCP_DUMP(q) do { q } while (0)
35 #include "r600_pipe.h"
36 #include "r600_shader.h"
37 #include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1
42 #include "sb_shader.h"
44 #include "util/macros.h"
48 int bc_parser::decode() {
54 dec
= new bc_decoder(ctx
, dw
, bc_ndw
);
56 shader_target t
= TARGET_UNKNOWN
;
60 case PIPE_SHADER_FRAGMENT
: t
= TARGET_PS
; break;
61 case PIPE_SHADER_VERTEX
:
62 t
= pshader
->vs_as_ls
? TARGET_LS
: (pshader
->vs_as_es
? TARGET_ES
: TARGET_VS
);
64 case PIPE_SHADER_GEOMETRY
: t
= TARGET_GS
; break;
65 case PIPE_SHADER_COMPUTE
: t
= TARGET_COMPUTE
; break;
66 case PIPE_SHADER_TESS_CTRL
: t
= TARGET_HS
; break;
67 case PIPE_SHADER_TESS_EVAL
: t
= pshader
->tes_as_es
? TARGET_ES
: TARGET_VS
; break;
68 default: assert(!"unknown shader target"); return -1; break;
71 if (bc
->type
== PIPE_SHADER_COMPUTE
)
77 sh
= new shader(ctx
, t
, bc
->debug_id
);
78 sh
->safe_math
= sb_context::safe_math
|| (t
== TARGET_COMPUTE
|| bc
->precise
);
80 int r
= decode_shader();
85 sh
->nstack
= bc
->nstack
;
90 int bc_parser::decode_shader() {
99 if ((r
= decode_cf(i
, eop
)))
102 } while (!eop
|| (i
>> 1) < max_cf
);
107 int bc_parser::prepare() {
109 if ((r
= parse_decls()))
111 if ((r
= prepare_ir()))
116 int bc_parser::parse_decls() {
120 sh
->add_gpr_array(0, bc
->ngpr
, 0x0F);
122 // compute shaders have some values preloaded in R0, R1
123 sh
->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
124 sh
->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
128 if (pshader
->indirect_files
& ~((1 << TGSI_FILE_CONSTANT
) | (1 << TGSI_FILE_SAMPLER
))) {
130 assert(pshader
->num_arrays
);
132 if (pshader
->num_arrays
) {
133 for (unsigned i
= 0; i
< pshader
->num_arrays
; ++i
) {
134 r600_shader_array
&a
= pshader
->arrays
[i
];
135 sh
->add_gpr_array(a
.gpr_start
, a
.gpr_count
, a
.comp_mask
);
138 sh
->add_gpr_array(0, pshader
->bc
.ngpr
, 0x0F);
142 // GS inputs can add indirect addressing
143 if (sh
->target
== TARGET_GS
) {
144 if (pshader
->num_arrays
) {
145 for (unsigned i
= 0; i
< pshader
->num_arrays
; ++i
) {
146 r600_shader_array
&a
= pshader
->arrays
[i
];
147 sh
->add_gpr_array(a
.gpr_start
, a
.gpr_count
, a
.comp_mask
);
152 if (sh
->target
== TARGET_VS
|| sh
->target
== TARGET_ES
|| sh
->target
== TARGET_HS
|| sh
->target
== TARGET_LS
)
153 sh
->add_input(0, 1, 0x0F);
154 else if (sh
->target
== TARGET_GS
) {
155 sh
->add_input(0, 1, 0x0F);
156 sh
->add_input(1, 1, 0x0F);
157 } else if (sh
->target
== TARGET_COMPUTE
) {
158 sh
->add_input(0, 1, 0x0F);
159 sh
->add_input(1, 1, 0x0F);
162 bool ps_interp
= ctx
.hw_class
>= HW_CLASS_EVERGREEN
163 && sh
->target
== TARGET_PS
;
165 bool ij_interpolators
[6];
166 memset(ij_interpolators
, 0, sizeof(ij_interpolators
));
168 for (unsigned i
= 0; i
< pshader
->ninput
; ++i
) {
169 r600_shader_io
& in
= pshader
->input
[i
];
170 bool preloaded
= sh
->target
== TARGET_PS
&& !(ps_interp
&& in
.spi_sid
);
171 sh
->add_input(in
.gpr
, preloaded
, /*in.write_mask*/ 0x0F);
172 if (ps_interp
&& in
.spi_sid
) {
173 int k
= eg_get_interpolator_index(in
.interpolate
, in
.interpolate_location
);
175 ij_interpolators
[k
] |= true;
176 if (in
.uses_interpolate_at_centroid
) {
177 k
= eg_get_interpolator_index(in
.interpolate
, TGSI_INTERPOLATE_LOC_CENTROID
);
178 ij_interpolators
[k
] |= true;
185 /* add the egcm ij interpolators to live inputs */
187 for (unsigned i
= 0; i
< ARRAY_SIZE(ij_interpolators
); i
++) {
188 num_ij
+= ij_interpolators
[i
];
191 unsigned mask
= (1 << (2 * num_ij
)) - 1;
195 sh
->add_input(gpr
, true, mask
& 0x0F);
204 int bc_parser::decode_cf(unsigned &i
, bool &eop
) {
208 cf_node
*cf
= sh
->create_cf();
209 sh
->root
->push_back(cf
);
211 unsigned id
= i
>> 1;
215 if (cf_map
.size() < id
+ 1)
216 cf_map
.resize(id
+ 1);
220 if ((r
= dec
->decode_cf(i
, cf
->bc
)))
223 cf_op_flags flags
= (cf_op_flags
)cf
->bc
.op_ptr
->flags
;
225 if (flags
& CF_ALU
) {
226 if ((r
= decode_alu_clause(cf
)))
228 } else if (flags
& CF_FETCH
) {
229 if ((r
= decode_fetch_clause(cf
)))
231 } else if (flags
& CF_EXP
) {
234 assert(!cf
->bc
.rw_rel
);
235 } else if (flags
& CF_MEM
) {
238 assert(!cf
->bc
.rw_rel
);
239 } else if (flags
& CF_BRANCH
) {
240 if (cf
->bc
.addr
> max_cf
)
241 max_cf
= cf
->bc
.addr
;
244 eop
= cf
->bc
.end_of_program
|| cf
->bc
.op
== CF_OP_CF_END
||
245 cf
->bc
.op
== CF_OP_RET
;
249 int bc_parser::decode_alu_clause(cf_node
* cf
) {
250 unsigned i
= cf
->bc
.addr
<< 1, cnt
= cf
->bc
.count
+ 1, gcnt
;
252 cf
->subtype
= NST_ALU_CLAUSE
;
255 memset(slots
[0], 0, 5*sizeof(slots
[0][0]));
260 decode_alu_group(cf
, i
, gcnt
);
269 int bc_parser::decode_alu_group(cf_node
* cf
, unsigned &i
, unsigned &gcnt
) {
272 alu_group_node
*g
= sh
->create_alu_group();
275 memset(slots
[cgroup
], 0, 5*sizeof(slots
[0][0]));
278 unsigned literal_mask
= 0;
281 n
= sh
->create_alu();
284 if ((r
= dec
->decode_alu(i
, n
->bc
)))
287 if (!sh
->assign_slot(n
, slots
[cgroup
])) {
288 assert(!"alu slot assignment failed");
294 } while (gcnt
<= 5 && !n
->bc
.last
);
298 for (node_iterator I
= g
->begin(), E
= g
->end(); I
!= E
; ++I
) {
299 n
= static_cast<alu_node
*>(*I
);
304 for (int k
= 0; k
< n
->bc
.op_ptr
->src_count
; ++k
) {
305 bc_alu_src
&src
= n
->bc
.src
[k
];
308 if (src
.sel
== ALU_SRC_LITERAL
) {
309 literal_mask
|= (1 << src
.chan
);
310 src
.value
.u
= dw
[i
+ src
.chan
];
315 unsigned literal_ndw
= 0;
316 while (literal_mask
) {
317 g
->literals
.push_back(dw
[i
+ literal_ndw
]);
322 literal_ndw
= (literal_ndw
+ 1) & ~1u;
325 gcnt
+= literal_ndw
>> 1;
331 int bc_parser::prepare_alu_clause(cf_node
* cf
) {
333 // loop over alu groups
334 for (node_iterator I
= cf
->begin(), E
= cf
->end(); I
!= E
; ++I
) {
335 assert(I
->subtype
== NST_ALU_GROUP
);
336 alu_group_node
*g
= static_cast<alu_group_node
*>(*I
);
337 prepare_alu_group(cf
, g
);
343 void bc_parser::save_set_cf_index(value
*val
, unsigned idx
)
347 cf_index_value
[idx
] = val
;
349 value
*bc_parser::get_cf_index_value(unsigned idx
)
352 assert(cf_index_value
[idx
]);
353 return cf_index_value
[idx
];
355 void bc_parser::save_mova(alu_node
*mova
)
360 alu_node
*bc_parser::get_mova()
366 int bc_parser::prepare_alu_group(cf_node
* cf
, alu_group_node
*g
) {
371 memset(slots
[cgroup
], 0, 5*sizeof(slots
[0][0]));
373 for (node_iterator I
= g
->begin(), E
= g
->end();
375 n
= static_cast<alu_node
*>(*I
);
376 bool ubo_indexing
[2] = {};
378 if (!sh
->assign_slot(n
, slots
[cgroup
])) {
379 assert(!"alu slot assignment failed");
383 unsigned src_count
= n
->bc
.op_ptr
->src_count
;
385 if (ctx
.alu_slots(n
->bc
.op
) & AF_4SLOT
)
386 n
->flags
|= NF_ALU_4SLOT
;
388 n
->src
.resize(src_count
);
390 unsigned flags
= n
->bc
.op_ptr
->flags
;
392 if (flags
& AF_LDS
) {
393 bool need_rw
= false, need_oqa
= false, need_oqb
= false;
394 int ndst
= 0, ncount
= 0;
396 /* all non-read operations have side effects */
397 if (n
->bc
.op
!= LDS_OP2_LDS_READ2_RET
&&
398 n
->bc
.op
!= LDS_OP1_LDS_READ_REL_RET
&&
399 n
->bc
.op
!= LDS_OP1_LDS_READ_RET
) {
400 n
->flags
|= NF_DONT_KILL
;
405 if (n
->bc
.op
>= LDS_OP2_LDS_ADD_RET
&& n
->bc
.op
<= LDS_OP1_LDS_USHORT_READ_RET
) {
410 if (n
->bc
.op
== LDS_OP2_LDS_READ2_RET
|| n
->bc
.op
== LDS_OP1_LDS_READ_REL_RET
) {
417 n
->dst
[ncount
++] = sh
->get_special_value(SV_LDS_OQA
);
419 n
->dst
[ncount
++] = sh
->get_special_value(SV_LDS_OQB
);
421 n
->dst
[ncount
++] = sh
->get_special_value(SV_LDS_RW
);
423 n
->flags
|= NF_DONT_MOVE
| NF_DONT_HOIST
;
425 } else if (flags
& AF_PRED
) {
427 if (n
->bc
.update_pred
)
428 n
->dst
[1] = sh
->get_special_value(SV_ALU_PRED
);
429 if (n
->bc
.update_exec_mask
)
430 n
->dst
[2] = sh
->get_special_value(SV_EXEC_MASK
);
432 n
->flags
|= NF_DONT_HOIST
;
434 } else if (flags
& AF_KILL
) {
437 n
->dst
[1] = sh
->get_special_value(SV_VALID_MASK
);
440 n
->flags
|= NF_DONT_HOIST
| NF_DONT_MOVE
|
441 NF_DONT_KILL
| NF_SCHEDULE_EARLY
;
447 if (n
->bc
.op
== ALU_OP0_SET_CF_IDX0
|| n
->bc
.op
== ALU_OP0_SET_CF_IDX1
) {
448 // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
449 // DCE will kill this op
450 save_set_cf_index(get_mova()->src
[0], n
->bc
.op
== ALU_OP0_SET_CF_IDX1
);
451 } else if (flags
& AF_MOVA
) {
453 n
->dst
[0] = sh
->get_special_value(SV_AR_INDEX
);
456 n
->flags
|= NF_DONT_HOIST
;
458 } else if ((n
->bc
.op_ptr
->src_count
== 3 || n
->bc
.write_mask
) && !(flags
& AF_LDS
)) {
459 assert(!n
->bc
.dst_rel
|| n
->bc
.index_mode
== INDEX_AR_X
);
461 value
*v
= sh
->get_gpr_value(false, n
->bc
.dst_gpr
, n
->bc
.dst_chan
,
467 if (n
->bc
.pred_sel
) {
468 sh
->has_alu_predication
= true;
469 n
->pred
= sh
->get_special_value(SV_ALU_PRED
);
472 for (unsigned s
= 0; s
< src_count
; ++s
) {
473 bc_alu_src
&src
= n
->bc
.src
[s
];
475 if (src
.sel
== ALU_SRC_LITERAL
) {
476 n
->src
[s
] = sh
->get_const_value(src
.value
);
477 } else if (src
.sel
== ALU_SRC_PS
|| src
.sel
== ALU_SRC_PV
) {
478 unsigned pgroup
= !cgroup
, prev_slot
= src
.sel
== ALU_SRC_PS
?
479 SLOT_TRANS
: src
.chan
;
481 // XXX shouldn't happen but llvm backend uses PS on cayman
482 if (prev_slot
== SLOT_TRANS
&& ctx
.is_cayman())
485 alu_node
*prev_alu
= slots
[pgroup
][prev_slot
];
489 if (!prev_alu
->dst
[0]) {
490 value
* t
= sh
->create_temp_value();
491 prev_alu
->dst
[0] = t
;
494 value
*d
= prev_alu
->dst
[0];
497 d
= sh
->get_gpr_value(true, prev_alu
->bc
.dst_gpr
,
498 prev_alu
->bc
.dst_chan
,
499 prev_alu
->bc
.dst_rel
);
503 } else if (ctx
.is_kcache_sel(src
.sel
)) {
504 unsigned sel
= src
.sel
, kc_addr
;
505 unsigned kc_set
= ((sel
>> 7) & 2) + ((sel
>> 5) & 1);
507 bc_kcache
&kc
= cf
->bc
.kc
[kc_set
];
508 kc_addr
= (kc
.addr
<< 4) + (sel
& 0x1F);
509 n
->src
[s
] = sh
->get_kcache_value(kc
.bank
, kc_addr
, src
.chan
, (alu_kcache_index_mode
)kc
.index_mode
);
511 if (kc
.index_mode
!= KC_INDEX_NONE
) {
512 assert(kc
.index_mode
!= KC_LOCK_LOOP
);
513 ubo_indexing
[kc
.index_mode
- KC_INDEX_0
] = true;
515 } else if (src
.sel
< MAX_GPR
) {
516 value
*v
= sh
->get_gpr_value(true, src
.sel
, src
.chan
, src
.rel
);
520 } else if (src
.sel
>= ALU_SRC_PARAM_OFFSET
) {
521 // using slot for value channel because in fact the slot
522 // determines the channel that is loaded by INTERP_LOAD_P0
523 // (and maybe some others).
524 // otherwise GVN will consider INTERP_LOAD_P0s with the same
525 // param index as equal instructions and leave only one of them
526 n
->src
[s
] = sh
->get_special_ro_value(sel_chan(src
.sel
,
528 } else if (ctx
.is_lds_oq(src
.sel
)) {
530 case ALU_SRC_LDS_OQ_A
:
531 case ALU_SRC_LDS_OQ_B
:
532 assert(!"Unsupported LDS queue access in SB");
534 case ALU_SRC_LDS_OQ_A_POP
:
535 n
->src
[s
] = sh
->get_special_value(SV_LDS_OQA
);
537 case ALU_SRC_LDS_OQ_B_POP
:
538 n
->src
[s
] = sh
->get_special_value(SV_LDS_OQB
);
541 n
->flags
|= NF_DONT_HOIST
| NF_DONT_MOVE
;
546 n
->src
[s
] = sh
->get_const_value(0);
549 n
->src
[s
] = sh
->get_const_value(0.5f
);
552 n
->src
[s
] = sh
->get_const_value(1.0f
);
555 n
->src
[s
] = sh
->get_const_value(1);
557 case ALU_SRC_M_1_INT
:
558 n
->src
[s
] = sh
->get_const_value(-1);
561 n
->src
[s
] = sh
->get_special_ro_value(src
.sel
);
567 // add UBO index values if any as dependencies
568 if (ubo_indexing
[0]) {
569 n
->src
.push_back(get_cf_index_value(0));
571 if (ubo_indexing
[1]) {
572 n
->src
.push_back(get_cf_index_value(1));
575 if ((flags
& AF_MOVA
) && (n
->bc
.dst_gpr
== CM_V_SQ_MOVA_DST_CF_IDX0
|| n
->bc
.dst_gpr
== CM_V_SQ_MOVA_DST_CF_IDX1
) &&
577 // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
578 save_set_cf_index(n
->src
[0], n
->bc
.dst_gpr
== CM_V_SQ_MOVA_DST_CF_IDX1
);
581 // pack multislot instructions into alu_packed_node
583 alu_packed_node
*p
= NULL
;
584 for (node_iterator N
, I
= g
->begin(), E
= g
->end(); I
!= E
; I
= N
) {
586 alu_node
*a
= static_cast<alu_node
*>(*I
);
587 unsigned sflags
= a
->bc
.slot_flags
;
589 if (sflags
== AF_4V
|| (ctx
.is_cayman() && sflags
== AF_S
)) {
591 p
= sh
->create_alu_packed();
601 if (p
->count() == 3 && ctx
.is_cayman()) {
602 // cayman's scalar instruction that can use 3 or 4 slots
604 // FIXME for simplicity we'll always add 4th slot,
605 // but probably we might want to always remove 4th slot and make
606 // sure that regalloc won't choose 'w' component for dst
608 alu_node
*f
= static_cast<alu_node
*>(p
->first
);
609 alu_node
*a
= sh
->create_alu();
611 a
->dst
.resize(f
->dst
.size());
621 int bc_parser::decode_fetch_clause(cf_node
* cf
) {
623 unsigned i
= cf
->bc
.addr
<< 1, cnt
= cf
->bc
.count
+ 1;
625 if (cf
->bc
.op_ptr
->flags
& FF_GDS
)
626 cf
->subtype
= NST_GDS_CLAUSE
;
628 cf
->subtype
= NST_TEX_CLAUSE
;
631 fetch_node
*n
= sh
->create_fetch();
633 if ((r
= dec
->decode_fetch(i
, n
->bc
)))
635 if (n
->bc
.src_rel
|| n
->bc
.dst_rel
)
642 int bc_parser::prepare_fetch_clause(cf_node
*cf
) {
644 vvec grad_v
, grad_h
, texture_offsets
;
646 for (node_iterator I
= cf
->begin(), E
= cf
->end(); I
!= E
; ++I
) {
648 fetch_node
*n
= static_cast<fetch_node
*>(*I
);
649 assert(n
->is_valid());
651 unsigned flags
= n
->bc
.op_ptr
->flags
;
653 unsigned vtx
= flags
& FF_VTX
;
654 unsigned gds
= flags
& FF_GDS
;
655 unsigned num_src
= gds
? 2 : vtx
? ctx
.vtx_src_num
: 4;
660 n
->flags
|= NF_DONT_HOIST
| NF_DONT_MOVE
| NF_DONT_KILL
;
662 if (flags
& (FF_SETGRAD
| FF_USEGRAD
| FF_GETGRAD
)) {
663 sh
->uses_gradients
= true;
666 if (flags
& (FF_SETGRAD
| FF_SET_TEXTURE_OFFSETS
)) {
671 case FETCH_OP_SET_GRADIENTS_V
:
674 case FETCH_OP_SET_GRADIENTS_H
:
677 case FETCH_OP_SET_TEXTURE_OFFSETS
:
678 grad
= &texture_offsets
;
681 assert(!"unexpected SET_GRAD instruction");
688 for(unsigned s
= 0; s
< 4; ++s
) {
689 unsigned sw
= n
->bc
.src_sel
[s
];
691 (*grad
)[s
] = sh
->get_gpr_value(true, n
->bc
.src_gpr
,
693 else if (sw
== SEL_0
)
694 (*grad
)[s
] = sh
->get_const_value(0.0f
);
695 else if (sw
== SEL_1
)
696 (*grad
)[s
] = sh
->get_const_value(1.0f
);
699 // Fold source values for instructions with hidden target values in to the instructions
700 // using them. The set instructions are later re-emitted by bc_finalizer
701 if (flags
& FF_USEGRAD
) {
703 std::copy(grad_v
.begin(), grad_v
.end(), n
->src
.begin() + 4);
704 std::copy(grad_h
.begin(), grad_h
.end(), n
->src
.begin() + 8);
705 } else if (flags
& FF_USE_TEXTURE_OFFSETS
) {
707 std::copy(texture_offsets
.begin(), texture_offsets
.end(), n
->src
.begin() + 4);
712 for(int s
= 0; s
< 4; ++s
) {
713 if (n
->bc
.dst_sel
[s
] != SEL_MASK
)
714 n
->dst
[s
] = sh
->get_gpr_value(false, n
->bc
.dst_gpr
, s
, false);
715 // NOTE: it doesn't matter here which components of the result we
716 // are using, but original n->bc.dst_sel should be taken into
717 // account when building the bytecode
719 for(unsigned s
= 0; s
< num_src
; ++s
) {
720 if (n
->bc
.src_sel
[s
] <= SEL_W
)
721 n
->src
[s
] = sh
->get_gpr_value(true, n
->bc
.src_gpr
,
722 n
->bc
.src_sel
[s
], false);
725 // Scheduler will emit the appropriate instructions to set CF_IDX0/1
726 if (n
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
) {
727 n
->src
.push_back(get_cf_index_value(n
->bc
.sampler_index_mode
== V_SQ_CF_INDEX_1
));
729 if (n
->bc
.resource_index_mode
!= V_SQ_CF_INDEX_NONE
) {
730 n
->src
.push_back(get_cf_index_value(n
->bc
.resource_index_mode
== V_SQ_CF_INDEX_1
));
734 if (n
->bc
.op
== FETCH_OP_READ_SCRATCH
) {
735 n
->src
.push_back(sh
->get_special_value(SV_SCRATCH
));
736 n
->dst
.push_back(sh
->get_special_value(SV_SCRATCH
));
743 int bc_parser::prepare_ir() {
745 for(id_cf_map::iterator I
= cf_map
.begin(), E
= cf_map
.end(); I
!= E
; ++I
) {
751 unsigned flags
= c
->bc
.op_ptr
->flags
;
753 if (flags
& CF_ALU
) {
754 prepare_alu_clause(c
);
755 } else if (flags
& CF_FETCH
) {
756 prepare_fetch_clause(c
);
757 } else if (c
->bc
.op
== CF_OP_CALL_FS
) {
759 c
->flags
|= NF_SCHEDULE_EARLY
| NF_DONT_MOVE
;
760 } else if (flags
& CF_LOOP_START
) {
762 } else if (c
->bc
.op
== CF_OP_JUMP
) {
764 } else if (c
->bc
.op
== CF_OP_LOOP_END
) {
766 } else if (c
->bc
.op
== CF_OP_LOOP_CONTINUE
) {
767 assert(!loop_stack
.empty());
768 repeat_node
*rep
= sh
->create_repeat(loop_stack
.top());
769 if (c
->parent
->first
!= c
)
770 rep
->move(c
->parent
->first
, c
);
771 c
->replace_with(rep
);
772 sh
->simplify_dep_rep(rep
);
773 } else if (c
->bc
.op
== CF_OP_LOOP_BREAK
) {
774 assert(!loop_stack
.empty());
775 depart_node
*dep
= sh
->create_depart(loop_stack
.top());
776 if (c
->parent
->first
!= c
)
777 dep
->move(c
->parent
->first
, c
);
778 c
->replace_with(dep
);
779 sh
->simplify_dep_rep(dep
);
780 } else if (flags
& CF_EXP
) {
782 // unroll burst exports
784 assert(c
->bc
.op
== CF_OP_EXPORT
|| c
->bc
.op
== CF_OP_EXPORT_DONE
);
786 c
->bc
.set_op(CF_OP_EXPORT
);
788 unsigned burst_count
= c
->bc
.burst_count
;
789 unsigned eop
= c
->bc
.end_of_program
;
791 c
->bc
.end_of_program
= 0;
792 c
->bc
.burst_count
= 0;
797 for(int s
= 0; s
< 4; ++s
) {
798 switch (c
->bc
.sel
[s
]) {
800 c
->src
[s
] = sh
->get_const_value(0.0f
);
803 c
->src
[s
] = sh
->get_const_value(1.0f
);
808 if (c
->bc
.sel
[s
] <= SEL_W
)
809 c
->src
[s
] = sh
->get_gpr_value(true, c
->bc
.rw_gpr
,
810 c
->bc
.sel
[s
], false);
812 assert(!"invalid src_sel for export");
819 cf_node
*cf_next
= sh
->create_cf();
821 ++cf_next
->bc
.rw_gpr
;
822 ++cf_next
->bc
.array_base
;
824 c
->insert_after(cf_next
);
829 c
->bc
.end_of_program
= eop
;
830 } else if (flags
& CF_MEM
) {
832 unsigned burst_count
= c
->bc
.burst_count
;
833 unsigned eop
= c
->bc
.end_of_program
;
835 c
->bc
.end_of_program
= 0;
836 c
->bc
.burst_count
= 0;
840 if (ctx
.hw_class
== HW_CLASS_R600
&& c
->bc
.op
== CF_OP_MEM_SCRATCH
&&
841 (c
->bc
.type
== 2 || c
->bc
.type
== 3)) {
843 for(int s
= 0; s
< 4; ++s
) {
844 if (c
->bc
.comp_mask
& (1 << s
))
846 sh
->get_gpr_value(true, c
->bc
.rw_gpr
, s
, false);
852 for(int s
= 0; s
< 4; ++s
) {
853 if (c
->bc
.comp_mask
& (1 << s
))
855 sh
->get_gpr_value(true, c
->bc
.rw_gpr
, s
, false);
859 if (((flags
& CF_RAT
) || (!(flags
& CF_STRM
))) && (c
->bc
.type
& 1)) { // indexed write
861 for(int s
= 0; s
< 3; ++s
) {
863 sh
->get_gpr_value(true, c
->bc
.index_gpr
, s
, false);
866 // FIXME probably we can relax it a bit
867 c
->flags
|= NF_DONT_HOIST
| NF_DONT_MOVE
;
870 if (flags
& CF_EMIT
) {
871 // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
872 c
->src
.push_back(sh
->get_special_value(SV_GEOMETRY_EMIT
));
873 c
->dst
.push_back(sh
->get_special_value(SV_GEOMETRY_EMIT
));
874 if (sh
->target
== TARGET_ES
) {
875 // For ES shaders this is an export
876 c
->flags
|= NF_DONT_KILL
;
879 else if (c
->bc
.op
== CF_OP_MEM_SCRATCH
) {
880 c
->src
.push_back(sh
->get_special_value(SV_SCRATCH
));
881 c
->dst
.push_back(sh
->get_special_value(SV_SCRATCH
));
887 cf_node
*cf_next
= sh
->create_cf();
889 ++cf_next
->bc
.rw_gpr
;
891 // FIXME is it correct?
892 cf_next
->bc
.array_base
+= cf_next
->bc
.elem_size
+ 1;
894 c
->insert_after(cf_next
);
898 c
->bc
.end_of_program
= eop
;
900 } else if (flags
& CF_EMIT
) {
902 cf_node
*prev
= static_cast<cf_node
*>(c
->prev
);
903 if (c
->bc
.op
== CF_OP_CUT_VERTEX
&&
904 prev
&& prev
->is_valid() &&
905 prev
->bc
.op
== CF_OP_EMIT_VERTEX
&&
906 c
->bc
.count
== prev
->bc
.count
) {
907 prev
->bc
.set_op(CF_OP_EMIT_CUT_VERTEX
);
908 prev
->bc
.end_of_program
= c
->bc
.end_of_program
;
912 c
->flags
|= NF_DONT_KILL
| NF_DONT_HOIST
| NF_DONT_MOVE
;
914 c
->src
.push_back(sh
->get_special_value(SV_GEOMETRY_EMIT
));
915 c
->dst
.push_back(sh
->get_special_value(SV_GEOMETRY_EMIT
));
917 } else if (c
->bc
.op
== CF_OP_WAIT_ACK
) {
918 c
->src
.push_back(sh
->get_special_value(SV_SCRATCH
));
919 c
->dst
.push_back(sh
->get_special_value(SV_SCRATCH
));
923 assert(loop_stack
.empty());
927 int bc_parser::prepare_loop(cf_node
* c
) {
928 assert(c
->bc
.addr
-1 < cf_map
.size());
930 cf_node
*end
= cf_map
[c
->bc
.addr
- 1];
931 assert(end
->bc
.op
== CF_OP_LOOP_END
);
932 assert(c
->parent
== end
->parent
);
934 region_node
*reg
= sh
->create_region();
935 repeat_node
*rep
= sh
->create_repeat(reg
);
938 c
->insert_before(reg
);
939 rep
->move(c
, end
->next
);
941 reg
->src_loop
= true;
943 loop_stack
.push(reg
);
947 int bc_parser::prepare_if(cf_node
* c
) {
948 assert(c
->bc
.addr
-1 < cf_map
.size());
949 cf_node
*c_else
= NULL
, *end
= cf_map
[c
->bc
.addr
];
952 return 0; // not quite sure how this happens, malformed input?
955 sblog
<< "parsing JUMP @" << c
->bc
.id
;
959 if (end
->bc
.op
== CF_OP_ELSE
) {
961 sblog
<< " found ELSE : ";
967 end
= cf_map
[c_else
->bc
.addr
];
970 sblog
<< " no else\n";
976 if (c_else
->parent
!= c
->parent
)
979 if (end
&& end
->parent
!= c
->parent
)
982 region_node
*reg
= sh
->create_region();
984 depart_node
*dep2
= sh
->create_depart(reg
);
985 depart_node
*dep
= sh
->create_depart(reg
);
986 if_node
*n_if
= sh
->create_if();
988 c
->insert_before(reg
);
991 dep
->move(c_else
, end
);
995 dep
->push_front(n_if
);
996 n_if
->push_back(dep2
);
998 n_if
->cond
= sh
->get_special_value(SV_EXEC_MASK
);
1004 } // namespace r600_sb