/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"

#include "sb_shader.h"

#include "sb_pass.h"

namespace r600_sb {
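// bc_parser rebuilds the sb IR from final r600 hardware bytecode.
// Parsing runs in two phases: decode() reconstructs the raw CF/ALU/fetch
// node structure from the dword stream, prepare() then converts it to
// typed values and structured control flow for the optimizer.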
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
		case TGSI_PROCESSOR_VERTEX:
			t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
			break;
		case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
		case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == TGSI_PROCESSOR_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

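// Decode CF instructions from the start of the bytecode until end of
// program; branch targets recorded in max_cf can extend decoding past
// the first EOP marker.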
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) < max_cf);

	return 0;
}

int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

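// Translate the driver-provided shader declarations into sb inputs and
// GPR arrays; without pshader info (compute/fetch shaders) only the
// preloaded R0/R1 inputs are registered.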
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// GS inputs can add indirect addressing
	if (sh->target == TARGET_GS) {
		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		}
	}

	if (sh->target == TARGET_VS || sh->target == TARGET_ES)
		sh->add_input(0, 1, 0x0F);
	else if (sh->target == TARGET_GS) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	}

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	bool ij_interpolators[6];
	memset(ij_interpolators, 0, sizeof(ij_interpolators));

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			int k = eg_get_interpolator_index(in.interpolate,
					in.interpolate_location);
			if (k >= 0)
				ij_interpolators[k] |= true;
		}
	}

	if (ps_interp) {
		/* add the egcm ij interpolators to live inputs */
		unsigned num_ij = 0;
		for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
			num_ij += ij_interpolators[i];
		}

		unsigned mask = (1 << (2 * num_ij)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

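// Decode a single CF instruction at dword offset i, record it in cf_map
// by its id, and descend into ALU/fetch clauses.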
int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_MEM) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;

	return 0;
}

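// Decode all instruction groups of an ALU clause; cnt counts 64-bit
// slots, including the literal constants that follow each group.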
int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	do {
		decode_alu_group(cf, i, gcnt);
		assert(gcnt <= cnt);
		cnt -= gcnt;
	} while (cnt);

	return 0;
}

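// Decode one ALU instruction group (up to 5 slots plus trailing
// literals), assigning each instruction to its hardware slot.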
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];

			if (src.rel)
				gpr_reladdr = true;

			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

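// Second pass over a decoded ALU clause: lower each group to IR values.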
int bc_parser::prepare_alu_clause(cf_node* cf) {

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		prepare_alu_group(cf, g);
	}

	return 0;
}

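// Lower one ALU group to IR: redo slot assignment so PV/PS references
// can find their producers, attach special values for predicates, kill
// and AR, resolve all source operands, and pack multislot instructions.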
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
					                      prev_alu->bc.dst_chan,
					                      prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
				                                              n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

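// Decode the vtx/tex instructions of a fetch clause.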
int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;
	}

	return 0;
}

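// Lower decoded fetch instructions to IR values; gradient and texture
// offset sources are captured and folded into the instructions that
// use them.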
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			case FETCH_OP_SET_TEXTURE_OFFSETS:
				grad = &texture_offsets;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for(unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                               sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values
			// into the instructions using them. The set instructions are
			// later re-emitted by bc_finalizer
			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(),
				          n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for(int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s,
					                              false);
				// NOTE: it doesn't matter here which components of the result
				// we are using, but original n->bc.dst_sel should be taken
				// into account when building the bytecode
			}
			for(unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							n->bc.src_sel[s], false);
			}
		}
	}

	return 0;
}

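// Walk all decoded CF instructions and build the structured IR: prepare
// ALU/fetch clauses, turn loops and jumps into region/repeat/depart
// nodes, and unroll burst exports and memory writes into single CF ops.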
int bc_parser::prepare_ir() {

	for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for(int s = 0; s < 3; ++s) {
						c->src[4 + s] =
							sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

			c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
		}
	}

	assert(loop_stack.empty());
	return 0;
}

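// Wrap a LOOP_START..LOOP_END range in region/repeat nodes; the region
// is pushed on loop_stack for LOOP_CONTINUE/LOOP_BREAK lowering.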
int bc_parser::prepare_loop(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	reg->src_loop = true;

	loop_stack.push(reg);
	return 0;
}

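// Convert a JUMP[/ELSE] construct into region/depart/if nodes with the
// exec mask as the condition.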
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}

} // namespace r600_sb