2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
43 class hw_encoding_format
;
// Pure virtual output hook taking a C string. The sb_ostringstream and
// sb_log classes declared further down each provide an override.
57 virtual void write(const char *s
) = 0;
59 sb_ostream
& operator <<(const char *s
) {
64 sb_ostream
& operator <<(const std::string
& s
) {
65 return *this << s
.c_str();
68 sb_ostream
& operator <<(void *p
) {
74 sb_ostream
& operator <<(char c
) {
80 sb_ostream
& operator <<(int n
) {
86 sb_ostream
& operator <<(unsigned n
) {
92 sb_ostream
& operator <<(double d
) {
94 snprintf(b
, 32, "%g", d
);
98 // print int as field of specified width, right aligned
99 void print_w(int n
, int width
) {
101 sprintf(f
, "%%%dd", width
);
102 snprintf(b
, 256, f
, n
);
106 // print int as field of specified width, left aligned
107 void print_wl(int n
, int width
) {
109 sprintf(f
, "%%-%dd", width
);
110 snprintf(b
, 256, f
, n
);
114 // print string as field of specified width, left aligned
115 void print_wl(const std::string
&s
, int width
) {
118 while (l
++ < width
) {
123 // print int as field of specified width, right aligned, zero-padded
124 void print_zw(int n
, int width
) {
126 sprintf(f
, "%%0%dd", width
);
127 snprintf(b
, 256, f
, n
);
131 // print int as field of specified width, right aligned, zero-padded, hex
132 void print_zw_hex(int n
, int width
) {
134 sprintf(f
, "%%0%dx", width
);
135 snprintf(b
, 256, f
, n
);
140 class sb_ostringstream
: public sb_ostream
{
143 sb_ostringstream() : data() {}
145 virtual void write(const char *s
) {
149 void clear() { data
.clear(); }
151 const char* c_str() { return data
.c_str(); }
152 std::string
& str() { return data
; }
155 class sb_log
: public sb_ostream
{
158 sb_log() : o(stderr
) {}
160 virtual void write(const char *s
) {
183 enum sb_hw_class_bits
190 HB_R6R7
= (HB_R6
| HB_R7
),
191 HB_EGCM
= (HB_EG
| HB_CM
),
192 HB_R6R7EG
= (HB_R6
| HB_R7
| HB_EG
),
193 HB_R7EGCM
= (HB_R7
| HB_EG
| HB_CM
),
195 HB_ALL
= (HB_R6
| HB_R7
| HB_EG
| HB_CM
)
246 MAX_ALU_LITERALS
= 4,
255 ALU_SRC_LDS_OQ_A
= 219,
256 ALU_SRC_LDS_OQ_B
= 220,
257 ALU_SRC_LDS_OQ_A_POP
= 221,
258 ALU_SRC_LDS_OQ_B_POP
= 222,
259 ALU_SRC_LDS_DIRECT_A
= 223,
260 ALU_SRC_LDS_DIRECT_B
= 224,
261 ALU_SRC_TIME_HI
= 227,
262 ALU_SRC_TIME_LO
= 228,
263 ALU_SRC_MASK_HI
= 229,
264 ALU_SRC_MASK_LO
= 230,
265 ALU_SRC_HW_WAVE_ID
= 231,
266 ALU_SRC_SIMD_ID
= 232,
268 ALU_SRC_HW_THREADGRP_ID
= 234,
269 ALU_SRC_WAVE_ID_IN_GRP
= 235,
270 ALU_SRC_NUM_THREADGRP_WAVES
= 236,
271 ALU_SRC_HW_ALU_ODD
= 237,
272 ALU_SRC_LOOP_IDX
= 238,
273 ALU_SRC_PARAM_BASE_ADDR
= 240,
274 ALU_SRC_NEW_PRIM_MASK
= 241,
275 ALU_SRC_PRIM_MASK_HI
= 242,
276 ALU_SRC_PRIM_MASK_LO
= 243,
277 ALU_SRC_1_DBL_L
= 244,
278 ALU_SRC_1_DBL_M
= 245,
279 ALU_SRC_0_5_DBL_L
= 246,
280 ALU_SRC_0_5_DBL_M
= 247,
284 ALU_SRC_M_1_INT
= 251,
286 ALU_SRC_LITERAL
= 253,
290 ALU_SRC_PARAM_OFFSET
= 448
293 enum alu_predicate_select
309 enum alu_index_mode
{
317 INDEX_GLOBAL_AR_X
= 6
320 enum alu_cayman_mova_dst
{
325 CM_MOVADST_CG0
, // clause-global byte 0
331 enum alu_cayman_exec_mask_op
{
355 enum alu_kcache_mode
{
362 enum alu_kcache_index_mode
{
399 enum sched_queue_id
{
415 literal(int32_t i
= 0) : i(i
) {}
416 literal(uint32_t u
) : u(u
) {}
417 literal(float f
) : f(f
) {}
418 literal(double f
) : f(f
) {}
419 operator uint32_t() const { return u
; }
420 bool operator ==(literal l
) { return u
== l
.u
; }
421 bool operator ==(int v_int
) { return i
== v_int
; }
422 bool operator ==(unsigned v_uns
) { return u
== v_uns
; }
432 // TODO optimize bc structures
441 const cf_op_info
* op_ptr
;
446 unsigned alt_const
:1;
447 unsigned uses_waterfall
:1;
451 unsigned pop_count
:3;
452 unsigned call_count
:6;
453 unsigned whole_quad_mode
:1;
454 unsigned valid_pixel_mode
:1;
456 unsigned jumptable_sel
:3;
459 unsigned end_of_program
:1;
461 unsigned array_base
:13;
462 unsigned elem_size
:2;
463 unsigned index_gpr
:7;
468 unsigned burst_count
:4;
472 unsigned array_size
:12;
473 unsigned comp_mask
:4;
477 unsigned rat_index_mode
:2;
479 void set_op(unsigned op
) { this->op
= op
; op_ptr
= r600_isa_cf(op
); }
481 bool is_alu_extended() {
482 assert(op_ptr
->flags
& CF_ALU
);
483 return kc
[2].mode
!= KC_LOCK_NONE
|| kc
[3].mode
!= KC_LOCK_NONE
||
484 kc
[0].index_mode
!= KC_INDEX_NONE
|| kc
[1].index_mode
!= KC_INDEX_NONE
||
485 kc
[2].index_mode
!= KC_INDEX_NONE
|| kc
[3].index_mode
!= KC_INDEX_NONE
;
500 const alu_op_info
* op_ptr
;
510 unsigned bank_swizzle
:3;
512 unsigned index_mode
:3;
516 unsigned fog_merge
:1;
517 unsigned write_mask
:1;
518 unsigned update_exec_mask
:1;
519 unsigned update_pred
:1;
523 unsigned lds_idx_offset
:6;
525 alu_op_flags slot_flags
;
527 void set_op(unsigned op
) {
529 op_ptr
= r600_isa_alu(op
);
534 const fetch_op_info
* op_ptr
;
537 unsigned bc_frac_mode
:1;
538 unsigned fetch_whole_quad
:1;
539 unsigned resource_id
:8;
543 unsigned src_rel_global
:1; /* for GDS ops */
548 unsigned dst_rel_global
:1; /* for GDS ops */
551 unsigned alt_const
:1;
554 unsigned resource_index_mode
:2;
555 unsigned sampler_index_mode
:2;
557 unsigned coord_type
[4];
562 unsigned sampler_id
:5;
565 unsigned fetch_type
:2;
566 unsigned mega_fetch_count
:6;
567 unsigned coalesced_read
:1;
568 unsigned structured_read
:2;
571 unsigned data_format
:6;
572 unsigned format_comp_all
:1;
573 unsigned num_format_all
:2;
574 unsigned semantic_id
:8;
575 unsigned srf_mode_all
:1;
576 unsigned use_const_fields
:1;
578 unsigned const_buf_no_stride
:1;
579 unsigned endian_swap
:2;
580 unsigned mega_fetch
:1;
582 unsigned src2_gpr
:7; /* for GDS */
583 void set_op(unsigned op
) { this->op
= op
; op_ptr
= r600_isa_fetch(op
); }
586 struct shader_stats
{
591 unsigned cf
; // clause instructions not included
593 unsigned alu_clauses
;
594 unsigned fetch_clauses
;
598 unsigned shaders
; // number of shaders (for accumulated stats)
600 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
601 fetch_clauses(), fetch(), alu_groups(), shaders() {}
603 void collect(node
*n
);
604 void accumulate(shader_stats
&s
);
606 void dump_diff(shader_stats
&s
);
613 shader_stats src_stats
, opt_stats
;
618 sb_hw_class hw_class
;
620 unsigned alu_temp_gprs
;
623 unsigned vtx_src_num
;
627 bool r6xx_gpr_index_workaround
;
629 bool stack_workaround_8xx
;
630 bool stack_workaround_9xx
;
632 unsigned wavefront_size
;
633 unsigned stack_entry_size
;
635 static unsigned dump_pass
;
636 static unsigned dump_stat
;
638 static unsigned dry_run
;
639 static unsigned no_fallback
;
640 static unsigned safe_math
;
642 static unsigned dskip_start
;
643 static unsigned dskip_end
;
644 static unsigned dskip_mode
;
// Default constructor: value-initializes both statistics blocks, nulls the
// isa pointer and marks chip and class as unknown.
// NOTE(review): the other members declared nearby (alu_temp_gprs,
// wavefront_size, stack_entry_size, the workaround flags, ...) are not in
// this initializer list - presumably init() fills them in; confirm.
646 sb_context() : src_stats(), opt_stats(), isa(0),
647 hw_chip(HW_CHIP_UNKNOWN
), hw_class(HW_CLASS_UNKNOWN
) {}
649 int init(r600_isa
*isa
, sb_hw_chip chip
, sb_hw_class cclass
);
651 bool is_r600() {return hw_class
== HW_CLASS_R600
;}
652 bool is_r700() {return hw_class
== HW_CLASS_R700
;}
653 bool is_evergreen() {return hw_class
== HW_CLASS_EVERGREEN
;}
654 bool is_cayman() {return hw_class
== HW_CLASS_CAYMAN
;}
655 bool is_egcm() {return hw_class
>= HW_CLASS_EVERGREEN
;}
657 bool needs_8xx_stack_workaround() {
662 case HW_CHIP_CYPRESS
:
663 case HW_CHIP_JUNIPER
:
670 bool needs_9xx_stack_workaround() {
674 sb_hw_class_bits
hw_class_bit() {
676 case HW_CLASS_R600
:return HB_R6
;
677 case HW_CLASS_R700
:return HB_R7
;
678 case HW_CLASS_EVERGREEN
:return HB_EG
;
679 case HW_CLASS_CAYMAN
:return HB_CM
;
680 default: assert(!"unknown hw class"); return (sb_hw_class_bits
)0;
685 unsigned cf_opcode(unsigned op
) {
686 return r600_isa_cf_opcode(isa
->hw_class
, op
);
689 unsigned alu_opcode(unsigned op
) {
690 return r600_isa_alu_opcode(isa
->hw_class
, op
);
693 unsigned alu_slots(unsigned op
) {
694 return r600_isa_alu_slots(isa
->hw_class
, op
);
697 unsigned alu_slots(const alu_op_info
* op_ptr
) {
698 return op_ptr
->slots
[isa
->hw_class
];
701 unsigned alu_slots_mask(const alu_op_info
* op_ptr
) {
703 unsigned slot_flags
= alu_slots(op_ptr
);
704 if (slot_flags
& AF_V
)
706 if (!is_cayman() && (slot_flags
& AF_S
))
711 unsigned fetch_opcode(unsigned op
) {
712 return r600_isa_fetch_opcode(isa
->hw_class
, op
);
715 bool is_kcache_sel(unsigned sel
) {
716 return ((sel
>= 128 && sel
< 192) || (sel
>= 256 && sel
< 320));
719 const char * get_hw_class_name();
720 const char * get_hw_chip_name();
// Execute the statement(s) passed as `a` only when the corresponding static
// sb_context flag (dump_stat / dump_pass) is non-zero. The do { } while (0)
// wrapper lets each use be followed by a semicolon like a single statement.
724 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
725 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
736 bc_decoder(sb_context
&sctx
, uint32_t *data
, unsigned size
)
737 : ctx(sctx
), dw(data
), ndw(size
) {}
739 int decode_cf(unsigned &i
, bc_cf
&bc
);
740 int decode_alu(unsigned &i
, bc_alu
&bc
);
741 int decode_fetch(unsigned &i
, bc_fetch
&bc
);
744 int decode_cf_alu(unsigned &i
, bc_cf
&bc
);
745 int decode_cf_exp(unsigned &i
, bc_cf
&bc
);
746 int decode_cf_mem(unsigned &i
, bc_cf
&bc
);
748 int decode_fetch_vtx(unsigned &i
, bc_fetch
&bc
);
749 int decode_fetch_gds(unsigned &i
, bc_fetch
&bc
);
752 // bytecode format definition
754 class hw_encoding_format
{
755 const sb_hw_class_bits hw_target
; //FIXME: debug - remove after testing
756 hw_encoding_format();
// Creates a format tagged with the hardware-class bits `hw`; the raw value
// starts out as 0.
760 hw_encoding_format(sb_hw_class_bits hw
)
761 : hw_target(hw
), value(0) {}
// Same as above, but the raw value starts out as `v`.
762 hw_encoding_format(uint32_t v
, sb_hw_class_bits hw
)
763 : hw_target(hw
), value(v
) {}
764 uint32_t get_value(sb_hw_class_bits hw
) const {
765 assert((hw
& hw_target
) == hw
);
770 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \
771 class fmt##_##hwset : public hw_encoding_format {\
772 typedef fmt##_##hwset thistype; \
774 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
775 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
777 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
779 #define BC_FORMAT_END(fmt) };
781 // bytecode format field definition
783 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
784 thistype & name(unsigned v) { \
785 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
788 unsigned get_##name() const { \
789 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
792 #define BC_RSRVD(fmt, last_bit, first_bit)
794 // CLAMP macro defined elsewhere interferes with bytecode field name
796 #include "sb_bc_fmt_def.inc"
798 #undef BC_FORMAT_BEGIN
809 r600_shader
*pshader
;
820 alu_node
*slots
[2][5];
823 typedef std::vector
<cf_node
*> id_cf_map
;
826 typedef std::stack
<region_node
*> region_stack
;
827 region_stack loop_stack
;
831 // Note: currently relies on input emitting SET_CF in same basic block as uses
832 value
*cf_index_value
[2];
// Binds the parser to the given context, bytecode and shader. Every other
// member named in the initializer list is value-initialized.
836 bc_parser(sb_context
&sctx
, r600_bytecode
*bc
, r600_shader
* pshader
) :
837 ctx(sctx
), dec(), bc(bc
), pshader(pshader
),
838 dw(), bc_ndw(), max_cf(),
839 sh(), error(), slots(), cgroup(),
840 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
845 shader
* get_shader() { assert(!error
); return sh
; }
853 int decode_cf(unsigned &i
, bool &eop
);
855 int decode_alu_clause(cf_node
*cf
);
856 int decode_alu_group(cf_node
* cf
, unsigned &i
, unsigned &gcnt
);
858 int decode_fetch_clause(cf_node
*cf
);
861 int prepare_alu_clause(cf_node
*cf
);
862 int prepare_alu_group(cf_node
* cf
, alu_group_node
*g
);
863 int prepare_fetch_clause(cf_node
*cf
);
865 int prepare_loop(cf_node
*c
);
866 int prepare_if(cf_node
*c
);
868 void save_set_cf_index(value
*val
, unsigned idx
);
869 value
*get_cf_index_value(unsigned idx
);
870 void save_mova(alu_node
*mova
);
871 alu_node
*get_mova();
878 typedef std::vector
<uint32_t> bc_vector
;
879 sb_hw_class_bits hw_class_bit
;
// Creates an empty bytecode buffer for hardware class `hw`, with the write
// position at 0 and capacity for `rdw` entries (256 by default) reserved
// up front.
887 bytecode(sb_hw_class_bits hw
, unsigned rdw
= 256)
888 : hw_class_bit(hw
), pos(0) { bc
.reserve(rdw
); }
890 unsigned ndw() { return bc
.size(); }
892 void write_data(uint32_t* dst
) {
893 std::copy(bc
.begin(), bc
.end(), dst
);
896 void align(unsigned a
) {
897 unsigned size
= bc
.size();
898 size
= (size
+ a
- 1) & ~(a
-1);
902 void set_size(unsigned sz
) {
903 assert(sz
>= bc
.size());
907 void seek(unsigned p
) {
916 unsigned get_pos() { return pos
; }
917 uint32_t *data() { return &bc
[0]; }
919 bytecode
& operator <<(uint32_t v
) {
928 bytecode
& operator <<(const hw_encoding_format
&e
) {
929 *this << e
.get_value(hw_class_bit
);
933 bytecode
& operator <<(const bytecode
&b
) {
934 bc
.insert(bc
.end(), b
.bc
.begin(), b
.bc
.end());
938 uint32_t at(unsigned dw_id
) { return bc
.at(dw_id
); }
950 bc_builder(shader
&s
);
952 bytecode
& get_bytecode() { assert(!error
); return bb
; }
956 int build_cf(cf_node
*n
);
958 int build_cf_alu(cf_node
*n
);
959 int build_cf_mem(cf_node
*n
);
960 int build_cf_exp(cf_node
*n
);
962 int build_alu_clause(cf_node
*n
);
963 int build_alu_group(alu_group_node
*n
);
964 int build_alu(alu_node
*n
);
966 int build_fetch_clause(cf_node
*n
);
967 int build_fetch_tex(fetch_node
*n
);
968 int build_fetch_vtx(fetch_node
*n
);
971 } // namespace r600_sb
973 #endif /* SB_BC_H_ */