2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
45 class hw_encoding_format
;
58 virtual void write(const char *s
) = 0;
60 sb_ostream
& operator <<(const char *s
) {
65 sb_ostream
& operator <<(const std::string
& s
) {
66 return *this << s
.c_str();
69 sb_ostream
& operator <<(void *p
) {
75 sb_ostream
& operator <<(char c
) {
81 sb_ostream
& operator <<(int n
) {
87 sb_ostream
& operator <<(unsigned n
) {
93 sb_ostream
& operator <<(double d
) {
95 snprintf(b
, 32, "%g", d
);
99 // print as field of specified width, right aligned
100 void print_w(int n
, int width
) {
102 sprintf(f
, "%%%dd", width
);
103 snprintf(b
, 256, f
, n
);
107 // print as field of specified width, left aligned
108 void print_wl(int n
, int width
) {
110 sprintf(f
, "%%-%dd", width
);
111 snprintf(b
, 256, f
, n
);
115 // print as field of specified width, left aligned
116 void print_wl(const std::string
&s
, int width
) {
119 while (l
++ < width
) {
124 // print int as field of specified width, right aligned, zero-padded
125 void print_zw(int n
, int width
) {
127 sprintf(f
, "%%0%dd", width
);
128 snprintf(b
, 256, f
, n
);
132 // print int as field of specified width, right aligned, zero-padded, hex
133 void print_zw_hex(int n
, int width
) {
135 sprintf(f
, "%%0%dx", width
);
136 snprintf(b
, 256, f
, n
);
141 class sb_ostringstream
: public sb_ostream
{
144 sb_ostringstream() : data() {}
146 virtual void write(const char *s
) {
150 void clear() { data
.clear(); }
152 const char* c_str() { return data
.c_str(); }
153 std::string
& str() { return data
; }
156 class sb_log
: public sb_ostream
{
159 sb_log() : o(stderr
) {}
161 virtual void write(const char *s
) {
182 enum sb_hw_class_bits
189 HB_R6R7
= (HB_R6
| HB_R7
),
190 HB_EGCM
= (HB_EG
| HB_CM
),
191 HB_R6R7EG
= (HB_R6
| HB_R7
| HB_EG
),
192 HB_R7EGCM
= (HB_R7
| HB_EG
| HB_CM
),
194 HB_ALL
= (HB_R6
| HB_R7
| HB_EG
| HB_CM
)
245 MAX_ALU_LITERALS
= 4,
254 ALU_SRC_LDS_OQ_A
= 219,
255 ALU_SRC_LDS_OQ_B
= 220,
256 ALU_SRC_LDS_OQ_A_POP
= 221,
257 ALU_SRC_LDS_OQ_B_POP
= 222,
258 ALU_SRC_LDS_DIRECT_A
= 223,
259 ALU_SRC_LDS_DIRECT_B
= 224,
260 ALU_SRC_TIME_HI
= 227,
261 ALU_SRC_TIME_LO
= 228,
262 ALU_SRC_MASK_HI
= 229,
263 ALU_SRC_MASK_LO
= 230,
264 ALU_SRC_HW_WAVE_ID
= 231,
265 ALU_SRC_SIMD_ID
= 232,
267 ALU_SRC_HW_THREADGRP_ID
= 234,
268 ALU_SRC_WAVE_ID_IN_GRP
= 235,
269 ALU_SRC_NUM_THREADGRP_WAVES
= 236,
270 ALU_SRC_HW_ALU_ODD
= 237,
271 ALU_SRC_LOOP_IDX
= 238,
272 ALU_SRC_PARAM_BASE_ADDR
= 240,
273 ALU_SRC_NEW_PRIM_MASK
= 241,
274 ALU_SRC_PRIM_MASK_HI
= 242,
275 ALU_SRC_PRIM_MASK_LO
= 243,
276 ALU_SRC_1_DBL_L
= 244,
277 ALU_SRC_1_DBL_M
= 245,
278 ALU_SRC_0_5_DBL_L
= 246,
279 ALU_SRC_0_5_DBL_M
= 247,
283 ALU_SRC_M_1_INT
= 251,
285 ALU_SRC_LITERAL
= 253,
289 ALU_SRC_PARAM_OFFSET
= 448
292 enum alu_predicate_select
308 enum alu_index_mode
{
316 INDEX_GLOBAL_AR_X
= 6
319 enum alu_cayman_mova_dst
{
324 CM_MOVADST_CG0
, // clause-global byte 0
330 enum alu_cayman_exec_mask_op
{
354 enum alu_kcache_mode
{
361 enum alu_kcache_index_mode
{
398 enum sched_queue_id
{
414 literal(int32_t i
= 0) : i(i
) {}
415 literal(uint32_t u
) : u(u
) {}
416 literal(float f
) : f(f
) {}
417 literal(double f
) : f(f
) {}
418 operator uint32_t() const { return u
; }
419 bool operator ==(literal l
) { return u
== l
.u
; }
420 bool operator ==(int v_int
) { return i
== v_int
; }
421 bool operator ==(unsigned v_uns
) { return u
== v_uns
; }
431 // TODO optimize bc structures
440 const cf_op_info
* op_ptr
;
445 unsigned alt_const
:1;
446 unsigned uses_waterfall
:1;
450 unsigned pop_count
:3;
451 unsigned call_count
:6;
452 unsigned whole_quad_mode
:1;
453 unsigned valid_pixel_mode
:1;
455 unsigned jumptable_sel
:3;
458 unsigned end_of_program
:1;
460 unsigned array_base
:13;
461 unsigned elem_size
:2;
462 unsigned index_gpr
:7;
467 unsigned burst_count
:4;
471 unsigned array_size
:12;
472 unsigned comp_mask
:4;
476 unsigned rat_index_mode
:2;
478 void set_op(unsigned op
) { this->op
= op
; op_ptr
= r600_isa_cf(op
); }
480 bool is_alu_extended() {
481 assert(op_ptr
->flags
& CF_ALU
);
482 return kc
[2].mode
!= KC_LOCK_NONE
|| kc
[3].mode
!= KC_LOCK_NONE
;
497 const alu_op_info
* op_ptr
;
507 unsigned bank_swizzle
:3;
509 unsigned index_mode
:3;
513 unsigned fog_merge
:1;
514 unsigned write_mask
:1;
515 unsigned update_exec_mask
:1;
516 unsigned update_pred
:1;
520 alu_op_flags slot_flags
;
522 void set_op(unsigned op
) {
524 op_ptr
= r600_isa_alu(op
);
529 const fetch_op_info
* op_ptr
;
532 unsigned bc_frac_mode
:1;
533 unsigned fetch_whole_quad
:1;
534 unsigned resource_id
:8;
544 unsigned alt_const
:1;
547 unsigned resource_index_mode
:2;
548 unsigned sampler_index_mode
:2;
550 unsigned coord_type
[4];
555 unsigned sampler_id
:5;
558 unsigned fetch_type
:2;
559 unsigned mega_fetch_count
:6;
560 unsigned coalesced_read
:1;
561 unsigned structured_read
:2;
564 unsigned data_format
:6;
565 unsigned format_comp_all
:1;
566 unsigned num_format_all
:2;
567 unsigned semantic_id
:8;
568 unsigned srf_mode_all
:1;
569 unsigned use_const_fields
:1;
571 unsigned const_buf_no_stride
:1;
572 unsigned endian_swap
:2;
573 unsigned mega_fetch
:1;
575 void set_op(unsigned op
) { this->op
= op
; op_ptr
= r600_isa_fetch(op
); }
578 struct shader_stats
{
583 unsigned cf
; // clause instructions not included
585 unsigned alu_clauses
;
586 unsigned fetch_clauses
;
590 unsigned shaders
; // number of shaders (for accumulated stats)
592 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
593 fetch_clauses(), fetch(), alu_groups(), shaders() {}
// Statistics operations; all three are only declared here and defined
// out of line.

// Takes a node pointer.
// NOTE(review): presumably walks the IR starting at `n` and fills in the
// counters of this object - confirm against the definition.
595 void collect(node
*n
);
// Takes another shader_stats by (non-const) reference.
// NOTE(review): presumably adds the counters of `s` into this object -
// confirm against the definition.
596 void accumulate(shader_stats
&s
);
// Takes another shader_stats by (non-const) reference.
// NOTE(review): presumably prints the difference between this object and
// `s` - confirm against the definition.
598 void dump_diff(shader_stats
&s
);
605 shader_stats src_stats
, opt_stats
;
610 sb_hw_class hw_class
;
612 unsigned alu_temp_gprs
;
615 unsigned vtx_src_num
;
619 bool stack_workaround_8xx
;
620 bool stack_workaround_9xx
;
622 unsigned wavefront_size
;
623 unsigned stack_entry_size
;
625 static unsigned dump_pass
;
626 static unsigned dump_stat
;
628 static unsigned dry_run
;
629 static unsigned no_fallback
;
630 static unsigned safe_math
;
632 static unsigned dskip_start
;
633 static unsigned dskip_end
;
634 static unsigned dskip_mode
;
636 sb_context() : src_stats(), opt_stats(), isa(0),
637 hw_chip(HW_CHIP_UNKNOWN
), hw_class(HW_CLASS_UNKNOWN
) {}
// Declared here, defined out of line.
// Takes the ISA description pointer, the chip id and the hardware class,
// and returns int.
// NOTE(review): presumably stores the three arguments in this context and
// returns 0 on success - confirm against the definition.
639 int init(r600_isa
*isa
, sb_hw_chip chip
, sb_hw_class cclass
);
641 bool is_r600() {return hw_class
== HW_CLASS_R600
;}
642 bool is_r700() {return hw_class
== HW_CLASS_R700
;}
643 bool is_evergreen() {return hw_class
== HW_CLASS_EVERGREEN
;}
644 bool is_cayman() {return hw_class
== HW_CLASS_CAYMAN
;}
645 bool is_egcm() {return hw_class
>= HW_CLASS_EVERGREEN
;}
647 bool needs_8xx_stack_workaround() {
652 case HW_CHIP_CYPRESS
:
653 case HW_CHIP_JUNIPER
:
660 bool needs_9xx_stack_workaround() {
664 sb_hw_class_bits
hw_class_bit() {
666 case HW_CLASS_R600
:return HB_R6
;
667 case HW_CLASS_R700
:return HB_R7
;
668 case HW_CLASS_EVERGREEN
:return HB_EG
;
669 case HW_CLASS_CAYMAN
:return HB_CM
;
670 default: assert(!"unknown hw class"); return (sb_hw_class_bits
)0;
675 unsigned cf_opcode(unsigned op
) {
676 return r600_isa_cf_opcode(isa
->hw_class
, op
);
679 unsigned alu_opcode(unsigned op
) {
680 return r600_isa_alu_opcode(isa
->hw_class
, op
);
683 unsigned alu_slots(unsigned op
) {
684 return r600_isa_alu_slots(isa
->hw_class
, op
);
687 unsigned alu_slots(const alu_op_info
* op_ptr
) {
688 return op_ptr
->slots
[isa
->hw_class
];
691 unsigned alu_slots_mask(const alu_op_info
* op_ptr
) {
693 unsigned slot_flags
= alu_slots(op_ptr
);
694 if (slot_flags
& AF_V
)
696 if (!is_cayman() && (slot_flags
& AF_S
))
701 unsigned fetch_opcode(unsigned op
) {
702 return r600_isa_fetch_opcode(isa
->hw_class
, op
);
705 bool is_kcache_sel(unsigned sel
) {
706 return ((sel
>= 128 && sel
< 192) || (sel
>= 256 && sel
< 320));
// Name lookups, declared here and defined out of line. Both take no
// arguments and return a C string.
// NOTE(review): presumably these return printable names for this
// context's hw_class / hw_chip - confirm against the definitions.
709 const char * get_hw_class_name();
710 const char * get_hw_chip_name();
// Conditional-dump helpers. Each expands to a do { ... } while (0)
// statement that executes the statement(s) passed as `a` only when the
// corresponding static flag (sb_context::dump_stat or
// sb_context::dump_pass) is nonzero. The do/while(0) wrapper makes the
// expansion a single statement, so it is safe inside an unbraced if/else.
714 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
715 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
726 bc_decoder(sb_context
&sctx
, uint32_t *data
, unsigned size
)
727 : ctx(sctx
), dw(data
), ndw(size
) {}
// Instruction decoders; all are only declared here and defined out of
// line. Each takes a dword index `i` by reference and the bytecode
// structure `bc` to work on by reference, and returns int.
// NOTE(review): presumably `i` is advanced past the dwords consumed, `bc`
// is filled from the raw data, and the result is 0 on success - confirm
// against the definitions.

// Top-level decoders, one per instruction kind (CF, ALU, fetch).
729 int decode_cf(unsigned &i
, bc_cf
&bc
);
730 int decode_alu(unsigned &i
, bc_alu
&bc
);
731 int decode_fetch(unsigned &i
, bc_fetch
&bc
);
// CF variants: same signature as decode_cf.
// NOTE(review): presumably helpers that decode_cf dispatches to for ALU,
// export and memory CF instructions - confirm.
734 int decode_cf_alu(unsigned &i
, bc_cf
&bc
);
735 int decode_cf_exp(unsigned &i
, bc_cf
&bc
);
736 int decode_cf_mem(unsigned &i
, bc_cf
&bc
);
// Fetch variant: same signature as decode_fetch.
// NOTE(review): presumably handles vertex-fetch instructions - confirm.
738 int decode_fetch_vtx(unsigned &i
, bc_fetch
&bc
);
741 // bytecode format definition
743 class hw_encoding_format
{
744 const sb_hw_class_bits hw_target
; //FIXME: debug - remove after testing
745 hw_encoding_format();
749 hw_encoding_format(sb_hw_class_bits hw
)
750 : hw_target(hw
), value(0) {}
751 hw_encoding_format(uint32_t v
, sb_hw_class_bits hw
)
752 : hw_target(hw
), value(v
) {}
753 uint32_t get_value(sb_hw_class_bits hw
) const {
754 assert((hw
& hw_target
) == hw
);
759 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \
760 class fmt##_##hwset : public hw_encoding_format {\
761 typedef fmt##_##hwset thistype; \
763 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
764 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
// BC_FORMAT_BEGIN(fmt) is shorthand for BC_FORMAT_BEGIN_HW(fmt, ALL),
// i.e. it opens the format class with hardware set ALL.
766 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
// BC_FORMAT_END(fmt) expands to "};", closing the class opened by the
// matching BEGIN macro; the `fmt` argument is not used in the expansion.
768 #define BC_FORMAT_END(fmt) };
770 // bytecode format field definition
772 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
773 thistype & name(unsigned v) { \
774 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
777 unsigned get_##name() const { \
778 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
// BC_RSRVD expands to nothing: the bit range it names generates no
// accessor code under this definition.
781 #define BC_RSRVD(fmt, last_bit, first_bit)
783 // CLAMP macro defined elsewhere interferes with bytecode field name
786 #include "sb_bc_fmt_def.inc"
788 #undef BC_FORMAT_BEGIN
799 r600_shader
*pshader
;
810 alu_node
*slots
[2][5];
813 typedef std::vector
<cf_node
*> id_cf_map
;
816 typedef std::stack
<region_node
*> region_stack
;
817 region_stack loop_stack
;
823 bc_parser(sb_context
&sctx
, r600_bytecode
*bc
, r600_shader
* pshader
) :
824 ctx(sctx
), dec(), bc(bc
), pshader(pshader
),
825 dw(), bc_ndw(), max_cf(),
826 sh(), error(), slots(), cgroup(),
827 cf_map(), loop_stack(), gpr_reladdr() { }
832 shader
* get_shader() { assert(!error
); return sh
; }
// Parsing steps; all are only declared here, defined out of line, and
// return int.
// NOTE(review): presumably 0 means success and the decode_* steps run
// before the prepare_* steps - confirm against the definitions.

// Takes a dword index `i` and a flag `eop`, both by reference.
// NOTE(review): presumably `eop` reports whether the decoded CF
// instruction ends the program - confirm.
840 int decode_cf(unsigned &i
, bool &eop
);
// Operate on the clause attached to the CF node `cf`.
842 int decode_alu_clause(cf_node
*cf
);
// Takes the CF node plus a dword index `i` and a counter `gcnt`, both by
// reference.
843 int decode_alu_group(cf_node
* cf
, unsigned &i
, unsigned &gcnt
);
845 int decode_fetch_clause(cf_node
*cf
);
// Second-stage steps over the same node types.
848 int prepare_alu_clause(cf_node
*cf
);
849 int prepare_alu_group(cf_node
* cf
, alu_group_node
*g
);
850 int prepare_fetch_clause(cf_node
*cf
);
// Control-flow steps, each taking one CF node.
852 int prepare_loop(cf_node
*c
);
853 int prepare_if(cf_node
*c
);
861 typedef std::vector
<uint32_t> bc_vector
;
862 sb_hw_class_bits hw_class_bit
;
870 bytecode(sb_hw_class_bits hw
, unsigned rdw
= 256)
871 : hw_class_bit(hw
), pos(0) { bc
.reserve(rdw
); }
873 unsigned ndw() { return bc
.size(); }
875 void write_data(uint32_t* dst
) {
876 std::copy(bc
.begin(), bc
.end(), dst
);
879 void align(unsigned a
) {
880 unsigned size
= bc
.size();
881 size
= (size
+ a
- 1) & ~(a
-1);
885 void set_size(unsigned sz
) {
886 assert(sz
>= bc
.size());
890 void seek(unsigned p
) {
899 unsigned get_pos() { return pos
; }
900 uint32_t *data() { return &bc
[0]; }
902 bytecode
& operator <<(uint32_t v
) {
911 bytecode
& operator <<(const hw_encoding_format
&e
) {
912 *this << e
.get_value(hw_class_bit
);
916 bytecode
& operator <<(const bytecode
&b
) {
917 bc
.insert(bc
.end(), b
.bc
.begin(), b
.bc
.end());
921 uint32_t at(unsigned dw_id
) { return bc
.at(dw_id
); }
// Constructor taking the shader `s` by reference; declared here, defined
// out of line.
933 bc_builder(shader
&s
);
935 bytecode
& get_bytecode() { assert(!error
); return bb
; }
// Encoding steps; all are only declared here, defined out of line, take
// one IR node pointer and return int.
// NOTE(review): presumably each one emits the encoding of its node into
// the builder's bytecode buffer and returns 0 on success - confirm
// against the definitions.

// CF-level steps, each taking a cf_node.
939 int build_cf(cf_node
*n
);
941 int build_cf_alu(cf_node
*n
);
942 int build_cf_mem(cf_node
*n
);
943 int build_cf_exp(cf_node
*n
);
// ALU steps: clause (cf_node), group (alu_group_node), single
// instruction (alu_node).
945 int build_alu_clause(cf_node
*n
);
946 int build_alu_group(alu_group_node
*n
);
947 int build_alu(alu_node
*n
);
// Fetch steps: clause (cf_node), then one fetch_node per instruction.
// NOTE(review): presumably _tex handles texture fetches and _vtx vertex
// fetches - confirm.
949 int build_fetch_clause(cf_node
*n
);
950 int build_fetch_tex(fetch_node
*n
);
951 int build_fetch_vtx(fetch_node
*n
);
954 } // namespace r600_sb
956 #endif /* SB_BC_H_ */