2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* Shorthand for the GCC/Clang "packed" attribute; it is applied to the
 * instruction-encoding structs and unions declared in this header. */
27 #define PACKED __attribute__((__packed__))
34 /* size of largest OPC field of all the instruction categories: */
/*
 * Combine an instruction category and a per-category opcode number into a
 * single opcode value: the category is placed above the low NOPC_BITS bits
 * and the opcode number occupies those low bits.
 *
 * Both arguments are parenthesized so that expression arguments (for
 * example a conditional expression) expand as intended.  The category is
 * scaled by multiplication rather than a left shift because the meta
 * opcodes pass cat == -1, and left-shifting a negative value is undefined
 * behavior in C; the resulting value is unchanged.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
43 OPC_JUMP
= _OPC(0, 2),
44 OPC_CALL
= _OPC(0, 3),
46 OPC_KILL
= _OPC(0, 5),
48 OPC_EMIT
= _OPC(0, 7),
50 OPC_CHMASK
= _OPC(0, 9),
51 OPC_CHSH
= _OPC(0, 10),
52 OPC_FLOW_REV
= _OPC(0, 11),
58 OPC_ADD_F
= _OPC(2, 0),
59 OPC_MIN_F
= _OPC(2, 1),
60 OPC_MAX_F
= _OPC(2, 2),
61 OPC_MUL_F
= _OPC(2, 3),
62 OPC_SIGN_F
= _OPC(2, 4),
63 OPC_CMPS_F
= _OPC(2, 5),
64 OPC_ABSNEG_F
= _OPC(2, 6),
65 OPC_CMPV_F
= _OPC(2, 7),
67 OPC_FLOOR_F
= _OPC(2, 9),
68 OPC_CEIL_F
= _OPC(2, 10),
69 OPC_RNDNE_F
= _OPC(2, 11),
70 OPC_RNDAZ_F
= _OPC(2, 12),
71 OPC_TRUNC_F
= _OPC(2, 13),
73 OPC_ADD_U
= _OPC(2, 16),
74 OPC_ADD_S
= _OPC(2, 17),
75 OPC_SUB_U
= _OPC(2, 18),
76 OPC_SUB_S
= _OPC(2, 19),
77 OPC_CMPS_U
= _OPC(2, 20),
78 OPC_CMPS_S
= _OPC(2, 21),
79 OPC_MIN_U
= _OPC(2, 22),
80 OPC_MIN_S
= _OPC(2, 23),
81 OPC_MAX_U
= _OPC(2, 24),
82 OPC_MAX_S
= _OPC(2, 25),
83 OPC_ABSNEG_S
= _OPC(2, 26),
85 OPC_AND_B
= _OPC(2, 28),
86 OPC_OR_B
= _OPC(2, 29),
87 OPC_NOT_B
= _OPC(2, 30),
88 OPC_XOR_B
= _OPC(2, 31),
90 OPC_CMPV_U
= _OPC(2, 33),
91 OPC_CMPV_S
= _OPC(2, 34),
93 OPC_MUL_U
= _OPC(2, 48),
94 OPC_MUL_S
= _OPC(2, 49),
95 OPC_MULL_U
= _OPC(2, 50),
96 OPC_BFREV_B
= _OPC(2, 51),
97 OPC_CLZ_S
= _OPC(2, 52),
98 OPC_CLZ_B
= _OPC(2, 53),
99 OPC_SHL_B
= _OPC(2, 54),
100 OPC_SHR_B
= _OPC(2, 55),
101 OPC_ASHR_B
= _OPC(2, 56),
102 OPC_BARY_F
= _OPC(2, 57),
103 OPC_MGEN_B
= _OPC(2, 58),
104 OPC_GETBIT_B
= _OPC(2, 59),
105 OPC_SETRM
= _OPC(2, 60),
106 OPC_CBITS_B
= _OPC(2, 61),
107 OPC_SHB
= _OPC(2, 62),
108 OPC_MSAD
= _OPC(2, 63),
111 OPC_MAD_U16
= _OPC(3, 0),
112 OPC_MADSH_U16
= _OPC(3, 1),
113 OPC_MAD_S16
= _OPC(3, 2),
114 OPC_MADSH_M16
= _OPC(3, 3), /* should this be .s16? */
115 OPC_MAD_U24
= _OPC(3, 4),
116 OPC_MAD_S24
= _OPC(3, 5),
117 OPC_MAD_F16
= _OPC(3, 6),
118 OPC_MAD_F32
= _OPC(3, 7),
119 OPC_SEL_B16
= _OPC(3, 8),
120 OPC_SEL_B32
= _OPC(3, 9),
121 OPC_SEL_S16
= _OPC(3, 10),
122 OPC_SEL_S32
= _OPC(3, 11),
123 OPC_SEL_F16
= _OPC(3, 12),
124 OPC_SEL_F32
= _OPC(3, 13),
125 OPC_SAD_S16
= _OPC(3, 14),
126 OPC_SAD_S32
= _OPC(3, 15),
129 OPC_RCP
= _OPC(4, 0),
130 OPC_RSQ
= _OPC(4, 1),
131 OPC_LOG2
= _OPC(4, 2),
132 OPC_EXP2
= _OPC(4, 3),
133 OPC_SIN
= _OPC(4, 4),
134 OPC_COS
= _OPC(4, 5),
135 OPC_SQRT
= _OPC(4, 6),
139 OPC_ISAM
= _OPC(5, 0),
140 OPC_ISAML
= _OPC(5, 1),
141 OPC_ISAMM
= _OPC(5, 2),
142 OPC_SAM
= _OPC(5, 3),
143 OPC_SAMB
= _OPC(5, 4),
144 OPC_SAML
= _OPC(5, 5),
145 OPC_SAMGQ
= _OPC(5, 6),
146 OPC_GETLOD
= _OPC(5, 7),
147 OPC_CONV
= _OPC(5, 8),
148 OPC_CONVM
= _OPC(5, 9),
149 OPC_GETSIZE
= _OPC(5, 10),
150 OPC_GETBUF
= _OPC(5, 11),
151 OPC_GETPOS
= _OPC(5, 12),
152 OPC_GETINFO
= _OPC(5, 13),
153 OPC_DSX
= _OPC(5, 14),
154 OPC_DSY
= _OPC(5, 15),
155 OPC_GATHER4R
= _OPC(5, 16),
156 OPC_GATHER4G
= _OPC(5, 17),
157 OPC_GATHER4B
= _OPC(5, 18),
158 OPC_GATHER4A
= _OPC(5, 19),
159 OPC_SAMGP0
= _OPC(5, 20),
160 OPC_SAMGP1
= _OPC(5, 21),
161 OPC_SAMGP2
= _OPC(5, 22),
162 OPC_SAMGP3
= _OPC(5, 23),
163 OPC_DSXPP_1
= _OPC(5, 24),
164 OPC_DSYPP_1
= _OPC(5, 25),
165 OPC_RGETPOS
= _OPC(5, 26),
166 OPC_RGETINFO
= _OPC(5, 27),
169 OPC_LDG
= _OPC(6, 0), /* load-global */
170 OPC_LDL
= _OPC(6, 1),
171 OPC_LDP
= _OPC(6, 2),
172 OPC_STG
= _OPC(6, 3), /* store-global */
173 OPC_STL
= _OPC(6, 4),
174 OPC_STP
= _OPC(6, 5),
175 OPC_STI
= _OPC(6, 6),
176 OPC_G2L
= _OPC(6, 7),
177 OPC_L2G
= _OPC(6, 8),
178 OPC_PREFETCH
= _OPC(6, 9),
179 OPC_LDLW
= _OPC(6, 10),
180 OPC_STLW
= _OPC(6, 11),
181 OPC_RESFMT
= _OPC(6, 14),
182 OPC_RESINFO
= _OPC(6, 15),
183 OPC_ATOMIC_ADD
= _OPC(6, 16),
184 OPC_ATOMIC_SUB
= _OPC(6, 17),
185 OPC_ATOMIC_XCHG
= _OPC(6, 18),
186 OPC_ATOMIC_INC
= _OPC(6, 19),
187 OPC_ATOMIC_DEC
= _OPC(6, 20),
188 OPC_ATOMIC_CMPXCHG
= _OPC(6, 21),
189 OPC_ATOMIC_MIN
= _OPC(6, 22),
190 OPC_ATOMIC_MAX
= _OPC(6, 23),
191 OPC_ATOMIC_AND
= _OPC(6, 24),
192 OPC_ATOMIC_OR
= _OPC(6, 25),
193 OPC_ATOMIC_XOR
= _OPC(6, 26),
194 OPC_LDGB
= _OPC(6, 27),
195 OPC_STGB
= _OPC(6, 28),
196 OPC_STIB
= _OPC(6, 29),
197 OPC_LDC
= _OPC(6, 30),
198 OPC_LDLV
= _OPC(6, 31),
201 OPC_BAR
= _OPC(7, 0),
202 OPC_FENCE
= _OPC(7, 1),
204 /* meta instructions (category -1): */
205 /* placeholder instr to mark shader inputs: */
206 OPC_META_INPUT
= _OPC(-1, 0),
207 /* The "fan-in" and "fan-out" instructions are used for keeping
208 * track of instructions that write to multiple dst registers
209 * (fan-out) like texture sample instructions, or read multiple
210 * consecutive scalar registers (fan-in) (bary.f, texture samp)
212 OPC_META_FO
= _OPC(-1, 2),
213 OPC_META_FI
= _OPC(-1, 3),
/*
 * Split a combined opcode value (as built by _OPC()) back into its parts:
 *   opc_cat() - the category, i.e. the value shifted right by NOPC_BITS,
 *               cast to int (the meta opcodes are built with category -1);
 *   opc_op()  - the opcode number within the category, i.e. the low
 *               NOPC_BITS bits, cast to unsigned.
 */
217 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
218 #define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
228 TYPE_S8
= 7, // XXX I assume?
231 static inline uint32_t type_size(type_t type
)
246 assert(0); /* invalid type */
/* Returns non-zero when `type` is one of the float types (TYPE_F32 or
 * TYPE_F16), zero otherwise. */
251 static inline int type_float(type_t type
)
253 return (type
== TYPE_F32
) || (type
== TYPE_F16
);
/* Returns non-zero when `type` is one of the unsigned integer types
 * (TYPE_U32, TYPE_U16 or TYPE_U8), zero otherwise. */
256 static inline int type_uint(type_t type
)
258 return (type
== TYPE_U32
) || (type
== TYPE_U16
) || (type
== TYPE_U8
);
/* Returns non-zero when `type` is one of the signed integer types
 * (TYPE_S32, TYPE_S16 or TYPE_S8), zero otherwise. */
261 static inline int type_sint(type_t type
)
263 return (type
== TYPE_S32
) || (type
== TYPE_S16
) || (type
== TYPE_S8
);
266 typedef union PACKED
{
267 /* normal gpr or const src register: */
272 /* for immediate val: */
273 int32_t iim_val
: 11;
274 /* to make compiler happy: */
276 uint32_t dummy10
: 10;
277 int32_t idummy10
: 10;
278 uint32_t dummy11
: 11;
279 uint32_t dummy12
: 12;
280 uint32_t dummy13
: 13;
284 /* special registers: */
285 #define REG_A0 61 /* address register */
286 #define REG_P0 62 /* predicate register */
/* Returns non-zero when reg.num names one of the special registers, i.e.
 * the address register (REG_A0) or the predicate register (REG_P0). */
288 static inline int reg_special(reg_t reg
)
290 return (reg
.num
== REG_A0
) || (reg
.num
== REG_P0
);
293 typedef struct PACKED
{
298 uint32_t dummy1
: 16;
302 uint32_t dummy1
: 12;
318 uint32_t jmp_tgt
: 1;
320 uint32_t opc_cat
: 3;
323 typedef struct PACKED
{
326 /* for normal src register: */
329 /* at least low bit of pad must be zero or it will
330 * look like an address relative src
334 /* for address relative: */
337 uint32_t src_rel_c
: 1;
338 uint32_t src_rel
: 1;
339 uint32_t unknown
: 20;
353 uint32_t dst_type
: 3;
354 uint32_t dst_rel
: 1;
355 uint32_t src_type
: 3;
359 uint32_t pos_inf
: 1;
360 uint32_t must_be_0
: 2;
361 uint32_t jmp_tgt
: 1;
363 uint32_t opc_cat
: 3;
366 typedef struct PACKED
{
371 uint32_t must_be_zero1
: 2;
372 uint32_t src1_im
: 1; /* immediate */
373 uint32_t src1_neg
: 1; /* negate */
374 uint32_t src1_abs
: 1; /* absolute value */
378 uint32_t src1_c
: 1; /* relative-const */
379 uint32_t src1_rel
: 1; /* relative address */
380 uint32_t must_be_zero
: 1;
385 uint32_t src1_c
: 1; /* const */
393 uint32_t must_be_zero2
: 2;
394 uint32_t src2_im
: 1; /* immediate */
395 uint32_t src2_neg
: 1; /* negate */
396 uint32_t src2_abs
: 1; /* absolute value */
400 uint32_t src2_c
: 1; /* relative-const */
401 uint32_t src2_rel
: 1; /* relative address */
402 uint32_t must_be_zero
: 1;
407 uint32_t src2_c
: 1; /* const */
416 uint32_t src1_r
: 1; /* doubles as nop0 if repeat==0 */
418 uint32_t ul
: 1; /* dunno */
419 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
422 uint32_t src2_r
: 1; /* doubles as nop1 if repeat==0 */
423 uint32_t full
: 1; /* not half */
425 uint32_t jmp_tgt
: 1;
427 uint32_t opc_cat
: 3;
430 typedef struct PACKED
{
435 uint32_t must_be_zero1
: 2;
437 uint32_t src1_neg
: 1;
438 uint32_t src2_r
: 1; /* doubles as nop1 if repeat==0 */
443 uint32_t src1_rel
: 1;
444 uint32_t must_be_zero
: 1;
457 uint32_t must_be_zero2
: 2;
459 uint32_t src2_neg
: 1;
460 uint32_t src3_neg
: 1;
465 uint32_t src3_rel
: 1;
466 uint32_t must_be_zero
: 1;
480 uint32_t src1_r
: 1; /* doubles as nop0 if repeat==0 */
483 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
486 uint32_t jmp_tgt
: 1;
488 uint32_t opc_cat
: 3;
491 static inline bool instr_cat3_full(instr_cat3_t
*cat3
)
493 switch (_OPC(3, cat3
->opc
)) {
501 case OPC_SAD_S32
: // really??
508 typedef struct PACKED
{
513 uint32_t must_be_zero1
: 2;
514 uint32_t src_im
: 1; /* immediate */
515 uint32_t src_neg
: 1; /* negate */
516 uint32_t src_abs
: 1; /* absolute value */
520 uint32_t src_c
: 1; /* relative-const */
521 uint32_t src_rel
: 1; /* relative address */
522 uint32_t must_be_zero
: 1;
527 uint32_t src_c
: 1; /* const */
531 uint32_t dummy1
: 16; /* seem to be ignored */
540 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
541 uint32_t dummy2
: 5; /* seem to be ignored */
542 uint32_t full
: 1; /* not half */
544 uint32_t jmp_tgt
: 1;
546 uint32_t opc_cat
: 3;
549 typedef struct PACKED
{
554 uint32_t full
: 1; /* not half */
557 uint32_t dummy1
: 4; /* seem to be ignored */
563 uint32_t full
: 1; /* not half */
570 /* same in either case: */
571 // XXX I think, confirm this
573 uint32_t full
: 1; /* not half */
581 uint32_t wrmask
: 4; /* write-mask */
583 uint32_t dummy2
: 1; /* seems to be ignored */
588 uint32_t is_s2en
: 1;
593 uint32_t jmp_tgt
: 1;
595 uint32_t opc_cat
: 3;
598 /* dword0 encoding for src_off: [src1 + off], src2: */
599 typedef struct PACKED
{
601 uint32_t mustbe1
: 1;
604 uint32_t src1_im
: 1;
605 uint32_t src2_im
: 1;
612 /* dword0 encoding for !src_off: [src1], src2 */
613 typedef struct PACKED
{
615 uint32_t mustbe0
: 1;
617 uint32_t ignore0
: 8;
618 uint32_t src1_im
: 1;
619 uint32_t src2_im
: 1;
626 /* dword1 encoding for dst_off: */
627 typedef struct PACKED
{
631 /* note: there is some weird stuff going on where sometimes
632 * cat6->a.off is involved.. but that seems like a bug in
633 * the blob, since it is used even if !cat6->src_off
634 * It would make sense for there to be some more bits to
635 * bring us to 11 bits worth of offset, but not sure..
638 uint32_t mustbe1
: 1;
643 /* dword1 encoding for !dst_off: */
644 typedef struct PACKED
{
649 uint32_t mustbe0
: 1;
654 /* ldgb and atomics..
656 * ldgb: pad0=0, pad3=1
657 * atomic .g: pad0=1, pad3=1
660 typedef struct PACKED
{
666 uint32_t type_size
: 2;
668 uint32_t src1_im
: 1;
669 uint32_t src2_im
: 1;
674 uint32_t mustbe0
: 1;
675 uint32_t src_ssbo
: 8;
676 uint32_t pad2
: 3; // type
679 uint32_t pad4
: 10; // opc/jmp_tgt/sync/opc_cat
682 /* stgb, pad0=0, pad3=2
684 typedef struct PACKED
{
686 uint32_t mustbe1
: 1; // ???
690 uint32_t type_size
: 2;
692 uint32_t src2_im
: 1;
697 uint32_t src3_im
: 1;
698 uint32_t dst_ssbo
: 8;
699 uint32_t pad2
: 3; // type
701 uint32_t pad4
: 10; // opc/jmp_tgt/sync/opc_cat
704 typedef union PACKED
{
709 instr_cat6ldgb_t ldgb
;
710 instr_cat6stgb_t stgb
;
713 uint32_t src_off
: 1;
718 uint32_t dst_off
: 1;
721 uint32_t g
: 1; /* or in some cases it means dst immed */
724 uint32_t jmp_tgt
: 1;
726 uint32_t opc_cat
: 3;
731 * For atomic ops (which return a value):
733 * pad1=1, pad2=c, pad3=0, pad4=3
734 * src1 - vecN offset/coords
735 * src2.x - is actually dest register
736 * src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
737 * and src2.z is 'data'
739 * For stib (which does not return a value):
740 * pad1=0, pad2=c, pad3=0, pad4=2
741 * src1 - vecN offset/coords
742 * src2 - value to store
744 * For ldc (load from UBO using descriptor):
745 * pad1=0, pad2=8, pad3=0, pad4=2
747 typedef struct PACKED
{
752 uint32_t type_size
: 2;
755 uint32_t src1
: 8; /* coordinate/offset */
759 uint32_t pad3
: 1; //mustbe0 ?? or zero means imm vs reg for ssbo??
760 uint32_t ssbo
: 8; /* ssbo/image binding point */
763 uint32_t jmp_tgt
: 1;
765 uint32_t opc_cat
: 3;
768 typedef struct PACKED
{
774 uint32_t ss
: 1; /* maybe in the encoding, but blob only uses (sy) */
776 uint32_t w
: 1; /* write */
777 uint32_t r
: 1; /* read */
778 uint32_t l
: 1; /* local */
779 uint32_t g
: 1; /* global */
780 uint32_t opc
: 4; /* presumed, but only a couple known OPCs */
781 uint32_t jmp_tgt
: 1; /* (jp) */
782 uint32_t sync
: 1; /* (sy) */
783 uint32_t opc_cat
: 3;
786 typedef union PACKED
{
794 instr_cat6_a6xx_t cat6_a6xx
;
802 uint32_t ss
: 1; /* cat1-cat4 (cat0??) and cat7 (?) */
803 uint32_t ul
: 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
805 uint32_t jmp_tgt
: 1;
807 uint32_t opc_cat
: 3;
/* Returns the `repeat` field of an instruction.  The field is read from the
 * category-specific encoding (cat0 .. cat4) selected by instr->opc_cat. */
812 static inline uint32_t instr_repeat(instr_t
*instr
)
814 switch (instr
->opc_cat
) {
815 case 0: return instr
->cat0
.repeat
;
816 case 1: return instr
->cat1
.repeat
;
817 case 2: return instr
->cat2
.repeat
;
818 case 3: return instr
->cat3
.repeat
;
819 case 4: return instr
->cat4
.repeat
;
/* Returns the `sat` flag of an instruction.  Only the category 2, 3 and 4
 * encodings are consulted; every other category yields false. */
824 static inline bool instr_sat(instr_t
*instr
)
826 switch (instr
->opc_cat
) {
827 case 2: return instr
->cat2
.sat
;
828 case 3: return instr
->cat3
.sat
;
829 case 4: return instr
->cat4
.sat
;
830 default: return false;
/* Returns the `opc` field of an instruction, read from the category-specific
 * encoding selected by instr->opc_cat.
 *
 * When gpu_id >= 600 and instr->cat6.opc reads as zero, the opcode is taken
 * from the cat6_a6xx layout instead of the cat6 layout; otherwise cat6.opc
 * is returned. */
834 static inline uint32_t instr_opc(instr_t
*instr
, unsigned gpu_id
)
836 switch (instr
->opc_cat
) {
837 case 0: return instr
->cat0
.opc
;
839 case 2: return instr
->cat2
.opc
;
840 case 3: return instr
->cat3
.opc
;
841 case 4: return instr
->cat4
.opc
;
842 case 5: return instr
->cat5
.opc
;
844 // TODO not sure if this is the best way to figure
845 // out if new vs old encoding, but it kinda seems
847 if ((gpu_id
>= 600) && (instr
->cat6
.opc
== 0))
848 return instr
->cat6_a6xx
.opc
;
849 return instr
->cat6
.opc
;
850 case 7: return instr
->cat7
.opc
;
855 static inline bool is_mad(opc_t opc
)
870 static inline bool is_madsh(opc_t opc
)
881 static inline bool is_atomic(opc_t opc
)
886 case OPC_ATOMIC_XCHG
:
889 case OPC_ATOMIC_CMPXCHG
:
901 static inline bool is_ssbo(opc_t opc
)
/* Disassembler entry point; only declared in this header.
 * NOTE(review): parameter roles are inferred from their names and should be
 * confirmed against the definition -- `dwords`/`sizedwords` presumably give
 * the instruction words and their count, `level` an indentation or verbosity
 * level, `out` the stream the listing is written to, and `gpu_id` the target
 * GPU generation. */
915 int disasm_a3xx(uint32_t *dwords
, int sizedwords
, int level
, FILE *out
, unsigned gpu_id
);
917 #endif /* INSTR_A3XX_H_ */