2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 #define PACKED __attribute__((__packed__))
/* Optional hook called by ir3_assert() when an assertion fails; it is
 * passed the stringified expression plus the file, line and function of
 * the failing assert.
 *
 * The symbol is declared weak, so ir3_assert() tests its address before
 * calling it, and noreturn, so an implementation must not return to the
 * caller.
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
		const char *func) __attribute__((weak)) __attribute__((__noreturn__));
37 /* A wrapper for assert() that allows overriding handling of a failed
38 * assert. This is needed for tools like crashdec which may want to
39 * attempt to disassemble memory that might not actually be valid
42 #define ir3_assert(expr) do { \
44 if (ir3_assert_handler) { \
45 ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
50 /* size of largest OPC field of all the instruction categories: */
/* Pack an instruction category and a per-category opcode number into one
 * value: the category goes above the low NOPC_BITS bits, the opcode number
 * in the low bits. Both arguments are parenthesized so that expressions
 * containing lower-precedence operators (e.g. ?:) expand correctly.
 */
#define _OPC(cat, opc)   (((cat) << NOPC_BITS) | (opc))
59 OPC_JUMP
= _OPC(0, 2),
60 OPC_CALL
= _OPC(0, 3),
62 OPC_KILL
= _OPC(0, 5),
64 OPC_EMIT
= _OPC(0, 7),
66 OPC_CHMASK
= _OPC(0, 9),
67 OPC_CHSH
= _OPC(0, 10),
68 OPC_FLOW_REV
= _OPC(0, 11),
70 OPC_BKT
= _OPC(0, 16),
71 OPC_STKS
= _OPC(0, 17),
72 OPC_STKR
= _OPC(0, 18),
73 OPC_XSET
= _OPC(0, 19),
74 OPC_XCLR
= _OPC(0, 20),
75 OPC_GETONE
= _OPC(0, 21),
76 OPC_DBG
= _OPC(0, 22),
77 OPC_SHPS
= _OPC(0, 23), /* shader prologue start */
78 OPC_SHPE
= _OPC(0, 24), /* shader prologue end */
80 OPC_PREDT
= _OPC(0, 29), /* predicated true */
81 OPC_PREDF
= _OPC(0, 30), /* predicated false */
82 OPC_PREDE
= _OPC(0, 31), /* predicated end */
88 OPC_ADD_F
= _OPC(2, 0),
89 OPC_MIN_F
= _OPC(2, 1),
90 OPC_MAX_F
= _OPC(2, 2),
91 OPC_MUL_F
= _OPC(2, 3),
92 OPC_SIGN_F
= _OPC(2, 4),
93 OPC_CMPS_F
= _OPC(2, 5),
94 OPC_ABSNEG_F
= _OPC(2, 6),
95 OPC_CMPV_F
= _OPC(2, 7),
97 OPC_FLOOR_F
= _OPC(2, 9),
98 OPC_CEIL_F
= _OPC(2, 10),
99 OPC_RNDNE_F
= _OPC(2, 11),
100 OPC_RNDAZ_F
= _OPC(2, 12),
101 OPC_TRUNC_F
= _OPC(2, 13),
102 /* 14-15 - invalid */
103 OPC_ADD_U
= _OPC(2, 16),
104 OPC_ADD_S
= _OPC(2, 17),
105 OPC_SUB_U
= _OPC(2, 18),
106 OPC_SUB_S
= _OPC(2, 19),
107 OPC_CMPS_U
= _OPC(2, 20),
108 OPC_CMPS_S
= _OPC(2, 21),
109 OPC_MIN_U
= _OPC(2, 22),
110 OPC_MIN_S
= _OPC(2, 23),
111 OPC_MAX_U
= _OPC(2, 24),
112 OPC_MAX_S
= _OPC(2, 25),
113 OPC_ABSNEG_S
= _OPC(2, 26),
115 OPC_AND_B
= _OPC(2, 28),
116 OPC_OR_B
= _OPC(2, 29),
117 OPC_NOT_B
= _OPC(2, 30),
118 OPC_XOR_B
= _OPC(2, 31),
120 OPC_CMPV_U
= _OPC(2, 33),
121 OPC_CMPV_S
= _OPC(2, 34),
122 /* 35-47 - invalid */
123 OPC_MUL_U24
= _OPC(2, 48), /* 24b mul into 32b result */
124 OPC_MUL_S24
= _OPC(2, 49), /* 24b mul into 32b result with sign extension */
125 OPC_MULL_U
= _OPC(2, 50),
126 OPC_BFREV_B
= _OPC(2, 51),
127 OPC_CLZ_S
= _OPC(2, 52),
128 OPC_CLZ_B
= _OPC(2, 53),
129 OPC_SHL_B
= _OPC(2, 54),
130 OPC_SHR_B
= _OPC(2, 55),
131 OPC_ASHR_B
= _OPC(2, 56),
132 OPC_BARY_F
= _OPC(2, 57),
133 OPC_MGEN_B
= _OPC(2, 58),
134 OPC_GETBIT_B
= _OPC(2, 59),
135 OPC_SETRM
= _OPC(2, 60),
136 OPC_CBITS_B
= _OPC(2, 61),
137 OPC_SHB
= _OPC(2, 62),
138 OPC_MSAD
= _OPC(2, 63),
141 OPC_MAD_U16
= _OPC(3, 0),
142 OPC_MADSH_U16
= _OPC(3, 1),
143 OPC_MAD_S16
= _OPC(3, 2),
144 OPC_MADSH_M16
= _OPC(3, 3), /* should this be .s16? */
145 OPC_MAD_U24
= _OPC(3, 4),
146 OPC_MAD_S24
= _OPC(3, 5),
147 OPC_MAD_F16
= _OPC(3, 6),
148 OPC_MAD_F32
= _OPC(3, 7),
149 OPC_SEL_B16
= _OPC(3, 8),
150 OPC_SEL_B32
= _OPC(3, 9),
151 OPC_SEL_S16
= _OPC(3, 10),
152 OPC_SEL_S32
= _OPC(3, 11),
153 OPC_SEL_F16
= _OPC(3, 12),
154 OPC_SEL_F32
= _OPC(3, 13),
155 OPC_SAD_S16
= _OPC(3, 14),
156 OPC_SAD_S32
= _OPC(3, 15),
159 OPC_RCP
= _OPC(4, 0),
160 OPC_RSQ
= _OPC(4, 1),
161 OPC_LOG2
= _OPC(4, 2),
162 OPC_EXP2
= _OPC(4, 3),
163 OPC_SIN
= _OPC(4, 4),
164 OPC_COS
= _OPC(4, 5),
165 OPC_SQRT
= _OPC(4, 6),
166 /* NOTE that these are 8+opc from their highp equivs, so it's possible
167 * that the high order bit in the opc field has been repurposed for
168 * half-precision use? But note that other ops (rcp/sin/cos/sqrt)
169 * still use the same opc as highp
171 OPC_HRSQ
= _OPC(4, 9),
172 OPC_HLOG2
= _OPC(4, 10),
173 OPC_HEXP2
= _OPC(4, 11),
176 OPC_ISAM
= _OPC(5, 0),
177 OPC_ISAML
= _OPC(5, 1),
178 OPC_ISAMM
= _OPC(5, 2),
179 OPC_SAM
= _OPC(5, 3),
180 OPC_SAMB
= _OPC(5, 4),
181 OPC_SAML
= _OPC(5, 5),
182 OPC_SAMGQ
= _OPC(5, 6),
183 OPC_GETLOD
= _OPC(5, 7),
184 OPC_CONV
= _OPC(5, 8),
185 OPC_CONVM
= _OPC(5, 9),
186 OPC_GETSIZE
= _OPC(5, 10),
187 OPC_GETBUF
= _OPC(5, 11),
188 OPC_GETPOS
= _OPC(5, 12),
189 OPC_GETINFO
= _OPC(5, 13),
190 OPC_DSX
= _OPC(5, 14),
191 OPC_DSY
= _OPC(5, 15),
192 OPC_GATHER4R
= _OPC(5, 16),
193 OPC_GATHER4G
= _OPC(5, 17),
194 OPC_GATHER4B
= _OPC(5, 18),
195 OPC_GATHER4A
= _OPC(5, 19),
196 OPC_SAMGP0
= _OPC(5, 20),
197 OPC_SAMGP1
= _OPC(5, 21),
198 OPC_SAMGP2
= _OPC(5, 22),
199 OPC_SAMGP3
= _OPC(5, 23),
200 OPC_DSXPP_1
= _OPC(5, 24),
201 OPC_DSYPP_1
= _OPC(5, 25),
202 OPC_RGETPOS
= _OPC(5, 26),
203 OPC_RGETINFO
= _OPC(5, 27),
204 /* cat5 meta instructions, placed above the cat5 opc field's size */
205 OPC_DSXPP_MACRO
= _OPC(5, 32),
206 OPC_DSYPP_MACRO
= _OPC(5, 33),
209 OPC_LDG
= _OPC(6, 0), /* load-global */
210 OPC_LDL
= _OPC(6, 1),
211 OPC_LDP
= _OPC(6, 2),
212 OPC_STG
= _OPC(6, 3), /* store-global */
213 OPC_STL
= _OPC(6, 4),
214 OPC_STP
= _OPC(6, 5),
215 OPC_LDIB
= _OPC(6, 6),
216 OPC_G2L
= _OPC(6, 7),
217 OPC_L2G
= _OPC(6, 8),
218 OPC_PREFETCH
= _OPC(6, 9),
219 OPC_LDLW
= _OPC(6, 10),
220 OPC_STLW
= _OPC(6, 11),
221 OPC_RESFMT
= _OPC(6, 14),
222 OPC_RESINFO
= _OPC(6, 15),
223 OPC_ATOMIC_ADD
= _OPC(6, 16),
224 OPC_ATOMIC_SUB
= _OPC(6, 17),
225 OPC_ATOMIC_XCHG
= _OPC(6, 18),
226 OPC_ATOMIC_INC
= _OPC(6, 19),
227 OPC_ATOMIC_DEC
= _OPC(6, 20),
228 OPC_ATOMIC_CMPXCHG
= _OPC(6, 21),
229 OPC_ATOMIC_MIN
= _OPC(6, 22),
230 OPC_ATOMIC_MAX
= _OPC(6, 23),
231 OPC_ATOMIC_AND
= _OPC(6, 24),
232 OPC_ATOMIC_OR
= _OPC(6, 25),
233 OPC_ATOMIC_XOR
= _OPC(6, 26),
234 OPC_LDGB
= _OPC(6, 27),
235 OPC_STGB
= _OPC(6, 28),
236 OPC_STIB
= _OPC(6, 29),
237 OPC_LDC
= _OPC(6, 30),
238 OPC_LDLV
= _OPC(6, 31),
241 OPC_BAR
= _OPC(7, 0),
242 OPC_FENCE
= _OPC(7, 1),
244 /* meta instructions (category -1): */
245 /* placeholder instr to mark shader inputs: */
246 OPC_META_INPUT
= _OPC(-1, 0),
247 /* The "collect" and "split" instructions are used for keeping
248 * track of instructions that write to multiple dst registers
249 * (split) like texture sample instructions, or read multiple
250 * consecutive scalar registers (collect) (bary.f, texture samp)
252 * A "split" extracts a scalar component from a vecN, and a
253 * "collect" gathers multiple scalar components into a vecN
255 OPC_META_SPLIT
= _OPC(-1, 2),
256 OPC_META_COLLECT
= _OPC(-1, 3),
258 /* placeholder for texture fetches that run before FS invocation
261 OPC_META_TEX_PREFETCH
= _OPC(-1, 4),
265 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
266 #define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
268 const char *disasm_a3xx_instr_name(opc_t opc
);
278 TYPE_S8
= 7, // XXX I assume?
281 static inline uint32_t type_size(type_t type
)
296 ir3_assert(0); /* invalid type */
301 static inline int type_float(type_t type
)
303 return (type
== TYPE_F32
) || (type
== TYPE_F16
);
306 static inline int type_uint(type_t type
)
308 return (type
== TYPE_U32
) || (type
== TYPE_U16
) || (type
== TYPE_U8
);
311 static inline int type_sint(type_t type
)
313 return (type
== TYPE_S32
) || (type
== TYPE_S16
) || (type
== TYPE_S8
);
316 typedef union PACKED
{
317 /* normal gpr or const src register: */
322 /* for immediate val: */
323 int32_t iim_val
: 11;
324 /* to make compiler happy: */
326 uint32_t dummy10
: 10;
327 int32_t idummy10
: 10;
328 uint32_t dummy11
: 11;
329 uint32_t dummy12
: 12;
330 uint32_t dummy13
: 13;
332 int32_t idummy13
: 13;
/* Build a packed register id from a register number and a component:
 * the component occupies the low two bits (only its low two bits are
 * kept) and the register number sits above them.
 *
 * The arithmetic is done in uint32_t so that a negative `num` does not
 * trigger the undefined behaviour of left-shifting a negative int.
 */
static inline uint32_t regid(int num, int comp)
{
	return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3u);
}
347 #define INVALID_REG regid(63, 0)
348 #define VALIDREG(r) ((r) != INVALID_REG)
349 #define CONDREG(r, val) COND(VALIDREG(r), (val))
351 /* special registers: */
352 #define REG_A0 61 /* address register */
353 #define REG_P0 62 /* predicate register */
355 static inline int reg_special(reg_t reg
)
357 return (reg
.num
== REG_A0
) || (reg
.num
== REG_P0
);
361 BRANCH_PLAIN
= 0, /* br */
362 BRANCH_OR
= 1, /* brao */
363 BRANCH_AND
= 2, /* braa */
364 BRANCH_CONST
= 3, /* brac */
365 BRANCH_ANY
= 4, /* bany */
366 BRANCH_ALL
= 5, /* ball */
367 BRANCH_X
= 6, /* brax ??? */
370 typedef struct PACKED
{
375 uint32_t dummy1
: 16;
379 uint32_t dummy1
: 12;
387 uint32_t idx
: 5; /* brac.N index */
388 uint32_t brtype
: 3; /* branch type, see brtype_t */
395 uint32_t opc_hi
: 1; /* at least one bit */
398 uint32_t comp0
: 2; /* component for first src */
400 uint32_t jmp_tgt
: 1;
402 uint32_t opc_cat
: 3;
405 typedef struct PACKED
{
408 /* for normal src register: */
411 /* at least low bit of pad must be zero or it will
412 * look like an address-relative src
416 /* for address relative: */
419 uint32_t src_rel_c
: 1;
420 uint32_t src_rel
: 1;
421 uint32_t unknown
: 20;
435 uint32_t dst_type
: 3;
436 uint32_t dst_rel
: 1;
437 uint32_t src_type
: 3;
441 uint32_t pos_inf
: 1;
442 uint32_t must_be_0
: 2;
443 uint32_t jmp_tgt
: 1;
445 uint32_t opc_cat
: 3;
448 typedef struct PACKED
{
453 uint32_t must_be_zero1
: 2;
454 uint32_t src1_im
: 1; /* immediate */
455 uint32_t src1_neg
: 1; /* negate */
456 uint32_t src1_abs
: 1; /* absolute value */
460 uint32_t src1_c
: 1; /* relative-const */
461 uint32_t src1_rel
: 1; /* relative address */
462 uint32_t must_be_zero
: 1;
467 uint32_t src1_c
: 1; /* const */
475 uint32_t must_be_zero2
: 2;
476 uint32_t src2_im
: 1; /* immediate */
477 uint32_t src2_neg
: 1; /* negate */
478 uint32_t src2_abs
: 1; /* absolute value */
482 uint32_t src2_c
: 1; /* relative-const */
483 uint32_t src2_rel
: 1; /* relative address */
484 uint32_t must_be_zero
: 1;
489 uint32_t src2_c
: 1; /* const */
498 uint32_t src1_r
: 1; /* doubles as nop0 if repeat==0 */
500 uint32_t ul
: 1; /* dunno */
501 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
504 uint32_t src2_r
: 1; /* doubles as nop1 if repeat==0 */
505 uint32_t full
: 1; /* not half */
507 uint32_t jmp_tgt
: 1;
509 uint32_t opc_cat
: 3;
512 typedef struct PACKED
{
517 uint32_t must_be_zero1
: 2;
519 uint32_t src1_neg
: 1;
520 uint32_t src2_r
: 1; /* doubles as nop1 if repeat==0 */
525 uint32_t src1_rel
: 1;
526 uint32_t must_be_zero
: 1;
539 uint32_t must_be_zero2
: 2;
541 uint32_t src2_neg
: 1;
542 uint32_t src3_neg
: 1;
547 uint32_t src3_rel
: 1;
548 uint32_t must_be_zero
: 1;
562 uint32_t src1_r
: 1; /* doubles as nop0 if repeat==0 */
565 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
568 uint32_t jmp_tgt
: 1;
570 uint32_t opc_cat
: 3;
573 static inline bool instr_cat3_full(instr_cat3_t
*cat3
)
575 switch (_OPC(3, cat3
->opc
)) {
583 case OPC_SAD_S32
: // really??
590 typedef struct PACKED
{
595 uint32_t must_be_zero1
: 2;
596 uint32_t src_im
: 1; /* immediate */
597 uint32_t src_neg
: 1; /* negate */
598 uint32_t src_abs
: 1; /* absolute value */
602 uint32_t src_c
: 1; /* relative-const */
603 uint32_t src_rel
: 1; /* relative address */
604 uint32_t must_be_zero
: 1;
609 uint32_t src_c
: 1; /* const */
613 uint32_t dummy1
: 16; /* seem to be ignored */
622 uint32_t dst_half
: 1; /* or widen/narrow.. ie. dst hrN <-> rN */
623 uint32_t dummy2
: 5; /* seem to be ignored */
624 uint32_t full
: 1; /* not half */
626 uint32_t jmp_tgt
: 1;
628 uint32_t opc_cat
: 3;
631 /* With is_bindless_s2en = 1, this determines whether bindless is enabled and
632 * if so, how to get the (base, index) pair for both sampler and texture.
633 * There is a single base embedded in the instruction, which is always used
637 /* Use traditional GL binding model, get texture and sampler index
638 * from src3 which is not presumed to be uniform. This is
639 * backwards-compatible with earlier generations, where this field was
640 * always 0 and nonuniform-indexed sampling always worked.
644 /* The sampler base comes from the low 3 bits of a1.x, and the sampler
645 * and texture index come from src3 which is presumed to be uniform.
647 CAT5_BINDLESS_A1_UNIFORM
= 1,
649 /* The texture and sampler share the same base, and the sampler and
650 * texture index come from src3 which is *not* presumed to be uniform.
652 CAT5_BINDLESS_NONUNIFORM
= 2,
654 /* The sampler base comes from the low 3 bits of a1.x, and the sampler
655 * and texture index come from src3 which is *not* presumed to be
658 CAT5_BINDLESS_A1_NONUNIFORM
= 3,
660 /* Use traditional GL binding model, get texture and sampler index
661 * from src3 which is presumed to be uniform.
665 /* The texture and sampler share the same base, and the sampler and
666 * texture index come from src3 which is presumed to be uniform.
668 CAT5_BINDLESS_UNIFORM
= 5,
670 /* The texture and sampler share the same base, get sampler index from low
671 * 4 bits of src3 and texture index from high 4 bits.
673 CAT5_BINDLESS_IMM
= 6,
675 /* The sampler base comes from the low 3 bits of a1.x, and the texture
676 * index comes from the next 8 bits of a1.x. The sampler index is an
679 CAT5_BINDLESS_A1_IMM
= 7,
682 typedef struct PACKED
{
687 uint32_t full
: 1; /* not half */
690 uint32_t dummy1
: 4; /* seem to be ignored */
696 uint32_t full
: 1; /* not half */
700 uint32_t base_hi
: 2;
702 uint32_t desc_mode
: 3;
704 /* same in either case: */
705 // XXX I think, confirm this
707 uint32_t full
: 1; /* not half */
716 uint32_t wrmask
: 4; /* write-mask */
718 uint32_t base_lo
: 1; /* used with bindless */
723 uint32_t is_s2en_bindless
: 1;
728 uint32_t jmp_tgt
: 1;
730 uint32_t opc_cat
: 3;
733 /* dword0 encoding for src_off: [src1 + off], src2: */
734 typedef struct PACKED
{
736 uint32_t mustbe1
: 1;
739 uint32_t src1_im
: 1;
740 uint32_t src2_im
: 1;
747 /* dword0 encoding for !src_off: [src1], src2 */
748 typedef struct PACKED
{
750 uint32_t mustbe0
: 1;
753 uint32_t ignore0
: 8;
754 uint32_t src1_im
: 1;
755 uint32_t src2_im
: 1;
762 /* dword1 encoding for dst_off: */
763 typedef struct PACKED
{
765 uint32_t dw0_pad1
: 9;
766 int32_t off_high
: 5;
767 uint32_t dw0_pad2
: 18;
770 uint32_t mustbe1
: 1;
775 /* dword1 encoding for !dst_off: */
776 typedef struct PACKED
{
781 uint32_t mustbe0
: 1;
786 /* ldgb and atomics..
788 * ldgb: pad0=0, pad3=1
789 * atomic .g: pad0=1, pad3=1
792 typedef struct PACKED
{
798 uint32_t type_size
: 2;
800 uint32_t src1_im
: 1;
801 uint32_t src2_im
: 1;
806 uint32_t mustbe0
: 1;
807 uint32_t src_ssbo
: 8;
808 uint32_t pad2
: 3; // type
810 uint32_t src_ssbo_im
: 1;
811 uint32_t pad4
: 10; // opc/jmp_tgt/sync/opc_cat
814 /* stgb, pad0=0, pad3=2
816 typedef struct PACKED
{
818 uint32_t mustbe1
: 1; // ???
822 uint32_t type_size
: 2;
824 uint32_t src2_im
: 1;
829 uint32_t src3_im
: 1;
830 uint32_t dst_ssbo
: 8;
831 uint32_t pad2
: 3; // type
833 uint32_t pad4
: 10; // opc/jmp_tgt/sync/opc_cat
836 typedef union PACKED
{
841 instr_cat6ldgb_t ldgb
;
842 instr_cat6stgb_t stgb
;
845 uint32_t src_off
: 1;
850 uint32_t dst_off
: 1;
853 uint32_t g
: 1; /* or in some cases it means dst immed */
856 uint32_t jmp_tgt
: 1;
858 uint32_t opc_cat
: 3;
862 /* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
865 /* Use old GL binding model with an immediate index. */
872 /* Use the bindless model, with an immediate index.
874 CAT6_BINDLESS_IMM
= 4,
876 /* Use the bindless model, with a uniform register index.
878 CAT6_BINDLESS_UNIFORM
= 5,
880 /* Use the bindless model, with a register index that isn't guaranteed
881 * to be uniform. This presumably checks if the indices are equal and
882 * splits up the load/store, because it works the way you would
885 CAT6_BINDLESS_NONUNIFORM
= 6,
889 * For atomic ops (which return a value):
891 * pad1=1, pad3=c, pad5=3
892 * src1 - vecN offset/coords
893 * src2.x - is actually dest register
894 * src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
895 * and src2.z is 'data'
897 * For stib (which does not return a value):
898 * pad1=0, pad3=c, pad5=2
899 * src1 - vecN offset/coords
900 * src2 - value to store
903 * pad1=1, pad3=c, pad5=2
904 * src1 - vecN offset/coords
906 * for ldc (load from UBO using descriptor):
907 * pad1=0, pad3=8, pad5=2
909 * pad2 and pad5 are only observed to be 0.
911 typedef struct PACKED
{
916 uint32_t desc_mode
: 3;
919 uint32_t type_size
: 2;
922 uint32_t src1
: 8; /* coordinate/offset */
925 uint32_t src2
: 8; /* or the dst for load instructions */
926 uint32_t pad4
: 1; //mustbe0 ??
927 uint32_t ssbo
: 8; /* ssbo/image binding point */
930 uint32_t jmp_tgt
: 1;
932 uint32_t opc_cat
: 3;
935 typedef struct PACKED
{
941 uint32_t ss
: 1; /* maybe in the encoding, but blob only uses (sy) */
943 uint32_t w
: 1; /* write */
944 uint32_t r
: 1; /* read */
945 uint32_t l
: 1; /* local */
946 uint32_t g
: 1; /* global */
947 uint32_t opc
: 4; /* presumed, but only a couple known OPCs */
948 uint32_t jmp_tgt
: 1; /* (jp) */
949 uint32_t sync
: 1; /* (sy) */
950 uint32_t opc_cat
: 3;
953 typedef union PACKED
{
961 instr_cat6_a6xx_t cat6_a6xx
;
969 uint32_t ss
: 1; /* cat1-cat4 (cat0??) and cat7 (?) */
970 uint32_t ul
: 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
972 uint32_t jmp_tgt
: 1;
974 uint32_t opc_cat
: 3;
979 static inline uint32_t instr_repeat(instr_t
*instr
)
981 switch (instr
->opc_cat
) {
982 case 0: return instr
->cat0
.repeat
;
983 case 1: return instr
->cat1
.repeat
;
984 case 2: return instr
->cat2
.repeat
;
985 case 3: return instr
->cat3
.repeat
;
986 case 4: return instr
->cat4
.repeat
;
991 static inline bool instr_sat(instr_t
*instr
)
993 switch (instr
->opc_cat
) {
994 case 2: return instr
->cat2
.sat
;
995 case 3: return instr
->cat3
.sat
;
996 case 4: return instr
->cat4
.sat
;
997 default: return false;
1001 /* We can probably drop the gpu_id arg, but keeping it for now so we can
1002 * assert if we see something we think should be new encoding on an older
1005 static inline bool is_cat6_legacy(instr_t
*instr
, unsigned gpu_id
)
1007 instr_cat6_a6xx_t
*cat6
= &instr
->cat6_a6xx
;
1009 /* At least one of these two bits is pad in all the possible
1010 * "legacy" cat6 encodings, and an analysis of all the pre-a6xx
1011 * cmdstream traces I have indicates that the pad bit is zero
1012 * in all cases. So we can use this to detect new encoding:
1014 if ((cat6
->pad3
& 0x8) && (cat6
->pad5
& 0x2)) {
1015 ir3_assert(gpu_id
>= 600);
1016 ir3_assert(instr
->cat6
.opc
== 0);
1023 static inline uint32_t instr_opc(instr_t
*instr
, unsigned gpu_id
)
1025 switch (instr
->opc_cat
) {
1026 case 0: return instr
->cat0
.opc
| instr
->cat0
.opc_hi
<< 4;
1028 case 2: return instr
->cat2
.opc
;
1029 case 3: return instr
->cat3
.opc
;
1030 case 4: return instr
->cat4
.opc
;
1031 case 5: return instr
->cat5
.opc
;
1033 if (!is_cat6_legacy(instr
, gpu_id
))
1034 return instr
->cat6_a6xx
.opc
;
1035 return instr
->cat6
.opc
;
1036 case 7: return instr
->cat7
.opc
;
1041 static inline bool is_mad(opc_t opc
)
1056 static inline bool is_madsh(opc_t opc
)
1067 static inline bool is_atomic(opc_t opc
)
1070 case OPC_ATOMIC_ADD
:
1071 case OPC_ATOMIC_SUB
:
1072 case OPC_ATOMIC_XCHG
:
1073 case OPC_ATOMIC_INC
:
1074 case OPC_ATOMIC_DEC
:
1075 case OPC_ATOMIC_CMPXCHG
:
1076 case OPC_ATOMIC_MIN
:
1077 case OPC_ATOMIC_MAX
:
1078 case OPC_ATOMIC_AND
:
1080 case OPC_ATOMIC_XOR
:
1087 static inline bool is_ssbo(opc_t opc
)
1101 static inline bool is_isam(opc_t opc
)
1114 static inline bool is_cat2_float(opc_t opc
)
1137 static inline bool is_cat3_float(opc_t opc
)
1150 #endif /* INSTR_A3XX_H_ */