#include <stdbool.h>
#include <assert.h>
+void ir3_assert_handler(const char *expr, const char *file, int line,
+ const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+
+/* A wrapper for assert() that allows overriding handling of a failed
+ * assert. This is needed for tools like crashdec which can want to
+ * attempt to disassemble memory that might not actually be valid
+ * instructions.
+ */
+#define ir3_assert(expr) do { \
+ if (!(expr)) { \
+ if (ir3_assert_handler) { \
+ ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
+ } \
+ assert(expr); \
+ } \
+ } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6
typedef enum {
/* category 0: */
OPC_NOP = _OPC(0, 0),
- OPC_BR = _OPC(0, 1),
+ OPC_B = _OPC(0, 1),
OPC_JUMP = _OPC(0, 2),
OPC_CALL = _OPC(0, 3),
OPC_RET = _OPC(0, 4),
OPC_CHSH = _OPC(0, 10),
OPC_FLOW_REV = _OPC(0, 11),
+ OPC_BKT = _OPC(0, 16),
+ OPC_STKS = _OPC(0, 17),
+ OPC_STKR = _OPC(0, 18),
+ OPC_XSET = _OPC(0, 19),
+ OPC_XCLR = _OPC(0, 20),
+ OPC_GETONE = _OPC(0, 21),
+ OPC_DBG = _OPC(0, 22),
+ OPC_SHPS = _OPC(0, 23), /* shader prologue start */
+ OPC_SHPE = _OPC(0, 24), /* shader prologue end */
+
+ OPC_PREDT = _OPC(0, 29), /* predicated true */
+ OPC_PREDF = _OPC(0, 30), /* predicated false */
+ OPC_PREDE = _OPC(0, 31), /* predicated end */
+
/* category 1: */
OPC_MOV = _OPC(1, 0),
OPC_CMPV_U = _OPC(2, 33),
OPC_CMPV_S = _OPC(2, 34),
/* 35-47 - invalid */
- OPC_MUL_U = _OPC(2, 48),
- OPC_MUL_S = _OPC(2, 49),
+ OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
+ OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
OPC_MULL_U = _OPC(2, 50),
OPC_BFREV_B = _OPC(2, 51),
OPC_CLZ_S = _OPC(2, 52),
OPC_SIN = _OPC(4, 4),
OPC_COS = _OPC(4, 5),
OPC_SQRT = _OPC(4, 6),
- // 7-63 - invalid
+ /* NOTE that these are 8+opc from their highp equivs, so it's possible
+ * that the high order bit in the opc field has been repurposed for
+ * half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
+ * still use the same opc as highp
+ */
+ OPC_HRSQ = _OPC(4, 9),
+ OPC_HLOG2 = _OPC(4, 10),
+ OPC_HEXP2 = _OPC(4, 11),
/* category 5: */
OPC_ISAM = _OPC(5, 0),
OPC_DSYPP_1 = _OPC(5, 25),
OPC_RGETPOS = _OPC(5, 26),
OPC_RGETINFO = _OPC(5, 27),
+ /* cat5 meta instructions, placed above the cat5 opc field's size */
+ OPC_DSXPP_MACRO = _OPC(5, 32),
+ OPC_DSYPP_MACRO = _OPC(5, 33),
/* category 6: */
OPC_LDG = _OPC(6, 0), /* load-global */
/* meta instructions (category -1): */
/* placeholder instr to mark shader inputs: */
OPC_META_INPUT = _OPC(-1, 0),
- /* The "fan-in" and "fan-out" instructions are used for keeping
+ /* The "collect" and "split" instructions are used for keeping
* track of instructions that write to multiple dst registers
- * (fan-out) like texture sample instructions, or read multiple
- * consecutive scalar registers (fan-in) (bary.f, texture samp)
+ * (split) like texture sample instructions, or read multiple
+ * consecutive scalar registers (collect) (bary.f, texture samp)
+ *
+ * A "split" extracts a scalar component from a vecN, and a
+ * "collect" gathers multiple scalar components into a vecN
*/
- OPC_META_FO = _OPC(-1, 2),
- OPC_META_FI = _OPC(-1, 3),
+ OPC_META_SPLIT = _OPC(-1, 2),
+ OPC_META_COLLECT = _OPC(-1, 3),
+
+ /* placeholder for texture fetches that run before FS invocation
+ * starts:
+ */
+ OPC_META_TEX_PREFETCH = _OPC(-1, 4),
} opc_t;
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
+const char *disasm_a3xx_instr_name(opc_t opc);
+
typedef enum {
TYPE_F16 = 0,
TYPE_F32 = 1,
case TYPE_S8:
return 8;
default:
- assert(0); /* invalid type */
+ ir3_assert(0); /* invalid type */
return 0;
}
}
uint32_t dummy12 : 12;
uint32_t dummy13 : 13;
uint32_t dummy8 : 8;
+ int32_t idummy13 : 13;
+ int32_t idummy8 : 8;
} reg_t;
+/* comp:
+ * 0 - x
+ * 1 - y
+ * 2 - z
+ * 3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+ return (num << 2) | (comp & 0x3);
+}
+
+#define INVALID_REG regid(63, 0)
+#define VALIDREG(r) ((r) != INVALID_REG)
+#define CONDREG(r, val) COND(VALIDREG(r), (val))
+
/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
return (reg.num == REG_A0) || (reg.num == REG_P0);
}
+typedef enum {
+ BRANCH_PLAIN = 0, /* br */
+ BRANCH_OR = 1, /* brao */
+ BRANCH_AND = 2, /* braa */
+ BRANCH_CONST = 3, /* brac */
+ BRANCH_ANY = 4, /* bany */
+ BRANCH_ALL = 5, /* ball */
+ BRANCH_X = 6, /* brax ??? */
+} brtype_t;
+
typedef struct PACKED {
/* dword0: */
union PACKED {
};
/* dword1: */
- uint32_t dummy2 : 8;
+ uint32_t idx : 5; /* brac.N index */
+ uint32_t brtype : 3; /* branch type, see brtype_t */
uint32_t repeat : 3;
uint32_t dummy3 : 1;
uint32_t ss : 1;
- uint32_t dummy4 : 7;
- uint32_t inv : 1;
- uint32_t comp : 2;
+ uint32_t inv1 : 1;
+ uint32_t comp1 : 2;
+ uint32_t eq : 1;
+ uint32_t opc_hi : 1; /* at least one bit */
+ uint32_t dummy4 : 2;
+ uint32_t inv0 : 1;
+ uint32_t comp0 : 2; /* component for first src */
uint32_t opc : 4;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat4_t;
+/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
+ * if so, how to get the (base, index) pair for both sampler and texture.
+ * There is a single base embedded in the instruction, which is always used
+ * for the texture.
+ */
+typedef enum {
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is not presumed to be uniform. This is
+ * backwards-compatible with earlier generations, where this field was
+ * always 0 and nonuniform-indexed sampling always worked.
+ */
+ CAT5_NONUNIFORM = 0,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_A1_UNIFORM = 1,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is *not* presumed to be uniform.
+ */
+ CAT5_BINDLESS_NONUNIFORM = 2,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is *not* presumed to be
+ * uniform.
+ */
+ CAT5_BINDLESS_A1_NONUNIFORM = 3,
+
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is presumed to be uniform.
+ */
+ CAT5_UNIFORM = 4,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_UNIFORM = 5,
+
+ /* The texture and sampler share the same base, get sampler index from low
+ * 4 bits of src3 and texture index from high 4 bits.
+ */
+ CAT5_BINDLESS_IMM = 6,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the texture
+ * index comes from the next 8 bits of a1.x. The sampler index is an
+ * immediate in src3.
+ */
+ CAT5_BINDLESS_A1_IMM = 7,
+} cat5_desc_mode_t;
+
typedef struct PACKED {
/* dword0: */
union PACKED {
} norm;
/* s2en case: */
struct PACKED {
- uint32_t full : 1; /* not half */
- uint32_t src1 : 8;
- uint32_t src2 : 11;
- uint32_t dummy1 : 1;
- uint32_t src3 : 8;
- uint32_t dummy2 : 3;
- } s2en;
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 2;
+ uint32_t base_hi : 2;
+ uint32_t src3 : 8;
+ uint32_t desc_mode : 3;
+ } s2en_bindless;
/* same in either case: */
// XXX I think, confirm this
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
- uint32_t pad : 23;
+ uint32_t src2 : 8;
+ uint32_t pad : 15;
};
};
/* dword1: */
- uint32_t dst : 8;
- uint32_t wrmask : 4; /* write-mask */
- uint32_t type : 3;
- uint32_t dummy2 : 1; /* seems to be ignored */
- uint32_t is_3d : 1;
-
- uint32_t is_a : 1;
- uint32_t is_s : 1;
- uint32_t is_s2en : 1;
- uint32_t is_o : 1;
- uint32_t is_p : 1;
-
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dst : 8;
+ uint32_t wrmask : 4; /* write-mask */
+ uint32_t type : 3;
+ uint32_t base_lo : 1; /* used with bindless */
+ uint32_t is_3d : 1;
+
+ uint32_t is_a : 1;
+ uint32_t is_s : 1;
+ uint32_t is_s2en_bindless : 1;
+ uint32_t is_o : 1;
+ uint32_t is_p : 1;
+
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
} instr_cat5_t;
/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe0 : 1;
- uint32_t src1 : 13;
+ uint32_t src1 : 8;
+ uint32_t pad : 5;
uint32_t ignore0 : 8;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
/* dword1 encoding for dst_off: */
typedef struct PACKED {
/* dword0: */
- uint32_t dword0;
+ uint32_t dw0_pad1 : 9;
+ int32_t off_high : 5;
+ uint32_t dw0_pad2 : 18;
- /* note: there is some weird stuff going on where sometimes
- * cat6->a.off is involved.. but that seems like a bug in
- * the blob, since it is used even if !cat6->src_off
- * It would make sense for there to be some more bits to
- * bring us to 11 bits worth of offset, but not sure..
- */
- int32_t off : 8;
+ uint32_t off : 8;
uint32_t mustbe1 : 1;
uint32_t dst : 8;
uint32_t pad1 : 15;
uint32_t src_ssbo : 8;
uint32_t pad2 : 3; // type
uint32_t g : 1;
- uint32_t pad3 : 1;
+ uint32_t src_ssbo_im : 1;
uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;
};
} instr_cat6_t;
+/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
+ */
+typedef enum {
+ /* Use old GL binding model with an immediate index. */
+ CAT6_IMM = 0,
+
+ CAT6_UNIFORM = 1,
+
+ CAT6_NONUNIFORM = 2,
+
+ /* Use the bindless model, with an immediate index.
+ */
+ CAT6_BINDLESS_IMM = 4,
+
+ /* Use the bindless model, with a uniform register index.
+ */
+ CAT6_BINDLESS_UNIFORM = 5,
+
+ /* Use the bindless model, with a register index that isn't guaranteed
+ * to be uniform. This presumably checks if the indices are equal and
+ * splits up the load/store, because it works the way you would
+ * expect.
+ */
+ CAT6_BINDLESS_NONUNIFORM = 6,
+} cat6_desc_mode_t;
+
/**
* For atomic ops (which return a value):
*
- * pad1=1, pad2=c, pad3=0, pad4=3
+ * pad1=1, pad3=c, pad5=3
* src1 - vecN offset/coords
* src2.x - is actually dest register
* src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
* and src2.z is 'data'
*
* For stib (which does not return a value):
- * pad1=0, pad2=c, pad3=0, pad4=2
+ * pad1=0, pad3=c, pad5=2
* src1 - vecN offset/coords
* src2 - value to store
*
* For ldib:
- * pad1=1, pad2=c, pad3=0, pad4=2
+ * pad1=1, pad3=c, pad5=2
* src1 - vecN offset/coords
*
* for ldc (load from UBO using descriptor):
- * pad1=0, pad2=8, pad3=0, pad4=2
+ * pad1=0, pad3=8, pad5=2
+ *
+ * pad2 and pad5 are only observed to be 0.
*/
typedef struct PACKED {
/* dword0: */
- uint32_t pad1 : 9;
+ uint32_t pad1 : 1;
+ uint32_t base : 3;
+ uint32_t pad2 : 2;
+ uint32_t desc_mode : 3;
uint32_t d : 2;
uint32_t typed : 1;
uint32_t type_size : 2;
uint32_t opc : 5;
- uint32_t pad2 : 5;
+ uint32_t pad3 : 5;
uint32_t src1 : 8; /* coordinate/offset */
/* dword1: */
uint32_t src2 : 8; /* or the dst for load instructions */
- uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo??
+ uint32_t pad4 : 1; //mustbe0 ??
uint32_t ssbo : 8; /* ssbo/image binding point */
uint32_t type : 3;
- uint32_t pad4 : 7;
+ uint32_t pad5 : 7;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
* cmdstream traces I have indicates that the pad bit is zero
* in all cases. So we can use this to detect new encoding:
*/
- if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
- assert(gpu_id >= 600);
- assert(instr->cat6.opc == 0);
+ if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
+ ir3_assert(gpu_id >= 600);
+ ir3_assert(instr->cat6.opc == 0);
return false;
}
static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
{
switch (instr->opc_cat) {
- case 0: return instr->cat0.opc;
+ case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4;
case 1: return 0;
case 2: return instr->cat2.opc;
case 3: return instr->cat3.opc;
}
}
-int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+
+static inline bool is_cat2_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_F:
+ case OPC_MIN_F:
+ case OPC_MAX_F:
+ case OPC_MUL_F:
+ case OPC_SIGN_F:
+ case OPC_CMPS_F:
+ case OPC_ABSNEG_F:
+ case OPC_CMPV_F:
+ case OPC_FLOOR_F:
+ case OPC_CEIL_F:
+ case OPC_RNDNE_F:
+ case OPC_RNDAZ_F:
+ case OPC_TRUNC_F:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static inline bool is_cat3_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ case OPC_SEL_F16:
+ case OPC_SEL_F32:
+ return true;
+ default:
+ return false;
+ }
+}
#endif /* INSTR_A3XX_H_ */