#include <string.h>
#include "bifrost.h"
-#include "bifrost_ops.h"
#include "disassemble.h"
+#include "bi_print.h"
#include "util/macros.h"
// return bits (high, lo]
uint64_t reg_bits;
};
-struct bifrost_regs {
- unsigned uniform_const : 8;
- unsigned reg2 : 6;
- unsigned reg3 : 6;
- unsigned reg0 : 5;
- unsigned reg1 : 6;
- unsigned ctrl : 4;
-};
-
static unsigned get_reg0(struct bifrost_regs regs)
{
if (regs.ctrl == 0)
return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1;
}
-enum bifrost_reg_write_unit {
- REG_WRITE_NONE = 0, // don't write
- REG_WRITE_TWO, // write using reg2
- REG_WRITE_THREE, // write using reg3
-};
-
// this represents the decoded version of the ctrl register field.
struct bifrost_reg_ctrl {
bool read_reg0;
};
struct fma_op_info {
+ bool extended;
unsigned op;
char name[30];
enum fma_src_type src_type;
ADD_FADD16,
ADD_FMINMAX16,
ADD_THREE_SRC,
+ ADD_SHIFT,
ADD_FADDMscale,
ADD_FCMP,
ADD_FCMP16,
bool has_data_reg;
};
-struct bifrost_tex_ctrl {
- unsigned sampler_index : 4; // also used to signal indirects
- unsigned tex_index : 7;
- bool no_merge_index : 1; // whether to merge (direct) sampler & texture indices
- bool filter : 1; // use the usual filtering pipeline (0 for texelFetch & textureGather)
- unsigned unk0 : 2;
- bool texel_offset : 1; // *Offset()
- bool is_shadow : 1;
- bool is_array : 1;
- unsigned tex_type : 2; // 2D, 3D, Cube, Buffer
- bool compute_lod : 1; // 0 for *Lod()
- bool not_supply_lod : 1; // 0 for *Lod() or when a bias is applied
- bool calc_gradients : 1; // 0 for *Grad()
- unsigned unk1 : 1;
- unsigned result_type : 4; // integer, unsigned, float TODO: why is this 4 bits?
- unsigned unk2 : 4;
-};
-
-struct bifrost_dual_tex_ctrl {
- unsigned sampler_index0 : 2;
- unsigned unk0 : 2;
- unsigned tex_index0 : 2;
- unsigned sampler_index1 : 2;
- unsigned tex_index1 : 2;
- unsigned unk1 : 22;
-};
-
-enum branch_bit_size {
- BR_SIZE_32 = 0,
- BR_SIZE_16XX = 1,
- BR_SIZE_16YY = 2,
- // For the above combinations of bitsize and location, an extra bit is
- // encoded via comparing the sources. The only possible source of ambiguity
- // would be if the sources were the same, but then the branch condition
- // would be always true or always false anyways, so we can ignore it. But
- // this no longer works when comparing the y component to the x component,
- // since it's valid to compare the y component of a source against its own
- // x component. Instead, the extra bit is encoded via an extra bitsize.
- BR_SIZE_16YX0 = 3,
- BR_SIZE_16YX1 = 4,
- BR_SIZE_32_AND_16X = 5,
- BR_SIZE_32_AND_16Y = 6,
- // Used for comparisons with zero and always-true, see below. I think this
- // only works for integer comparisons.
- BR_SIZE_ZERO = 7,
-};
-
void dump_header(FILE *fp, struct bifrost_header header, bool verbose);
void dump_instr(FILE *fp, const struct bifrost_alu_inst *instr,
struct bifrost_regs next_regs, uint64_t *consts,
void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
{
+ fprintf(fp, "id(%du) ", header.scoreboard_index);
+
if (header.clause_type != 0) {
- fprintf(fp, "id(%du) ", header.scoreboard_index);
+ const char *name = bi_clause_type_name(header.clause_type);
+
+ if (name[0] == '?')
+ fprintf(fp, "unk%u ", header.clause_type);
+ else
+ fprintf(fp, "%s ", name);
}
if (header.scoreboard_deps != 0) {
}
}
-static void dump_output_mod(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case 0:
- break;
- case 1:
- fprintf(fp, ".clamp_0_inf");
- break; // max(out, 0)
- case 2:
- fprintf(fp, ".clamp_m1_1");
- break; // clamp(out, -1, 1)
- case 3:
- fprintf(fp, ".clamp_0_1");
- break; // clamp(out, 0, 1)
- default:
- break;
- }
-}
-
-static void dump_minmax_mode(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case 0:
- /* Same as fmax() and fmin() -- return the other number if any
- * number is NaN. Also always return +0 if one argument is +0 and
- * the other is -0.
- */
- break;
- case 1:
- /* Instead of never returning a NaN, always return one. The
- * "greater"/"lesser" NaN is always returned, first by checking the
- * sign and then the mantissa bits.
- */
- fprintf(fp, ".nan_wins");
- break;
- case 2:
- /* For max, implement src0 > src1 ? src0 : src1
- * For min, implement src0 < src1 ? src0 : src1
- *
- * This includes handling NaN's and signedness of 0 differently
- * from above, since +0 and -0 compare equal and comparisons always
- * return false for NaN's. As a result, this mode is *not*
- * commutative.
- */
- fprintf(fp, ".src1_wins");
- break;
- case 3:
- /* For max, implement src0 < src1 ? src1 : src0
- * For min, implement src0 > src1 ? src1 : src0
- */
- fprintf(fp, ".src0_wins");
- break;
- default:
- break;
- }
-}
-
-static void dump_round_mode(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case 0:
- /* roundTiesToEven, the IEEE default. */
- break;
- case 1:
- /* roundTowardPositive in the IEEE spec. */
- fprintf(fp, ".round_pos");
- break;
- case 2:
- /* roundTowardNegative in the IEEE spec. */
- fprintf(fp, ".round_neg");
- break;
- case 3:
- /* roundTowardZero in the IEEE spec. */
- fprintf(fp, ".round_zero");
- break;
- default:
- break;
- }
-}
-
-static const char *
-csel_cond_name(enum bifrost_csel_cond cond)
-{
- switch (cond) {
- case BIFROST_FEQ_F: return "feq.f";
- case BIFROST_FGT_F: return "fgt.f";
- case BIFROST_FGE_F: return "fge.f";
- case BIFROST_IEQ_F: return "ieq.f";
- case BIFROST_IGT_I: return "igt.i";
- case BIFROST_IGE_I: return "uge.i";
- case BIFROST_UGT_I: return "ugt.i";
- case BIFROST_UGE_I: return "uge.i";
- default: return "invalid";
- }
-}
-
static const struct fma_op_info FMAOpInfos[] = {
- { 0x00000, "FMA.f32", FMA_FMA },
- { 0x40000, "MAX.f32", FMA_FMINMAX },
- { 0x44000, "MIN.f32", FMA_FMINMAX },
- { 0x48000, "FCMP.GL", FMA_FCMP },
- { 0x4c000, "FCMP.D3D", FMA_FCMP },
- { 0x4ff98, "ADD.i32", FMA_TWO_SRC },
- { 0x4ffd8, "SUB.i32", FMA_TWO_SRC },
- { 0x4fff0, "SUBB.i32", FMA_TWO_SRC },
- { 0x50000, "FMA_MSCALE", FMA_FMA_MSCALE },
- { 0x58000, "ADD.f32", FMA_FADD },
- { 0x5c000, "CSEL4", FMA_CSEL4 },
- { 0x5d8d0, "ICMP.D3D.GT.v2i16", FMA_TWO_SRC },
- { 0x5d9d0, "UCMP.D3D.GT.v2i16", FMA_TWO_SRC },
- { 0x5dad0, "ICMP.D3D.GE.v2i16", FMA_TWO_SRC },
- { 0x5dbd0, "UCMP.D3D.GE.v2i16", FMA_TWO_SRC },
- { 0x5dcd0, "ICMP.D3D.EQ.v2i16", FMA_TWO_SRC },
- { 0x5de40, "ICMP.GL.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? 1 : 0
- { 0x5de48, "ICMP.GL.GE.i32", FMA_TWO_SRC },
- { 0x5de50, "UCMP.GL.GT.i32", FMA_TWO_SRC },
- { 0x5de58, "UCMP.GL.GE.i32", FMA_TWO_SRC },
- { 0x5de60, "ICMP.GL.EQ.i32", FMA_TWO_SRC },
- { 0x5dec0, "ICMP.D3D.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? ~0 : 0
- { 0x5dec8, "ICMP.D3D.GE.i32", FMA_TWO_SRC },
- { 0x5ded0, "UCMP.D3D.GT.i32", FMA_TWO_SRC },
- { 0x5ded8, "UCMP.D3D.GE.i32", FMA_TWO_SRC },
- { 0x5dee0, "ICMP.D3D.EQ.i32", FMA_TWO_SRC },
- { 0x60000, "RSHIFT_NAND", FMA_SHIFT },
- { 0x61000, "RSHIFT_AND", FMA_SHIFT },
- { 0x62000, "LSHIFT_NAND", FMA_SHIFT },
- { 0x63000, "LSHIFT_AND", FMA_SHIFT }, // (src0 << src2) & src1
- { 0x64000, "RSHIFT_XOR", FMA_SHIFT },
- { 0x65200, "LSHIFT_ADD.i32", FMA_THREE_SRC },
- { 0x65600, "LSHIFT_SUB.i32", FMA_THREE_SRC }, // (src0 << src2) - src1
- { 0x65a00, "LSHIFT_RSUB.i32", FMA_THREE_SRC }, // src1 - (src0 << src2)
- { 0x65e00, "RSHIFT_ADD.i32", FMA_THREE_SRC },
- { 0x66200, "RSHIFT_SUB.i32", FMA_THREE_SRC },
- { 0x66600, "RSHIFT_RSUB.i32", FMA_THREE_SRC },
- { 0x66a00, "ARSHIFT_ADD.i32", FMA_THREE_SRC },
- { 0x66e00, "ARSHIFT_SUB.i32", FMA_THREE_SRC },
- { 0x67200, "ARSHIFT_RSUB.i32", FMA_THREE_SRC },
- { 0x80000, "FMA.v2f16", FMA_FMA16 },
- { 0xc0000, "MAX.v2f16", FMA_FMINMAX16 },
- { 0xc4000, "MIN.v2f16", FMA_FMINMAX16 },
- { 0xc8000, "FCMP.GL", FMA_FCMP16 },
- { 0xcc000, "FCMP.D3D", FMA_FCMP16 },
- { 0xcf900, "ADD.v2i16", FMA_TWO_SRC },
- { 0xcfc10, "ADDC.i32", FMA_TWO_SRC },
- { 0xcfd80, "ADD.i32.i16.X", FMA_TWO_SRC },
- { 0xcfd90, "ADD.i32.u16.X", FMA_TWO_SRC },
- { 0xcfdc0, "ADD.i32.i16.Y", FMA_TWO_SRC },
- { 0xcfdd0, "ADD.i32.u16.Y", FMA_TWO_SRC },
- { 0xd8000, "ADD.v2f16", FMA_FADD16 },
- { 0xdc000, "CSEL4.v16", FMA_CSEL4 },
- { 0xdd000, "F32_TO_F16", FMA_TWO_SRC },
- { 0xe0046, "F16_TO_I16.XX", FMA_ONE_SRC },
- { 0xe0047, "F16_TO_U16.XX", FMA_ONE_SRC },
- { 0xe004e, "F16_TO_I16.YX", FMA_ONE_SRC },
- { 0xe004f, "F16_TO_U16.YX", FMA_ONE_SRC },
- { 0xe0056, "F16_TO_I16.XY", FMA_ONE_SRC },
- { 0xe0057, "F16_TO_U16.XY", FMA_ONE_SRC },
- { 0xe005e, "F16_TO_I16.YY", FMA_ONE_SRC },
- { 0xe005f, "F16_TO_U16.YY", FMA_ONE_SRC },
- { 0xe00c0, "I16_TO_F16.XX", FMA_ONE_SRC },
- { 0xe00c1, "U16_TO_F16.XX", FMA_ONE_SRC },
- { 0xe00c8, "I16_TO_F16.YX", FMA_ONE_SRC },
- { 0xe00c9, "U16_TO_F16.YX", FMA_ONE_SRC },
- { 0xe00d0, "I16_TO_F16.XY", FMA_ONE_SRC },
- { 0xe00d1, "U16_TO_F16.XY", FMA_ONE_SRC },
- { 0xe00d8, "I16_TO_F16.YY", FMA_ONE_SRC },
- { 0xe00d9, "U16_TO_F16.YY", FMA_ONE_SRC },
- { 0xe0136, "F32_TO_I32", FMA_ONE_SRC },
- { 0xe0137, "F32_TO_U32", FMA_ONE_SRC },
- { 0xe0178, "I32_TO_F32", FMA_ONE_SRC },
- { 0xe0179, "U32_TO_F32", FMA_ONE_SRC },
- { 0xe0198, "I16_TO_I32.X", FMA_ONE_SRC },
- { 0xe0199, "U16_TO_U32.X", FMA_ONE_SRC },
- { 0xe019a, "I16_TO_I32.Y", FMA_ONE_SRC },
- { 0xe019b, "U16_TO_U32.Y", FMA_ONE_SRC },
- { 0xe019c, "I16_TO_F32.X", FMA_ONE_SRC },
- { 0xe019d, "U16_TO_F32.X", FMA_ONE_SRC },
- { 0xe019e, "I16_TO_F32.Y", FMA_ONE_SRC },
- { 0xe019f, "U16_TO_F32.Y", FMA_ONE_SRC },
- { 0xe01a2, "F16_TO_F32.X", FMA_ONE_SRC },
- { 0xe01a3, "F16_TO_F32.Y", FMA_ONE_SRC },
- { 0xe032c, "NOP", FMA_ONE_SRC },
- { 0xe032d, "MOV", FMA_ONE_SRC },
- { 0xe032f, "SWZ.YY.v2i16", FMA_ONE_SRC },
- { 0xe0345, "LOG_FREXPM", FMA_ONE_SRC },
- { 0xe0365, "FRCP_FREXPM", FMA_ONE_SRC },
- { 0xe0375, "FSQRT_FREXPM", FMA_ONE_SRC },
- { 0xe038d, "FRCP_FREXPE", FMA_ONE_SRC },
- { 0xe03a5, "FSQRT_FREXPE", FMA_ONE_SRC },
- { 0xe03ad, "FRSQ_FREXPE", FMA_ONE_SRC },
- { 0xe03c5, "LOG_FREXPE", FMA_ONE_SRC },
- { 0xe03fa, "CLZ", FMA_ONE_SRC },
- { 0xe0b80, "IMAX3", FMA_THREE_SRC },
- { 0xe0bc0, "UMAX3", FMA_THREE_SRC },
- { 0xe0c00, "IMIN3", FMA_THREE_SRC },
- { 0xe0c40, "UMIN3", FMA_THREE_SRC },
- { 0xe0ec5, "ROUND", FMA_ONE_SRC },
- { 0xe0f40, "CSEL", FMA_THREE_SRC }, // src2 != 0 ? src1 : src0
- { 0xe0fc0, "MUX.i32", FMA_THREE_SRC }, // see ADD comment
- { 0xe1805, "ROUNDEVEN", FMA_ONE_SRC },
- { 0xe1845, "CEIL", FMA_ONE_SRC },
- { 0xe1885, "FLOOR", FMA_ONE_SRC },
- { 0xe18c5, "TRUNC", FMA_ONE_SRC },
- { 0xe19b0, "ATAN_LDEXP.Y.f32", FMA_TWO_SRC },
- { 0xe19b8, "ATAN_LDEXP.X.f32", FMA_TWO_SRC },
- { 0xe1c80, "LSHIFT_ADD_LOW32.u32", FMA_SHIFT_ADD64 },
- { 0xe1cc0, "LSHIFT_ADD_LOW32.i64", FMA_SHIFT_ADD64 },
- { 0xe1d80, "LSHIFT_ADD_LOW32.i32", FMA_SHIFT_ADD64 },
- { 0xe1e00, "SEL.XX.i16", FMA_TWO_SRC },
- { 0xe1e08, "SEL.YX.i16", FMA_TWO_SRC },
- { 0xe1e10, "SEL.XY.i16", FMA_TWO_SRC },
- { 0xe1e18, "SEL.YY.i16", FMA_TWO_SRC },
- { 0xe7800, "IMAD", FMA_THREE_SRC },
- { 0xe78db, "POPCNT", FMA_ONE_SRC },
+ /* Each row is { extended, op, name, src_type }.
+  *
+  * find_fma_op_info() only considers rows whose `extended` flag matches the
+  * opcode being decoded. An opcode is treated as extended when its 0xe0000
+  * bits are all set, and those bits are cleared before comparison, so
+  * extended rows store `op` WITHOUT the 0xe0000 prefix.
+  */
+ { false, 0x00000, "FMA.f32", FMA_FMA },
+ { false, 0x40000, "MAX.f32", FMA_FMINMAX },
+ { false, 0x44000, "MIN.f32", FMA_FMINMAX },
+ { false, 0x48000, "FCMP.GL", FMA_FCMP },
+ { false, 0x4c000, "FCMP.D3D", FMA_FCMP },
+ { false, 0x4ff98, "ADD.i32", FMA_TWO_SRC },
+ { false, 0x4ffd8, "SUB.i32", FMA_TWO_SRC },
+ { false, 0x4fff0, "SUBB.i32", FMA_TWO_SRC },
+ { false, 0x50000, "FMA_MSCALE", FMA_FMA_MSCALE },
+ { false, 0x58000, "ADD.f32", FMA_FADD },
+ { false, 0x5c000, "CSEL4", FMA_CSEL4 },
+ { false, 0x5d8d0, "ICMP.D3D.GT.v2i16", FMA_TWO_SRC },
+ { false, 0x5d9d0, "UCMP.D3D.GT.v2i16", FMA_TWO_SRC },
+ { false, 0x5dad0, "ICMP.D3D.GE.v2i16", FMA_TWO_SRC },
+ { false, 0x5dbd0, "UCMP.D3D.GE.v2i16", FMA_TWO_SRC },
+ { false, 0x5dcd0, "ICMP.D3D.EQ.v2i16", FMA_TWO_SRC },
+ { false, 0x5de40, "ICMP.GL.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? 1 : 0
+ { false, 0x5de48, "ICMP.GL.GE.i32", FMA_TWO_SRC },
+ { false, 0x5de50, "UCMP.GL.GT.i32", FMA_TWO_SRC },
+ { false, 0x5de58, "UCMP.GL.GE.i32", FMA_TWO_SRC },
+ { false, 0x5de60, "ICMP.GL.EQ.i32", FMA_TWO_SRC },
+ { false, 0x5dec0, "ICMP.D3D.GT.i32", FMA_TWO_SRC }, // src0 > src1 ? ~0 : 0
+ { false, 0x5dec8, "ICMP.D3D.GE.i32", FMA_TWO_SRC },
+ { false, 0x5ded0, "UCMP.D3D.GT.i32", FMA_TWO_SRC },
+ { false, 0x5ded8, "UCMP.D3D.GE.i32", FMA_TWO_SRC },
+ { false, 0x5dee0, "ICMP.D3D.EQ.i32", FMA_TWO_SRC },
+ { false, 0x60000, "RSHIFT_NAND", FMA_SHIFT },
+ { false, 0x61000, "RSHIFT_AND", FMA_SHIFT },
+ { false, 0x62000, "LSHIFT_NAND", FMA_SHIFT },
+ { false, 0x63000, "LSHIFT_AND", FMA_SHIFT }, // (src0 << src2) & src1
+ { false, 0x64000, "RSHIFT_XOR", FMA_SHIFT },
+ { false, 0x65200, "LSHIFT_ADD.i32", FMA_THREE_SRC },
+ { false, 0x65600, "LSHIFT_SUB.i32", FMA_THREE_SRC }, // (src0 << src2) - src1
+ { false, 0x65a00, "LSHIFT_RSUB.i32", FMA_THREE_SRC }, // src1 - (src0 << src2)
+ { false, 0x65e00, "RSHIFT_ADD.i32", FMA_THREE_SRC },
+ { false, 0x66200, "RSHIFT_SUB.i32", FMA_THREE_SRC },
+ { false, 0x66600, "RSHIFT_RSUB.i32", FMA_THREE_SRC },
+ { false, 0x66a00, "ARSHIFT_ADD.i32", FMA_THREE_SRC },
+ { false, 0x66e00, "ARSHIFT_SUB.i32", FMA_THREE_SRC },
+ { false, 0x67200, "ARSHIFT_RSUB.i32", FMA_THREE_SRC },
+ { false, 0x80000, "FMA.v2f16", FMA_FMA16 },
+ { false, 0xc0000, "MAX.v2f16", FMA_FMINMAX16 },
+ { false, 0xc4000, "MIN.v2f16", FMA_FMINMAX16 },
+ { false, 0xc8000, "FCMP.GL", FMA_FCMP16 },
+ { false, 0xcc000, "FCMP.D3D", FMA_FCMP16 },
+ { false, 0xcf900, "ADD.v2i16", FMA_TWO_SRC },
+ { false, 0xcfc10, "ADDC.i32", FMA_TWO_SRC },
+ { false, 0xcfd80, "ADD.i32.i16.X", FMA_TWO_SRC },
+ { false, 0xcfd90, "ADD.i32.u16.X", FMA_TWO_SRC },
+ { false, 0xcfdc0, "ADD.i32.i16.Y", FMA_TWO_SRC },
+ { false, 0xcfdd0, "ADD.i32.u16.Y", FMA_TWO_SRC },
+ { false, 0xd8000, "ADD.v2f16", FMA_FADD16 },
+ { false, 0xdc000, "CSEL4.v16", FMA_CSEL4 },
+ { false, 0xdd000, "F32_TO_F16", FMA_TWO_SRC },
+
+ /* TODO: Combine to bifrost_fma_f2i_i2f16 */
+ { true, 0x00046, "F16_TO_I16.XX", FMA_ONE_SRC },
+ { true, 0x00047, "F16_TO_U16.XX", FMA_ONE_SRC },
+ { true, 0x0004e, "F16_TO_I16.YX", FMA_ONE_SRC },
+ { true, 0x0004f, "F16_TO_U16.YX", FMA_ONE_SRC },
+ { true, 0x00056, "F16_TO_I16.XY", FMA_ONE_SRC },
+ { true, 0x00057, "F16_TO_U16.XY", FMA_ONE_SRC },
+ { true, 0x0005e, "F16_TO_I16.YY", FMA_ONE_SRC },
+ { true, 0x0005f, "F16_TO_U16.YY", FMA_ONE_SRC },
+ { true, 0x000c0, "I16_TO_F16.XX", FMA_ONE_SRC },
+ { true, 0x000c1, "U16_TO_F16.XX", FMA_ONE_SRC },
+ { true, 0x000c8, "I16_TO_F16.YX", FMA_ONE_SRC },
+ { true, 0x000c9, "U16_TO_F16.YX", FMA_ONE_SRC },
+ { true, 0x000d0, "I16_TO_F16.XY", FMA_ONE_SRC },
+ { true, 0x000d1, "U16_TO_F16.XY", FMA_ONE_SRC },
+ { true, 0x000d8, "I16_TO_F16.YY", FMA_ONE_SRC },
+ { true, 0x000d9, "U16_TO_F16.YY", FMA_ONE_SRC },
+
+ { true, 0x00136, "F32_TO_I32", FMA_ONE_SRC },
+ { true, 0x00137, "F32_TO_U32", FMA_ONE_SRC },
+ { true, 0x00178, "I32_TO_F32", FMA_ONE_SRC },
+ { true, 0x00179, "U32_TO_F32", FMA_ONE_SRC },
+
+ /* TODO: cleanup to use bifrost_fma_int16_to_32 */
+ { true, 0x00198, "I16_TO_I32.X", FMA_ONE_SRC },
+ { true, 0x00199, "U16_TO_U32.X", FMA_ONE_SRC },
+ { true, 0x0019a, "I16_TO_I32.Y", FMA_ONE_SRC },
+ { true, 0x0019b, "U16_TO_U32.Y", FMA_ONE_SRC },
+ { true, 0x0019c, "I16_TO_F32.X", FMA_ONE_SRC },
+ { true, 0x0019d, "U16_TO_F32.X", FMA_ONE_SRC },
+ { true, 0x0019e, "I16_TO_F32.Y", FMA_ONE_SRC },
+ { true, 0x0019f, "U16_TO_F32.Y", FMA_ONE_SRC },
+
+ { true, 0x001a2, "F16_TO_F32.X", FMA_ONE_SRC },
+ { true, 0x001a3, "F16_TO_F32.Y", FMA_ONE_SRC },
+
+ { true, 0x0032c, "NOP", FMA_ONE_SRC },
+ { true, 0x0032d, "MOV", FMA_ONE_SRC },
+ { true, 0x0032f, "SWZ.YY.v2i16", FMA_ONE_SRC },
+ { true, 0x00345, "LOG_FREXPM", FMA_ONE_SRC },
+ { true, 0x00365, "FRCP_FREXPM", FMA_ONE_SRC },
+ { true, 0x00375, "FSQRT_FREXPM", FMA_ONE_SRC },
+ { true, 0x0038d, "FRCP_FREXPE", FMA_ONE_SRC },
+ { true, 0x003a5, "FSQRT_FREXPE", FMA_ONE_SRC },
+ { true, 0x003ad, "FRSQ_FREXPE", FMA_ONE_SRC },
+ { true, 0x003c5, "LOG_FREXPE", FMA_ONE_SRC },
+ { true, 0x003fa, "CLZ", FMA_ONE_SRC },
+ { true, 0x00b80, "IMAX3", FMA_THREE_SRC },
+ { true, 0x00bc0, "UMAX3", FMA_THREE_SRC },
+ { true, 0x00c00, "IMIN3", FMA_THREE_SRC },
+ { true, 0x00c40, "UMIN3", FMA_THREE_SRC },
+ { true, 0x00ec2, "ROUND.v2f16", FMA_ONE_SRC },
+ { true, 0x00ec5, "ROUND.f32", FMA_ONE_SRC },
+ { true, 0x00f40, "CSEL", FMA_THREE_SRC }, // src2 != 0 ? src1 : src0
+ { true, 0x00fc0, "MUX.i32", FMA_THREE_SRC }, // see ADD comment
+ { true, 0x01802, "ROUNDEVEN.v2f16", FMA_ONE_SRC },
+ { true, 0x01805, "ROUNDEVEN.f32", FMA_ONE_SRC },
+ { true, 0x01842, "CEIL.v2f16", FMA_ONE_SRC },
+ { true, 0x01845, "CEIL.f32", FMA_ONE_SRC },
+ { true, 0x01882, "FLOOR.v2f16", FMA_ONE_SRC },
+ { true, 0x01885, "FLOOR.f32", FMA_ONE_SRC },
+ { true, 0x018c2, "TRUNC.v2f16", FMA_ONE_SRC },
+ { true, 0x018c5, "TRUNC.f32", FMA_ONE_SRC },
+ { true, 0x019b0, "ATAN_LDEXP.Y.f32", FMA_TWO_SRC },
+ { true, 0x019b8, "ATAN_LDEXP.X.f32", FMA_TWO_SRC },
+ { true, 0x01c80, "LSHIFT_ADD_LOW32.u32", FMA_SHIFT_ADD64 },
+ { true, 0x01cc0, "LSHIFT_ADD_LOW32.i64", FMA_SHIFT_ADD64 },
+ { true, 0x01d80, "LSHIFT_ADD_LOW32.i32", FMA_SHIFT_ADD64 },
+ { true, 0x01e00, "SEL.XX.i16", FMA_TWO_SRC },
+ { true, 0x01e08, "SEL.YX.i16", FMA_TWO_SRC },
+ { true, 0x01e10, "SEL.XY.i16", FMA_TWO_SRC },
+ { true, 0x01e18, "SEL.YY.i16", FMA_TWO_SRC },
+ { true, 0x01e80, "ADD_FREXPM.f32", FMA_TWO_SRC },
+ { true, 0x07800, "IMAD", FMA_THREE_SRC }, // 0xe7800 with the 0xe0000 prefix cleared
+ { true, 0x078db, "POPCNT", FMA_ONE_SRC },
};
-static struct fma_op_info find_fma_op_info(unsigned op)
+static struct fma_op_info find_fma_op_info(unsigned op, bool extended)
{
for (unsigned i = 0; i < ARRAY_SIZE(FMAOpInfos); i++) {
unsigned opCmp = ~0;
+
+ if (FMAOpInfos[i].extended != extended)
+ continue;
+
+ if (extended)
+ op &= ~0xe0000;
+
switch (FMAOpInfos[i].src_type) {
case FMA_ONE_SRC:
opCmp = op;
}
struct bifrost_fma_inst FMA;
memcpy((char *) &FMA, (char *) &word, sizeof(struct bifrost_fma_inst));
- struct fma_op_info info = find_fma_op_info(FMA.op);
+ struct fma_op_info info = find_fma_op_info(FMA.op, (FMA.op & 0xe0000) == 0xe0000);
fprintf(fp, "%s", info.name);
if (info.src_type == FMA_FADD ||
info.src_type == FMA_FADD16 ||
info.src_type == FMA_FMINMAX16 ||
info.src_type == FMA_FMA16) {
- dump_output_mod(fp, bits(FMA.op, 12, 14));
+ fprintf(fp, "%s", bi_output_mod_name(bits(FMA.op, 12, 14)));
switch (info.src_type) {
case FMA_FADD:
case FMA_FMA:
case FMA_FADD16:
case FMA_FMA16:
- dump_round_mode(fp, bits(FMA.op, 10, 12));
+ fprintf(fp, "%s", bi_round_mode_name(bits(FMA.op, 10, 12)));
break;
case FMA_FMINMAX:
case FMA_FMINMAX16:
- dump_minmax_mode(fp, bits(FMA.op, 10, 12));
+ fprintf(fp, "%s", bi_minmax_mode_name(bits(FMA.op, 10, 12)));
break;
default:
assert(0);
fprintf(fp, ".unk%d_mode", (int) (FMA.op >> 9) & 0x3);
}
} else {
- dump_output_mod(fp, bits(FMA.op, 9, 11));
+ fprintf(fp, "%s", bi_output_mod_name(bits(FMA.op, 9, 11)));
}
} else if (info.src_type == FMA_SHIFT) {
struct bifrost_shift_fma shift;
case FMA_CSEL4: {
struct bifrost_csel4 csel;
memcpy(&csel, &FMA, sizeof(csel));
- fprintf(fp, ".%s ", csel_cond_name(csel.cond));
+ fprintf(fp, ".%s ", bi_csel_cond_name(csel.cond));
dump_src(fp, csel.src0, regs, consts, true);
fprintf(fp, ", ");
{ 0x07ba5, "FSQRT_FREXPE", ADD_ONE_SRC },
{ 0x07bad, "FRSQ_FREXPE", ADD_ONE_SRC },
{ 0x07bc5, "FLOG_FREXPE", ADD_ONE_SRC },
- { 0x07d45, "CEIL", ADD_ONE_SRC },
- { 0x07d85, "FLOOR", ADD_ONE_SRC },
- { 0x07dc5, "TRUNC", ADD_ONE_SRC },
+ { 0x07d42, "CEIL.v2f16", ADD_ONE_SRC },
+ { 0x07d45, "CEIL.f32", ADD_ONE_SRC },
+ { 0x07d82, "FLOOR.v2f16", ADD_ONE_SRC },
+ { 0x07d85, "FLOOR.f32", ADD_ONE_SRC },
+ { 0x07dc2, "TRUNC.v2f16", ADD_ONE_SRC },
+ { 0x07dc5, "TRUNC.f32", ADD_ONE_SRC },
{ 0x07f18, "LSHIFT_ADD_HIGH32.i32", ADD_TWO_SRC },
- { 0x08000, "LD_ATTR.f16", ADD_LOAD_ATTR, true },
- { 0x08100, "LD_ATTR.v2f16", ADD_LOAD_ATTR, true },
- { 0x08200, "LD_ATTR.v3f16", ADD_LOAD_ATTR, true },
- { 0x08300, "LD_ATTR.v4f16", ADD_LOAD_ATTR, true },
- { 0x08400, "LD_ATTR.f32", ADD_LOAD_ATTR, true },
- { 0x08500, "LD_ATTR.v3f32", ADD_LOAD_ATTR, true },
- { 0x08600, "LD_ATTR.v3f32", ADD_LOAD_ATTR, true },
- { 0x08700, "LD_ATTR.v4f32", ADD_LOAD_ATTR, true },
- { 0x08800, "LD_ATTR.i32", ADD_LOAD_ATTR, true },
- { 0x08900, "LD_ATTR.v3i32", ADD_LOAD_ATTR, true },
- { 0x08a00, "LD_ATTR.v3i32", ADD_LOAD_ATTR, true },
- { 0x08b00, "LD_ATTR.v4i32", ADD_LOAD_ATTR, true },
- { 0x08c00, "LD_ATTR.u32", ADD_LOAD_ATTR, true },
- { 0x08d00, "LD_ATTR.v3u32", ADD_LOAD_ATTR, true },
- { 0x08e00, "LD_ATTR.v3u32", ADD_LOAD_ATTR, true },
- { 0x08f00, "LD_ATTR.v4u32", ADD_LOAD_ATTR, true },
+ { 0x08000, "LD_ATTR", ADD_LOAD_ATTR, true },
{ 0x0a000, "LD_VAR.32", ADD_VARYING_INTERP, true },
{ 0x0b000, "TEX", ADD_TEX_COMPACT, true },
{ 0x0c188, "LOAD.i32", ADD_TWO_SRC, true },
{ 0x0cbb8, "ST_SCRATCH.v3i32", ADD_TWO_SRC, true },
{ 0x0cc00, "FRCP_FAST.f32", ADD_ONE_SRC },
{ 0x0cc20, "FRSQ_FAST.f32", ADD_ONE_SRC },
+ { 0x0cc68, "FLOG2_U.f32", ADD_ONE_SRC },
+ { 0x0cd58, "FEXP2_FAST.f32", ADD_ONE_SRC },
{ 0x0ce00, "FRCP_TABLE", ADD_ONE_SRC },
{ 0x0ce10, "FRCP_FAST.f16.X", ADD_ONE_SRC },
{ 0x0ce20, "FRSQ_TABLE", ADD_ONE_SRC },
{ 0x17d90, "ADD.i32.u16.X", ADD_TWO_SRC },
{ 0x17dc0, "ADD.i32.i16.Y", ADD_TWO_SRC },
{ 0x17dd0, "ADD.i32.u16.Y", ADD_TWO_SRC },
- { 0x18000, "LD_VAR_ADDR.f16", ADD_VARYING_ADDRESS, true },
- { 0x18100, "LD_VAR_ADDR.f32", ADD_VARYING_ADDRESS, true },
- { 0x18200, "LD_VAR_ADDR.i32", ADD_VARYING_ADDRESS, true },
- { 0x18300, "LD_VAR_ADDR.u32", ADD_VARYING_ADDRESS, true },
+ { 0x17881, "ADD.i8", ADD_TWO_SRC },
+ { 0x18000, "LD_VAR_ADDR", ADD_VARYING_ADDRESS, true },
{ 0x19181, "DISCARD.FEQ.f32", ADD_TWO_SRC, true },
{ 0x19189, "DISCARD.FNE.f32", ADD_TWO_SRC, true },
{ 0x1918C, "DISCARD.GL.f32", ADD_TWO_SRC, true }, /* Consumes ICMP.GL/etc with fixed 0 argument */
{ 0x1952c, "BLEND", ADD_BLENDING, true },
{ 0x1a000, "LD_VAR.16", ADD_VARYING_INTERP, true },
{ 0x1ae60, "TEX", ADD_TEX, true },
- { 0x1c000, "RSHIFT_NAND.i32", ADD_THREE_SRC },
- { 0x1c300, "RSHIFT_OR.i32", ADD_THREE_SRC },
- { 0x1c400, "RSHIFT_AND.i32", ADD_THREE_SRC },
- { 0x1c700, "RSHIFT_NOR.i32", ADD_THREE_SRC },
- { 0x1c800, "LSHIFT_NAND.i32", ADD_THREE_SRC },
- { 0x1cb00, "LSHIFT_OR.i32", ADD_THREE_SRC },
- { 0x1cc00, "LSHIFT_AND.i32", ADD_THREE_SRC },
- { 0x1cf00, "LSHIFT_NOR.i32", ADD_THREE_SRC },
- { 0x1d000, "RSHIFT_XOR.i32", ADD_THREE_SRC },
- { 0x1d100, "RSHIFT_XNOR.i32", ADD_THREE_SRC },
- { 0x1d200, "LSHIFT_XOR.i32", ADD_THREE_SRC },
- { 0x1d300, "LSHIFT_XNOR.i32", ADD_THREE_SRC },
- { 0x1d400, "LSHIFT_ADD.i32", ADD_THREE_SRC },
- { 0x1d500, "LSHIFT_SUB.i32", ADD_THREE_SRC },
- { 0x1d500, "LSHIFT_RSUB.i32", ADD_THREE_SRC },
- { 0x1d700, "RSHIFT_ADD.i32", ADD_THREE_SRC },
- { 0x1d800, "RSHIFT_SUB.i32", ADD_THREE_SRC },
- { 0x1d900, "RSHIFT_RSUB.i32", ADD_THREE_SRC },
- { 0x1da00, "ARSHIFT_ADD.i32", ADD_THREE_SRC },
- { 0x1db00, "ARSHIFT_SUB.i32", ADD_THREE_SRC },
- { 0x1dc00, "ARSHIFT_RSUB.i32", ADD_THREE_SRC },
+ { 0x1c000, "RSHIFT_NAND.i32", ADD_SHIFT },
+ { 0x1c400, "RSHIFT_AND.i32", ADD_SHIFT },
+ { 0x1c800, "LSHIFT_NAND.i32", ADD_SHIFT },
+ { 0x1cc00, "LSHIFT_AND.i32", ADD_SHIFT },
+ { 0x1d000, "RSHIFT_XOR.i32", ADD_SHIFT },
+ { 0x1d400, "LSHIFT_ADD.i32", ADD_SHIFT },
+ { 0x1d800, "RSHIFT_SUB.i32", ADD_SHIFT },
{ 0x1dd18, "OR.i32", ADD_TWO_SRC },
{ 0x1dd20, "AND.i32", ADD_TWO_SRC },
{ 0x1dd60, "LSHIFT.i32", ADD_TWO_SRC },
case ADD_THREE_SRC:
opCmp = op & ~0x3f;
break;
+ case ADD_SHIFT:
+ opCmp = op & ~0x3ff;
+ break;
case ADD_TEX:
opCmp = op & ~0xf;
break;
opCmp = op & ~0x7ff;
break;
case ADD_VARYING_ADDRESS:
- opCmp = op & ~0xff;
+ opCmp = op & ~0xfff;
break;
case ADD_LOAD_ATTR:
- opCmp = op & ~0x7f;
- break;
case ADD_BRANCH:
opCmp = op & ~0xfff;
break;
// float16 seems like it doesn't support output modifiers
if (info.src_type == ADD_FADD || info.src_type == ADD_FMINMAX) {
// output modifiers
- dump_output_mod(fp, bits(ADD.op, 8, 10));
+ fprintf(fp, "%s", bi_output_mod_name(bits(ADD.op, 8, 10)));
if (info.src_type == ADD_FADD)
- dump_round_mode(fp, bits(ADD.op, 10, 12));
+ fprintf(fp, "%s", bi_round_mode_name(bits(ADD.op, 10, 12)));
else
- dump_minmax_mode(fp, bits(ADD.op, 10, 12));
+ fprintf(fp, "%s", bi_minmax_mode_name(bits(ADD.op, 10, 12)));
} else if (info.src_type == ADD_FCMP || info.src_type == ADD_FCMP16) {
dump_fcmp(fp, bits(ADD.op, 3, 6));
if (info.src_type == ADD_FCMP)
fprintf(fp, ".reuse");
if (ADD.op & 0x400)
fprintf(fp, ".flat");
- switch ((ADD.op >> 7) & 0x3) {
- case 0:
- fprintf(fp, ".per_frag");
- break;
- case 1:
- fprintf(fp, ".centroid");
- break;
- case 2:
- break;
- case 3:
- fprintf(fp, ".explicit");
- break;
- }
+ fprintf(fp, "%s", bi_interp_mode_name((ADD.op >> 7) & 0x3));
fprintf(fp, ".v%d", ((ADD.op >> 5) & 0x3) + 1);
} else if (info.src_type == ADD_BRANCH) {
- enum branch_code branchCode = (enum branch_code) ((ADD.op >> 6) & 0x3f);
+ enum bifrost_branch_code branchCode = (enum bifrost_branch_code) ((ADD.op >> 6) & 0x3f);
if (branchCode == BR_ALWAYS) {
// unconditional branch
} else {
- enum branch_cond cond = (enum branch_cond) ((ADD.op >> 6) & 0x7);
+ enum bifrost_branch_cond cond = (enum bifrost_branch_cond) ((ADD.op >> 6) & 0x7);
enum branch_bit_size size = (enum branch_bit_size) ((ADD.op >> 9) & 0x7);
bool portSwapped = (ADD.op & 0x7) < ADD.src0;
// See the comment in branch_bit_size
}
}
}
+ } else if (info.src_type == ADD_SHIFT) {
+ struct bifrost_shift_add shift;
+ memcpy(&shift, &ADD, sizeof(ADD));
+
+ if (shift.invert_1)
+ fprintf(fp, ".invert_1");
+
+ if (shift.invert_2)
+ fprintf(fp, ".invert_2");
+
+ if (shift.zero)
+ fprintf(fp, ".unk%u", shift.zero);
+ } else if (info.src_type == ADD_VARYING_ADDRESS) {
+ struct bifrost_ld_var_addr ld;
+ memcpy(&ld, &ADD, sizeof(ADD));
+ fprintf(fp, ".%s", bi_ldst_type_name(ld.type));
+ } else if (info.src_type == ADD_LOAD_ATTR) {
+ struct bifrost_ld_attr ld;
+ memcpy(&ld, &ADD, sizeof(ADD));
+
+ if (ld.channels)
+ fprintf(fp, ".v%d%s", ld.channels + 1, bi_ldst_type_name(ld.type));
+ else
+ fprintf(fp, ".%s", bi_ldst_type_name(ld.type));
}
+
fprintf(fp, " ");
struct bifrost_reg_ctrl next_ctrl = DecodeRegCtrl(fp, next_regs);
int tex_index;
int sampler_index;
bool dualTex = false;
+
+ fprintf(fp, "coords <");
+ dump_src(fp, ADD.src0, regs, consts, false);
+ fprintf(fp, ", ");
+ dump_src(fp, ADD.op & 0x7, regs, consts, false);
+ fprintf(fp, ">, ");
+
if (info.src_type == ADD_TEX_COMPACT) {
tex_index = (ADD.op >> 3) & 0x7;
sampler_index = (ADD.op >> 7) & 0x7;
struct bifrost_tex_ctrl ctrl;
memcpy((char *) &ctrl, (char *) &controlBits, sizeof(ctrl));
- // TODO: figure out what actually triggers dual-tex
+ /* Dual-tex triggered for adjacent texturing
+ * instructions with the same coordinates to different
+ * textures/samplers. Observed for the compact
+ * (2D/normal) case. */
+
if (ctrl.result_type == 9) {
struct bifrost_dual_tex_ctrl dualCtrl;
memcpy((char *) &dualCtrl, (char *) &controlBits, sizeof(ctrl));
break;
}
case ADD_LOAD_ATTR:
- fprintf(fp, "location:%d, ", (ADD.op >> 3) & 0xf);
+ fprintf(fp, "location:%d, ", (ADD.op >> 3) & 0x1f);
case ADD_TWO_SRC:
dump_src(fp, ADD.src0, regs, consts, false);
fprintf(fp, ", ");
fprintf(fp, ", ");
dump_src(fp, (ADD.op >> 3) & 0x7, regs, consts, false);
break;
+ case ADD_SHIFT: {
+ struct bifrost_shift_add shift;
+ memcpy(&shift, &ADD, sizeof(ADD));
+ dump_src(fp, shift.src0, regs, consts, false);
+ fprintf(fp, ", ");
+ dump_src(fp, shift.src1, regs, consts, false);
+ fprintf(fp, ", ");
+ dump_src(fp, shift.src2, regs, consts, false);
+ break;
+ }
case ADD_FADD:
case ADD_FMINMAX:
if (ADD.op & 0x10)
dump_16swizzle(fp, (ADD.op >> 8) & 0x3);
if (abs1 && abs2)
fprintf(fp, ")");
+ fprintf(fp, "/* %X */\n", (ADD.op >> 10) & 0x3); /* mode */
break;
}
case ADD_FADDMscale: {
dump_16swizzle(fp, (ADD.op >> 8) & 0x3);
break;
case ADD_BRANCH: {
- enum branch_code code = (enum branch_code) ((ADD.op >> 6) & 0x3f);
+ enum bifrost_branch_code code = (enum bifrost_branch_code) ((ADD.op >> 6) & 0x3f);
enum branch_bit_size size = (enum branch_bit_size) ((ADD.op >> 9) & 0x7);
if (code != BR_ALWAYS) {
dump_src(fp, ADD.src0, regs, consts, false);