#include "bifrost.h"
#include "disassemble.h"
+#include "bi_print.h"
#include "util/macros.h"
// return bits (high, lo]
/* Print a short textual summary of a clause header to fp.
 *
 * As visible in this excerpt, after the change the scoreboard index is
 * printed unconditionally (previously only when clause_type was nonzero).
 * When clause_type is nonzero, the name from bi_clause_type_name() is
 * printed, or "unk<N>" if that name begins with '?'.
 *
 * NOTE(review): "%du" prints the index with %d followed by a literal 'u';
 * if "%u" was intended the output format would change -- confirm.
 * NOTE(review): `verbose` is not used in the lines visible here, and the
 * scoreboard_deps branch body is elided from this excerpt.
 */
void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
{
+ fprintf(fp, "id(%du) ", header.scoreboard_index);
+
if (header.clause_type != 0) {
- fprintf(fp, "id(%du) ", header.scoreboard_index);
+ const char *name = bi_clause_type_name(header.clause_type);
+
+ if (name[0] == '?')
+ fprintf(fp, "unk%u ", header.clause_type);
+ else
+ fprintf(fp, "%s ", name);
}
if (header.scoreboard_deps != 0) {
}
}
/* Removed by this change: wrote the output-modifier suffix for `mod` to fp.
 * BIFROST_NONE and unrecognised values printed nothing. The call sites
 * visible in this excerpt now print bi_output_mod_name() with "%s" instead.
 */
-static void dump_output_mod(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case BIFROST_NONE:
- break;
- case BIFROST_POS:
- fprintf(fp, ".clamp_0_inf");
- break; // max(out, 0)
- case BIFROST_SAT_SIGNED:
- fprintf(fp, ".clamp_m1_1");
- break; // clamp(out, -1, 1)
- case BIFROST_SAT:
- fprintf(fp, ".clamp_0_1");
- break; // clamp(out, 0, 1)
- default:
- break;
- }
-}
-
/* Removed by this change: wrote the min/max mode suffix for `mod` to fp.
 * Mode 0 and values above 3 printed nothing; modes 1-3 printed ".nan_wins",
 * ".src1_wins" and ".src0_wins" respectively. The call sites visible in this
 * excerpt now print bi_minmax_mode_name() with "%s" instead.
 */
-static void dump_minmax_mode(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case 0:
- /* Same as fmax() and fmin() -- return the other number if any
- * number is NaN. Also always return +0 if one argument is +0 and
- * the other is -0.
- */
- break;
- case 1:
- /* Instead of never returning a NaN, always return one. The
- * "greater"/"lesser" NaN is always returned, first by checking the
- * sign and then the mantissa bits.
- */
- fprintf(fp, ".nan_wins");
- break;
- case 2:
- /* For max, implement src0 > src1 ? src0 : src1
- * For min, implement src0 < src1 ? src0 : src1
- *
- * This includes handling NaN's and signedness of 0 differently
- * from above, since +0 and -0 compare equal and comparisons always
- * return false for NaN's. As a result, this mode is *not*
- * commutative.
- */
- fprintf(fp, ".src1_wins");
- break;
- case 3:
- /* For max, implement src0 < src1 ? src1 : src0
- * For min, implement src0 > src1 ? src1 : src0
- */
- fprintf(fp, ".src0_wins");
- break;
- default:
- break;
- }
-}
-
/* Removed by this change: wrote the rounding-mode suffix for `mod` to fp.
 * BIFROST_RTE and unrecognised values printed nothing; the other three modes
 * printed ".round_pos", ".round_neg" or ".round_zero". The call sites visible
 * in this excerpt now print bi_round_mode_name() with "%s" instead.
 */
-static void dump_round_mode(FILE *fp, unsigned mod)
-{
- switch (mod) {
- case BIFROST_RTE:
- /* roundTiesToEven, the IEEE default. */
- break;
- case BIFROST_RTP:
- /* roundTowardPositive in the IEEE spec. */
- fprintf(fp, ".round_pos");
- break;
- case BIFROST_RTN:
- /* roundTowardNegative in the IEEE spec. */
- fprintf(fp, ".round_neg");
- break;
- case BIFROST_RTZ:
- /* roundTowardZero in the IEEE spec. */
- fprintf(fp, ".round_zero");
- break;
- default:
- break;
- }
-}
-
/* Removed by this change: mapped a CSEL condition code to its printed name,
 * returning "invalid" for any other value. The visible CSEL4 call site now
 * uses bi_csel_cond_name() instead.
 *
 * NOTE(review): BIFROST_IGE_I returns "uge.i", the same string as
 * BIFROST_UGE_I. Going by the other entries, "ige.i" looks intended; check
 * that the replacement bi_csel_cond_name() does not carry the same string.
 */
-static const char *
-csel_cond_name(enum bifrost_csel_cond cond)
-{
- switch (cond) {
- case BIFROST_FEQ_F: return "feq.f";
- case BIFROST_FGT_F: return "fgt.f";
- case BIFROST_FGE_F: return "fge.f";
- case BIFROST_IEQ_F: return "ieq.f";
- case BIFROST_IGT_I: return "igt.i";
- case BIFROST_IGE_I: return "uge.i";
- case BIFROST_UGT_I: return "ugt.i";
- case BIFROST_UGE_I: return "uge.i";
- default: return "invalid";
- }
-}
-
/* Table of FMA opcodes known to the disassembler. Each row lists, in order:
 * a boolean flag, an opcode value, the mnemonic string, and an FMA_* kind
 * describing how the sources are laid out.
 *
 * NOTE(review): struct fma_op_info is not visible in this excerpt. The flag
 * is false for the first five rows shown and true for all the others --
 * presumably it selects a different opcode encoding; confirm against the
 * struct definition.
 *
 * This change splits ROUND/ROUNDEVEN/CEIL/FLOOR/TRUNC into separate .v2f16
 * and .f32 rows and adds ADD_FREXPM.f32.
 */
static const struct fma_op_info FMAOpInfos[] = {
{ false, 0x00000, "FMA.f32", FMA_FMA },
{ false, 0x40000, "MAX.f32", FMA_FMINMAX },
{ false, 0xd8000, "ADD.v2f16", FMA_FADD16 },
{ false, 0xdc000, "CSEL4.v16", FMA_CSEL4 },
{ false, 0xdd000, "F32_TO_F16", FMA_TWO_SRC },
+
+ /* TODO: Combine to bifrost_fma_f2i_i2f16 */
{ true, 0x00046, "F16_TO_I16.XX", FMA_ONE_SRC },
{ true, 0x00047, "F16_TO_U16.XX", FMA_ONE_SRC },
{ true, 0x0004e, "F16_TO_I16.YX", FMA_ONE_SRC },
{ true, 0x000d1, "U16_TO_F16.XY", FMA_ONE_SRC },
{ true, 0x000d8, "I16_TO_F16.YY", FMA_ONE_SRC },
{ true, 0x000d9, "U16_TO_F16.YY", FMA_ONE_SRC },
+
{ true, 0x00136, "F32_TO_I32", FMA_ONE_SRC },
{ true, 0x00137, "F32_TO_U32", FMA_ONE_SRC },
{ true, 0x00178, "I32_TO_F32", FMA_ONE_SRC },
{ true, 0x00179, "U32_TO_F32", FMA_ONE_SRC },
+
+ /* TODO: cleanup to use bifrost_fma_int16_to_32 */
{ true, 0x00198, "I16_TO_I32.X", FMA_ONE_SRC },
{ true, 0x00199, "U16_TO_U32.X", FMA_ONE_SRC },
{ true, 0x0019a, "I16_TO_I32.Y", FMA_ONE_SRC },
{ true, 0x0019d, "U16_TO_F32.X", FMA_ONE_SRC },
{ true, 0x0019e, "I16_TO_F32.Y", FMA_ONE_SRC },
{ true, 0x0019f, "U16_TO_F32.Y", FMA_ONE_SRC },
+
{ true, 0x001a2, "F16_TO_F32.X", FMA_ONE_SRC },
{ true, 0x001a3, "F16_TO_F32.Y", FMA_ONE_SRC },
+
{ true, 0x0032c, "NOP", FMA_ONE_SRC },
{ true, 0x0032d, "MOV", FMA_ONE_SRC },
{ true, 0x0032f, "SWZ.YY.v2i16", FMA_ONE_SRC },
{ true, 0x00bc0, "UMAX3", FMA_THREE_SRC },
{ true, 0x00c00, "IMIN3", FMA_THREE_SRC },
{ true, 0x00c40, "UMIN3", FMA_THREE_SRC },
- { true, 0x00ec5, "ROUND", FMA_ONE_SRC },
+ { true, 0x00ec2, "ROUND.v2f16", FMA_ONE_SRC },
+ { true, 0x00ec5, "ROUND.f32", FMA_ONE_SRC },
{ true, 0x00f40, "CSEL", FMA_THREE_SRC }, // src2 != 0 ? src1 : src0
{ true, 0x00fc0, "MUX.i32", FMA_THREE_SRC }, // see ADD comment
- { true, 0x01805, "ROUNDEVEN", FMA_ONE_SRC },
- { true, 0x01845, "CEIL", FMA_ONE_SRC },
- { true, 0x01885, "FLOOR", FMA_ONE_SRC },
- { true, 0x018c5, "TRUNC", FMA_ONE_SRC },
+ { true, 0x01802, "ROUNDEVEN.v2f16", FMA_ONE_SRC },
+ { true, 0x01805, "ROUNDEVEN.f32", FMA_ONE_SRC },
+ { true, 0x01842, "CEIL.v2f16", FMA_ONE_SRC },
+ { true, 0x01845, "CEIL.f32", FMA_ONE_SRC },
+ { true, 0x01882, "FLOOR.v2f16", FMA_ONE_SRC },
+ { true, 0x01885, "FLOOR.f32", FMA_ONE_SRC },
+ { true, 0x018c2, "TRUNC.v2f16", FMA_ONE_SRC },
+ { true, 0x018c5, "TRUNC.f32", FMA_ONE_SRC },
{ true, 0x019b0, "ATAN_LDEXP.Y.f32", FMA_TWO_SRC },
{ true, 0x019b8, "ATAN_LDEXP.X.f32", FMA_TWO_SRC },
{ true, 0x01c80, "LSHIFT_ADD_LOW32.u32", FMA_SHIFT_ADD64 },
{ true, 0x01e08, "SEL.YX.i16", FMA_TWO_SRC },
{ true, 0x01e10, "SEL.XY.i16", FMA_TWO_SRC },
{ true, 0x01e18, "SEL.YY.i16", FMA_TWO_SRC },
+ { true, 0x01e80, "ADD_FREXPM.f32", FMA_TWO_SRC },
{ true, 0x00800, "IMAD", FMA_THREE_SRC },
{ true, 0x078db, "POPCNT", FMA_ONE_SRC },
};
}
}
/* Removed from this file by this change: mapped a load/store type code to a
 * short type name ("f16", "f32", "i32", "u32"), returning "invalid" for any
 * other value. No replacement definition is visible in this excerpt.
 */
-static const char *
-bi_ldst_type_name(enum bifrost_ldst_type type)
-{
- switch (type) {
- case BIFROST_LDST_F16: return "f16";
- case BIFROST_LDST_F32: return "f32";
- case BIFROST_LDST_I32: return "i32";
- case BIFROST_LDST_U32: return "u32";
- default: return "invalid";
- }
-}
-
static void dump_fma(FILE *fp, uint64_t word, struct bifrost_regs regs, struct bifrost_regs next_regs, uint64_t *consts, bool verbose)
{
if (verbose) {
info.src_type == FMA_FADD16 ||
info.src_type == FMA_FMINMAX16 ||
info.src_type == FMA_FMA16) {
- dump_output_mod(fp, bits(FMA.op, 12, 14));
+ fprintf(fp, "%s", bi_output_mod_name(bits(FMA.op, 12, 14)));
switch (info.src_type) {
case FMA_FADD:
case FMA_FMA:
case FMA_FADD16:
case FMA_FMA16:
- dump_round_mode(fp, bits(FMA.op, 10, 12));
+ fprintf(fp, "%s", bi_round_mode_name(bits(FMA.op, 10, 12)));
break;
case FMA_FMINMAX:
case FMA_FMINMAX16:
- dump_minmax_mode(fp, bits(FMA.op, 10, 12));
+ fprintf(fp, "%s", bi_minmax_mode_name(bits(FMA.op, 10, 12)));
break;
default:
assert(0);
fprintf(fp, ".unk%d_mode", (int) (FMA.op >> 9) & 0x3);
}
} else {
- dump_output_mod(fp, bits(FMA.op, 9, 11));
+ fprintf(fp, "%s", bi_output_mod_name(bits(FMA.op, 9, 11)));
}
} else if (info.src_type == FMA_SHIFT) {
struct bifrost_shift_fma shift;
case FMA_CSEL4: {
struct bifrost_csel4 csel;
memcpy(&csel, &FMA, sizeof(csel));
- fprintf(fp, ".%s ", csel_cond_name(csel.cond));
+ fprintf(fp, ".%s ", bi_csel_cond_name(csel.cond));
dump_src(fp, csel.src0, regs, consts, true);
fprintf(fp, ", ");
{ 0x07ba5, "FSQRT_FREXPE", ADD_ONE_SRC },
{ 0x07bad, "FRSQ_FREXPE", ADD_ONE_SRC },
{ 0x07bc5, "FLOG_FREXPE", ADD_ONE_SRC },
- { 0x07d45, "CEIL", ADD_ONE_SRC },
- { 0x07d85, "FLOOR", ADD_ONE_SRC },
- { 0x07dc5, "TRUNC", ADD_ONE_SRC },
+ { 0x07d42, "CEIL.v2f16", ADD_ONE_SRC },
+ { 0x07d45, "CEIL.f32", ADD_ONE_SRC },
+ { 0x07d82, "FLOOR.v2f16", ADD_ONE_SRC },
+ { 0x07d85, "FLOOR.f32", ADD_ONE_SRC },
+ { 0x07dc2, "TRUNC.v2f16", ADD_ONE_SRC },
+ { 0x07dc5, "TRUNC.f32", ADD_ONE_SRC },
{ 0x07f18, "LSHIFT_ADD_HIGH32.i32", ADD_TWO_SRC },
{ 0x08000, "LD_ATTR", ADD_LOAD_ATTR, true },
{ 0x0a000, "LD_VAR.32", ADD_VARYING_INTERP, true },
{ 0x0cbb8, "ST_SCRATCH.v3i32", ADD_TWO_SRC, true },
{ 0x0cc00, "FRCP_FAST.f32", ADD_ONE_SRC },
{ 0x0cc20, "FRSQ_FAST.f32", ADD_ONE_SRC },
+ { 0x0cc68, "FLOG2_U.f32", ADD_ONE_SRC },
+ { 0x0cd58, "FEXP2_FAST.f32", ADD_ONE_SRC },
{ 0x0ce00, "FRCP_TABLE", ADD_ONE_SRC },
{ 0x0ce10, "FRCP_FAST.f16.X", ADD_ONE_SRC },
{ 0x0ce20, "FRSQ_TABLE", ADD_ONE_SRC },
{ 0x17d90, "ADD.i32.u16.X", ADD_TWO_SRC },
{ 0x17dc0, "ADD.i32.i16.Y", ADD_TWO_SRC },
{ 0x17dd0, "ADD.i32.u16.Y", ADD_TWO_SRC },
+ { 0x17881, "ADD.i8", ADD_TWO_SRC },
{ 0x18000, "LD_VAR_ADDR", ADD_VARYING_ADDRESS, true },
{ 0x19181, "DISCARD.FEQ.f32", ADD_TWO_SRC, true },
{ 0x19189, "DISCARD.FNE.f32", ADD_TWO_SRC, true },
// float16 does not seem to support output modifiers
if (info.src_type == ADD_FADD || info.src_type == ADD_FMINMAX) {
// output modifiers
- dump_output_mod(fp, bits(ADD.op, 8, 10));
+ fprintf(fp, "%s", bi_output_mod_name(bits(ADD.op, 8, 10)));
if (info.src_type == ADD_FADD)
- dump_round_mode(fp, bits(ADD.op, 10, 12));
+ fprintf(fp, "%s", bi_round_mode_name(bits(ADD.op, 10, 12)));
else
- dump_minmax_mode(fp, bits(ADD.op, 10, 12));
+ fprintf(fp, "%s", bi_minmax_mode_name(bits(ADD.op, 10, 12)));
} else if (info.src_type == ADD_FCMP || info.src_type == ADD_FCMP16) {
dump_fcmp(fp, bits(ADD.op, 3, 6));
if (info.src_type == ADD_FCMP)
fprintf(fp, ".reuse");
if (ADD.op & 0x400)
fprintf(fp, ".flat");
- switch ((ADD.op >> 7) & 0x3) {
- case 0:
- fprintf(fp, ".per_frag");
- break;
- case 1:
- fprintf(fp, ".centroid");
- break;
- case 2:
- break;
- case 3:
- fprintf(fp, ".explicit");
- break;
- }
+ fprintf(fp, "%s", bi_interp_mode_name((ADD.op >> 7) & 0x3));
fprintf(fp, ".v%d", ((ADD.op >> 5) & 0x3) + 1);
} else if (info.src_type == ADD_BRANCH) {
enum bifrost_branch_code branchCode = (enum bifrost_branch_code) ((ADD.op >> 6) & 0x3f);
int tex_index;
int sampler_index;
bool dualTex = false;
+
+ fprintf(fp, "coords <");
+ dump_src(fp, ADD.src0, regs, consts, false);
+ fprintf(fp, ", ");
+ dump_src(fp, ADD.op & 0x7, regs, consts, false);
+ fprintf(fp, ">, ");
+
if (info.src_type == ADD_TEX_COMPACT) {
tex_index = (ADD.op >> 3) & 0x7;
sampler_index = (ADD.op >> 7) & 0x7;
struct bifrost_tex_ctrl ctrl;
memcpy((char *) &ctrl, (char *) &controlBits, sizeof(ctrl));
- // TODO: figure out what actually triggers dual-tex
+ /* Dual-tex is triggered when adjacent texturing instructions
+ * use the same coordinates but different textures/samplers.
+ * So far this has only been observed for the compact
+ * (2D/normal) case. */
+
if (ctrl.result_type == 9) {
struct bifrost_dual_tex_ctrl dualCtrl;
memcpy((char *) &dualCtrl, (char *) &controlBits, sizeof(ctrl));
dump_16swizzle(fp, (ADD.op >> 8) & 0x3);
if (abs1 && abs2)
fprintf(fp, ")");
+ fprintf(fp, "/* %X */\n", (ADD.op >> 10) & 0x3); /* mode */
break;
}
case ADD_FADDMscale: {