RETURN_PACKED(pack);
}
-static unsigned
-bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA)
+static bool
+bi_pack_fp16_abs(bi_instruction *ins, struct bi_registers *regs, bool *flip)
{
- unsigned op =
- (!FMA) ? ((ins->op.minmax == BI_MINMAX_MIN) ?
- BIFROST_ADD_OP_FMIN16 : BIFROST_ADD_OP_FMAX16) :
- (ins->type == BI_ADD) ? BIFROST_FMA_OP_FADD16 :
- (ins->op.minmax == BI_MINMAX_MIN) ? BIFROST_FMA_OP_FMIN16 :
- BIFROST_FMA_OP_FMAX16;
-
/* Absolute values are packed in a quirky way. Let k = src1 < src0. Let
* l be an auxiliary bit we encode. Then the hardware determines:
*
unsigned abs_0 = ins->src_abs[0], abs_1 = ins->src_abs[1];
unsigned src_0 = bi_get_src(ins, regs, 0, true);
unsigned src_1 = bi_get_src(ins, regs, 1, true);
- bool l = false;
- bool flip = false;
assert(!(abs_0 && abs_1));
if (!abs_0 && !abs_1) {
/* Force k = 0 <===> NOT(src1 < src0) */
- flip = (src_1 < src_0);
+ *flip = (src_1 < src_0);
+ return false;
} else if (abs_0 && !abs_1) {
- l = src_1 >= src_0;
+ return src_1 >= src_0;
} else if (abs_1 && !abs_0) {
- flip = true;
- l = src_0 >= src_1;
+ *flip = true;
+ return src_0 >= src_1;
} else {
- flip = (src_0 >= src_1);
- l = true;
+ *flip = (src_0 >= src_1);
+ return true;
}
+}
+
+static unsigned
+bi_pack_fmadd_min_f16(bi_instruction *ins, struct bi_registers *regs, bool FMA)
+{
+ unsigned op =
+ (!FMA) ? ((ins->op.minmax == BI_MINMAX_MIN) ?
+ BIFROST_ADD_OP_FMIN16 : BIFROST_ADD_OP_FMAX16) :
+ (ins->type == BI_ADD) ? BIFROST_FMA_OP_FADD16 :
+ (ins->op.minmax == BI_MINMAX_MIN) ? BIFROST_FMA_OP_FMIN16 :
+ BIFROST_FMA_OP_FMAX16;
+
+ bool flip = false;
+ bool l = bi_pack_fp16_abs(ins, regs, &flip);
+ unsigned src_0 = bi_get_src(ins, regs, 0, true);
+ unsigned src_1 = bi_get_src(ins, regs, 1, true);
if (FMA) {
struct bifrost_fma_add_minmax16 pack = {
bool flip = false, invert = false;
enum bifrost_csel_cond cond =
- bi_cond_to_csel(ins->csel_cond, &flip, &invert, ins->src_types[0]);
+ bi_cond_to_csel(ins->cond, &flip, &invert, ins->src_types[0]);
unsigned size = nir_alu_type_get_type_size(ins->dest_type);
}
}
+static enum bifrost_fcmp_cond /* Translate an IR comparison (enum bi_cond) into
+ * the enum bifrost_fcmp_cond value written to the .cond field of the packed
+ * float compares. Any bi_cond not listed below hits unreachable(). */
+bi_fcmp_cond(enum bi_cond cond)
+{
+ switch (cond) {
+ case BI_COND_LT: return BIFROST_OLT;
+ case BI_COND_LE: return BIFROST_OLE;
+ case BI_COND_GE: return BIFROST_OGE;
+ case BI_COND_GT: return BIFROST_OGT;
+ case BI_COND_EQ: return BIFROST_OEQ;
+ case BI_COND_NE: return BIFROST_UNE; /* only case mapped to a U-prefixed code (presumably "unordered" vs. "ordered" - TODO confirm) */
+ default: unreachable("Unknown bi_cond");
+ }
+}
+
+/* a <?> b <==> b <flip(?)> a (TODO: NaN behaviour?)
+ *
+ * Returns the condition to use once the two compared operands have been
+ * exchanged: OGT <-> OLT and OGE <-> OLE trade places, while OEQ and UNE are
+ * returned unchanged. Any other value hits unreachable(). */
+
+static enum bifrost_fcmp_cond
+bi_flip_fcmp(enum bifrost_fcmp_cond cond)
+{
+ switch (cond) {
+ case BIFROST_OGT:
+ return BIFROST_OLT;
+ case BIFROST_OGE:
+ return BIFROST_OLE;
+ case BIFROST_OLT:
+ return BIFROST_OGT;
+ case BIFROST_OLE:
+ return BIFROST_OGE;
+ case BIFROST_OEQ:
+ case BIFROST_UNE:
+ return cond; /* equality tests do not depend on operand order */
+ default:
+ unreachable("Unknown fcmp cond");
+ }
+}
+
+static unsigned /* Pack a float compare for the FMA path, choosing the encoding
+ * from the two source types: float32 sources fill struct bifrost_fma_fcmp,
+ * a pair of float16 sources fills struct bifrost_fma_fcmp16, and any other
+ * combination hits unreachable(). The filled struct is handed to
+ * RETURN_PACKED (macro defined outside this view). */
+bi_pack_fma_cmp(bi_instruction *ins, struct bi_registers *regs)
+{
+ nir_alu_type Tl = ins->src_types[0];
+ nir_alu_type Tr = ins->src_types[1];
+
+ if (Tl == nir_type_float32 || Tr == nir_type_float32) {
+ /* TODO: Mixed 32/16 cmp. Until then both sources must be float32. */
+ assert(Tl == Tr);
+
+ enum bifrost_fcmp_cond cond = bi_fcmp_cond(ins->cond);
+
+ /* Only src1 has neg, so we arrange:
+ * a < b --- native
+ * a < -b --- native
+ * -a < -b <===> a > b
+ * -a < b <===> a > -b
+ * TODO: Is this NaN-precise?
+ *
+ * In other words, a negate on src0 is removed by negating both
+ * sides: the condition is reversed (flip) and src1 ends up negated
+ * iff exactly one of the two sources was (neg). The operands are
+ * not exchanged in this branch; only the condition changes.
+ */
+
+ bool flip = ins->src_neg[0];
+ bool neg = ins->src_neg[0] ^ ins->src_neg[1];
+
+ if (flip)
+ cond = bi_flip_fcmp(cond);
+
+ struct bifrost_fma_fcmp pack = {
+ .src0 = bi_get_src(ins, regs, 0, true),
+ .src1 = bi_get_src(ins, regs, 1, true),
+ .src0_abs = ins->src_abs[0],
+ .src1_abs = ins->src_abs[1],
+ .src1_neg = neg,
+ .src_expand = 0,
+ .unk1 = 0,
+ .cond = cond,
+ .op = BIFROST_FMA_OP_FCMP_GL
+ };
+
+ RETURN_PACKED(pack);
+ } else if (Tl == nir_type_float16 && Tr == nir_type_float16) { /* NOTE(review): src_neg is never read in this branch - confirm negated fp16 sources cannot reach here */
+ bool flip = false;
+ bool l = bi_pack_fp16_abs(ins, regs, &flip); /* yields the abs bit stored in .abs1 and sets flip when the operands must be exchanged */
+ enum bifrost_fcmp_cond cond = bi_fcmp_cond(ins->cond);
+
+ if (flip)
+ cond = bi_flip_fcmp(cond); /* keep the result unchanged after the operand exchange below */
+
+ struct bifrost_fma_fcmp16 pack = {
+ .src0 = bi_get_src(ins, regs, flip ? 1 : 0, true), /* sources and their swizzles trade places when flip is set */
+ .src1 = bi_get_src(ins, regs, flip ? 0 : 1, true),
+ .src0_swizzle = bi_swiz16(ins, flip ? 1 : 0),
+ .src1_swizzle = bi_swiz16(ins, flip ? 0 : 1),
+ .abs1 = l,
+ .unk = 0,
+ .cond = cond,
+ .op = BIFROST_FMA_OP_FCMP_GL_16,
+ };
+
+ RETURN_PACKED(pack);
+ } else {
+ unreachable("Unknown cmp type");
+ }
+}
+
+
static unsigned
bi_pack_fma(bi_clause *clause, bi_bundle bundle, struct bi_registers *regs)
{
case BI_ADD:
return bi_pack_fma_addmin(bundle.fma, regs);
case BI_CMP:
+ return bi_pack_fma_cmp(bundle.fma, regs);
case BI_BITWISE:
return BIFROST_FMA_NOP;
case BI_CONVERT: