}
}
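+/* There is an encoding restriction on FMA fp16 add/min/max: abs(..) cannot
+ * be applied to both sources when the two sources are the same (duplicated)
+ * source. This is due to the packing being order-sensitive, so the ports
+ * must end up distinct for both sources to carry abs(..). The swizzle
+ * doesn't matter here. The check below is conservative: it does not compare
+ * the sources, so any flagged fp16 instruction with abs(..) on both sources
+ * is treated as ambiguous. Note BIR_INDEX_REGISTER generally should not be
+ * used pre-schedule (TODO: enforce this).
+ */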
+
+/* Returns true when `ins` cannot be placed on FMA because of the fp16
+ * abs/abs encoding quirk: its class carries BI_NO_ABS_ABS_FP16_FMA, its
+ * destination type is fp16, and both of its first two sources have the
+ * abs modifier set. */
+
+static bool
+bi_ambiguous_abs(bi_instruction *ins)
+{
+ /* Classes without the quirk flag are never affected */
+ if (!(bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA))
+  return false;
+
+ /* The quirk only applies in fp16 mode */
+ if (ins->dest_type != nir_type_float16)
+  return false;
+
+ /* Ambiguous only if abs(..) is set on both source 0 and source 1 */
+ return ins->src_abs[0] && ins->src_abs[1];
+}
+
/* Eventually, we'll need a proper scheduling, grouping instructions
* into clauses and ordering/assigning grouped instructions to the
* appropriate FMA/ADD slots. Right now we do the dumbest possible
bi_clause *u = rzalloc(ctx, bi_clause);
u->bundle_count = 1;
- if (props & BI_SCHED_FMA)
+ /* Check for scheduling restrictions */
+
+ bool can_fma = props & BI_SCHED_FMA;
+ bool can_add = props & BI_SCHED_ADD;
+
+ can_fma &= !bi_ambiguous_abs(ins);
+
+ assert(can_fma || can_add);
+
+ if (can_fma)
u->bundles[0].fma = ins;
else
u->bundles[0].add = ins;
#include "compiler.h"
unsigned bi_class_props[BI_NUM_CLASSES] = {
- [BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
+ [BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
[BI_ATEST] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
[BI_BRANCH] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_DATA_REG_DEST,
- [BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL,
+ [BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
[BI_MOV] = BI_SCHED_ALL,
[BI_FMOV] = BI_MODS | BI_SCHED_ALL,
[BI_SHIFT] = BI_SCHED_ALL,
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
+/* Quirk: cannot encode multiple abs on FMA in fp16 mode */
+#define BI_NO_ABS_ABS_FP16_FMA (1 << 11)
+
/* It can't get any worse than csel4... can it? */
#define BIR_SRC_COUNT 4