pan/bi: Handle fp16/abs scheduling restriction
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Mon, 30 Mar 2020 16:25:20 +0000 (12:25 -0400)
committerMarge Bot <eric+marge@anholt.net>
Tue, 31 Mar 2020 01:12:26 +0000 (01:12 +0000)
See previous commit for the packing side. Here we update the scheduler
to accommodate this. Note we don't actually hit this path yet, but it's
good to be proactive.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4382>

src/panfrost/bifrost/bi_schedule.c
src/panfrost/bifrost/bi_tables.c
src/panfrost/bifrost/compiler.h

index f112553c05a1065434d9f7a1437e89bfd1ed28d9..0f72fc12713524a3d348bd074bb9810d8fbaa0b3 100644 (file)
@@ -71,6 +71,24 @@ bi_clause_type_for_ins(bi_instruction *ins)
         }
 }
 
+/* There is an encoding restriction against FMA fp16 add/min/max having
+ * abs(..) on both sources when the two sources are the same. This is due
+ * to the packing being order-sensitive, so the ports must end up distinct
+ * to handle both having abs(..). The swizzle doesn't matter here. Note
+ * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce
+ * this).
+ */
+
+static bool
+bi_ambiguous_abs(bi_instruction *ins)
+{
+        bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA; /* instruction class is flagged with the fp16 abs/abs FMA quirk */
+        bool typey = ins->dest_type == nir_type_float16; /* destination type is fp16 */
+        bool absy = ins->src_abs[0] && ins->src_abs[1]; /* abs modifier is set on both source 0 and source 1 */
+
+        return classy && typey && absy; /* true only when all three hold. NOTE(review): the two sources are not compared for equality here, so this looks broader than a "duplicated source" restriction - confirm that is intended */
+}
+
 /* Eventually, we'll need a proper scheduling, grouping instructions
  * into clauses and ordering/assigning grouped instructions to the
  * appropriate FMA/ADD slots. Right now we do the dumbest possible
@@ -95,7 +113,16 @@ bi_schedule(bi_context *ctx)
                         bi_clause *u = rzalloc(ctx, bi_clause);
                         u->bundle_count = 1;
 
-                        if (props & BI_SCHED_FMA)
+                        /* Check for scheduling restrictions */
+
+                        bool can_fma = props & BI_SCHED_FMA;
+                        bool can_add = props & BI_SCHED_ADD;
+
+                        can_fma &= !bi_ambiguous_abs(ins);
+
+                        assert(can_fma || can_add);
+
+                        if (can_fma)
                                 u->bundles[0].fma = ins;
                         else
                                 u->bundles[0].add = ins;
index 3926afc433559061d7f64f38ec84a26a630a2329..04beb55c71a95f6bd3ff5377d3d185cf13edfb68 100644 (file)
@@ -27,7 +27,7 @@
 #include "compiler.h"
 
 unsigned bi_class_props[BI_NUM_CLASSES] = {
-        [BI_ADD]               = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
+        [BI_ADD]               = BI_GENERIC | BI_MODS | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
         [BI_ATEST]             = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
         [BI_BRANCH]            = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
         [BI_CMP]               = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
@@ -45,7 +45,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
         [BI_LOAD_ATTR]                 = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
         [BI_LOAD_VAR]          = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
         [BI_LOAD_VAR_ADDRESS]  = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_DATA_REG_DEST,
-        [BI_MINMAX]            = BI_GENERIC | BI_SCHED_ALL,
+        [BI_MINMAX]            = BI_GENERIC | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA,
         [BI_MOV]               = BI_SCHED_ALL,
         [BI_FMOV]               = BI_MODS | BI_SCHED_ALL,
         [BI_SHIFT]             = BI_SCHED_ALL,
index 554cd4e93d730bb2029a9df4f27b1d09632be792..9cfd0c67d15894402eb019d05d8836d38934752b 100644 (file)
@@ -121,6 +121,9 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
 #define BI_DATA_REG_SRC (1 << 9)
 #define BI_DATA_REG_DEST (1 << 10)
 
+/* Quirk: cannot encode multiple abs on FMA in fp16 mode */
+#define BI_NO_ABS_ABS_FP16_FMA (1 << 11)
+
 /* It can't get any worse than csel4... can it? */
 #define BIR_SRC_COUNT 4