pan/bi: Skip over data registers in port assignment
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 18 Mar 2020 16:08:28 +0000 (12:08 -0400)
committer Marge Bot <eric+marge@anholt.net>
Thu, 19 Mar 2020 03:23:07 +0000 (03:23 +0000)
Data registers bypass the usual port assignment mechanism entirely, so
let's add some props to describe this and respect them.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4242>

src/panfrost/bifrost/bi_pack.c
src/panfrost/bifrost/bi_tables.c
src/panfrost/bifrost/compiler.h

index 377990f83be9c3ba90f0d05d2e560bf28281935d..65218b2301079529e192f8ed2b3a0e0e7b1432c2 100644 (file)
@@ -110,15 +110,28 @@ bi_assign_ports(bi_bundle now, bi_bundle prev)
 {
         struct bi_registers regs = { 0 };
 
+        /* We assign ports for the main register mechanism. Special ops
+         * use the data registers, which have their own mechanism entirely
+         * and thus get skipped over here. */
+
+        unsigned read_dreg = now.add &&
+                bi_class_props[now.add->type] & BI_DATA_REG_SRC;
+
+        unsigned write_dreg = prev.add &&
+                bi_class_props[prev.add->type] & BI_DATA_REG_DEST;
+
         /* First, assign reads */
 
         if (now.fma)
                 bi_foreach_src(now.fma, src)
                         bi_assign_port_read(&regs, now.fma->src[src]);
 
-        if (now.add)
-                bi_foreach_src(now.add, src)
-                        bi_assign_port_read(&regs, now.add->src[src]);
+        if (now.add) {
+                bi_foreach_src(now.add, src) {
+                        if (!(src == 0 && read_dreg))
+                                bi_assign_port_read(&regs, now.add->src[src]);
+                }
+        }
 
         /* Next, assign writes */
 
@@ -127,7 +140,7 @@ bi_assign_ports(bi_bundle now, bi_bundle prev)
                 regs.write_fma = true;
         }
 
-        if (prev.add && prev.add->dest & BIR_INDEX_REGISTER) {
+        if (prev.add && prev.add->dest & BIR_INDEX_REGISTER && !write_dreg) {
                 unsigned r = prev.add->dest & ~BIR_INDEX_REGISTER;
 
                 if (regs.write_fma) {
index a0734e10ec97acdf416fa52d11e50760cdbcc8d9..07dde05e02738d4f11ac7f5653d10c2e0b620bec 100644 (file)
@@ -39,16 +39,16 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
         [BI_FMA]               = BI_ROUNDMODE | BI_SCHED_FMA,
         [BI_FREXP]             = BI_SCHED_ALL,
         [BI_ISUB]              = BI_GENERIC | BI_SCHED_ALL,
-        [BI_LOAD]              = BI_SCHED_HI_LATENCY | BI_VECTOR,
-        [BI_LOAD_UNIFORM]      = BI_SCHED_HI_LATENCY | BI_VECTOR,
-        [BI_LOAD_ATTR]                 = BI_SCHED_HI_LATENCY | BI_VECTOR,
-        [BI_LOAD_VAR]          = BI_SCHED_HI_LATENCY | BI_VECTOR,
+        [BI_LOAD]              = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
+        [BI_LOAD_UNIFORM]      = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
+        [BI_LOAD_ATTR]                 = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
+        [BI_LOAD_VAR]          = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_DEST,
         [BI_LOAD_VAR_ADDRESS]  = BI_SCHED_HI_LATENCY,
         [BI_MINMAX]            = BI_GENERIC | BI_SCHED_ALL,
         [BI_MOV]               = BI_MODS | BI_SCHED_ALL,
         [BI_SHIFT]             = BI_SCHED_ALL,
-        [BI_STORE]             = BI_SCHED_HI_LATENCY | BI_VECTOR,
-        [BI_STORE_VAR]                 = BI_SCHED_HI_LATENCY | BI_VECTOR,
+        [BI_STORE]             = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_SRC,
+        [BI_STORE_VAR]                 = BI_SCHED_HI_LATENCY | BI_VECTOR | BI_DATA_REG_SRC,
         [BI_SPECIAL]           = BI_SCHED_ADD | BI_SCHED_SLOW,
         [BI_SWIZZLE]            = BI_SCHED_ALL | BI_SWIZZLABLE,
         [BI_TEX]               = BI_SCHED_HI_LATENCY | BI_VECTOR,
index b69c0fee5e7ad72d6b5aeddb89b786b5366ed090..2d3633502ec5d17f6bb4cf43bd6898f929a92fac 100644 (file)
@@ -113,6 +113,11 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
 /* Intrinsic is vectorized and should read 4 components regardless of writemask */
 #define BI_VECTOR (1 << 8)
 
+/* Use a data register for src0/dest respectively, bypassing the usual
+ * register accessor. Mutually exclusive. */
+#define BI_DATA_REG_SRC (1 << 9)
+#define BI_DATA_REG_DEST (1 << 10)
+
 /* It can't get any worse than csel4... can it? */
 #define BIR_SRC_COUNT 4