From a8639b91b5e90dc8cf40a683a16cd8c4cb51193c Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Mon, 5 Aug 2019 09:19:39 -0700
Subject: [PATCH] pan/midgard: Pipe uniform mask through when spilling

This is a corner case that happens a lot with SSBOs. Basically, if we
only read a few components of a uniform, we need to spill only those
few components; otherwise, we try to spill what we already spilled and
RA hangs.

Signed-off-by: Alyssa Rosenzweig
---
 src/panfrost/midgard/midgard_schedule.c     | 14 +++++++++++++-
 src/panfrost/midgard/mir_promote_uniforms.c | 18 +++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index f69e86e2f46..d7d8254bd6b 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -723,7 +723,7 @@ v_load_store_scratch(
         if (is_store) {
                 /* r0 = r26, r1 = r27 */
                 assert(srcdest == SSA_FIXED_REGISTER(26) || srcdest == SSA_FIXED_REGISTER(27));
-                ins.ssa_args.src[0] = (srcdest == SSA_FIXED_REGISTER(27)) ? SSA_FIXED_REGISTER(1) : SSA_FIXED_REGISTER(0);
+                ins.ssa_args.src[0] = srcdest;
         } else {
                 ins.ssa_args.dest = srcdest;
         }
@@ -803,6 +803,13 @@ static void mir_spill_register(
                 }
         }
 
+        /* For special reads, figure out how many components we need */
+        unsigned read_mask = 0;
+
+        mir_foreach_instr_global_safe(ctx, ins) {
+                read_mask |= mir_mask_of_read_components(ins, spill_node);
+        }
+
         /* Insert a load from TLS before the first consecutive
          * use of the node, rewriting to use spilled indices to
          * break up the live range. Or, for special, insert a
@@ -850,6 +857,11 @@ static void mir_spill_register(
                         st = v_load_store_scratch(consecutive_index, spill_slot, false, 0xF);
                 }
 
+                /* Mask the load based on the component count
+                 * actually needed to prevent RA loops */
+
+                st.mask = read_mask;
+
                 mir_insert_instruction_before(before, st);
                 // consecutive_skip = true;
         } else {
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index e8da834b2fa..9f5be37be2c 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -25,6 +25,7 @@
  */
 
 #include "compiler.h"
+#include "util/u_math.h"
 
 /* This pass promotes reads from uniforms from load/store ops to uniform
  * registers if it is beneficial to do so. Normally, this saves both
@@ -70,11 +71,26 @@ midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
                         bool needs_move = ins->ssa_args.dest & IS_REG;
                         needs_move |= mir_special_index(ctx, ins->ssa_args.dest);
 
+                        /* Ensure this is a contiguous X-bound mask. It should
+                         * be since we haven't done RA and per-component masked
+                         * UBO reads don't make much sense. */
+
+                        assert(((ins->mask + 1) & ins->mask) == 0);
+
+                        /* Check the component count from the mask so we can
+                         * set up a swizzle appropriately when promoting. The
+                         * idea is to ensure the component count is preserved
+                         * so RA can be smarter if we need to spill */
+
+                        unsigned nr_components = util_bitcount(ins->mask);
+
                         if (needs_move) {
                                 midgard_instruction mov = v_mov(promoted, blank_alu_src, ins->ssa_args.dest);
+                                mov.mask = ins->mask;
                                 mir_insert_instruction_before(ins, mov);
                         } else {
-                                mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
+                                mir_rewrite_index_src_swizzle(ctx, ins->ssa_args.dest,
+                                                promoted, swizzle_of(nr_components));
                         }
 
                         mir_remove_instruction(ins);
-- 
2.30.2
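
Note on the spill half of the change: the mask the fill is narrowed to is
just the union, over every use of the spilled node, of the components that
use actually reads. Below is a minimal standalone sketch of that
computation with toy stand-ins for the MIR structures; the real pass walks
the program with mir_foreach_instr_global_safe() and queries
mir_mask_of_read_components(), and everything else here (struct toy_ins,
the fields, the example program) is purely illustrative:

#include <stdio.h>

/* Toy stand-in for a MIR instruction: one source node and the
 * xyzw component bits that source actually reads. */
struct toy_ins {
        unsigned src;       /* node read by this instruction */
        unsigned read_mask; /* components it actually reads */
};

int main(void)
{
        /* Suppose the spilled node 7 is read twice: once as .x,
         * once as .xy. Node 9 is unrelated. */
        struct toy_ins program[] = {
                { .src = 7, .read_mask = 0x1 }, /* reads .x   */
                { .src = 7, .read_mask = 0x3 }, /* reads .xy  */
                { .src = 9, .read_mask = 0xF }, /* other node */
        };
        unsigned spill_node = 7;

        /* Union of read masks over every use of the spilled node,
         * as the scheduler change computes before emitting fills. */
        unsigned read_mask = 0;
        for (unsigned i = 0; i < sizeof(program) / sizeof(program[0]); ++i) {
                if (program[i].src == spill_node)
                        read_mask |= program[i].read_mask;
        }

        printf("fill mask: 0x%X\n", read_mask); /* prints 0x3 */
        return 0;
}

With the two reads .x and .xy, the fill is masked to 0x3 (.xy) rather than
the full 0xF, so the register allocator never sees, and never tries to
re-spill, components that were never live.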
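
On the promotion side, the new assert relies on a standard bit trick: a
write mask that is contiguous from .x has the form 2^n - 1 (0x1, 0x3, 0x7,
0xF), and adding 1 to such a value carries through every set bit, so
(mask + 1) & mask is zero exactly for those masks. A small self-contained
sketch of the check and the component count derived from it follows;
popcount() here is a portable stand-in for Mesa's util_bitcount(), and the
function names are illustrative:

#include <assert.h>
#include <stdio.h>

/* Portable stand-in for util_bitcount(): number of set bits. */
static unsigned popcount(unsigned v)
{
        unsigned n = 0;
        for (; v; v &= v - 1)
                ++n;
        return n;
}

/* A mask is contiguous from .x iff it has the form 2^n - 1.
 * Adding 1 to such a mask carries past every set bit, so
 * (mask + 1) & mask == 0 holds for exactly these masks. */
static int is_contiguous_mask(unsigned mask)
{
        return ((mask + 1) & mask) == 0;
}

int main(void)
{
        unsigned masks[] = { 0x1, 0x3, 0x7, 0xF };

        for (unsigned i = 0; i < 4; ++i) {
                assert(is_contiguous_mask(masks[i]));
                /* popcount() gives the nr_components the pass feeds
                 * into swizzle_of() when rewriting the read. */
                printf("mask 0x%X -> %u components\n",
                       masks[i], popcount(masks[i]));
        }

        /* A sparse mask like .xz (0b0101) trips the check:
         * (0x5 + 1) & 0x5 == 0x4 != 0. */
        assert(!is_contiguous_mask(0x5));
        return 0;
}

Preserving that component count through the swizzle is what lets RA keep
the promoted value narrow if it ever has to spill it again.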