From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Wed, 16 Oct 2019 16:25:32 +0000 (-0400)
Subject: pan/midgard: Report byte masks for read components
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e9202ff3cbbcceb691e6b64af5d379f0e8c03643;p=mesa.git

pan/midgard: Report byte masks for read components

Read component masks don't have a particular type associated with them,
since the type of the ALU operation may not match the type of the
operands in question. So let's generate byte masks instead, and update
the rest of the compiler to use byte masks when analyzing reads.

Preparation for mixed types.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
---

diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 4b51aabb096..425a4dd85f1 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -512,7 +512,7 @@ bool mir_single_use(compiler_context *ctx, unsigned value);
 bool mir_special_index(compiler_context *ctx, unsigned idx);
 unsigned mir_use_count(compiler_context *ctx, unsigned value);
 bool mir_is_written_before(compiler_context *ctx, midgard_instruction *ins, unsigned node);
-unsigned mir_mask_of_read_components(midgard_instruction *ins, unsigned node);
+uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node);
 unsigned mir_ubo_shift(midgard_load_store_op op);
 midgard_reg_mode mir_typesize(midgard_instruction *ins);
 uint16_t mir_from_bytemask(uint16_t bytemask, midgard_reg_mode mode);
diff --git a/src/panfrost/midgard/midgard_liveness.c b/src/panfrost/midgard/midgard_liveness.c
index 8320b918ed2..1aa4158e353 100644
--- a/src/panfrost/midgard/midgard_liveness.c
+++ b/src/panfrost/midgard/midgard_liveness.c
@@ -64,7 +64,8 @@ mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max)
 
         mir_foreach_src(ins, src) {
                 unsigned node = ins->src[src];
-                unsigned mask = mir_mask_of_read_components(ins, node);
+                unsigned bytemask = mir_bytemask_of_read_components(ins, node);
+                unsigned mask = mir_from_bytemask(bytemask, midgard_reg_mode_32);
 
                 liveness_gen(live, node, max, mask);
         }
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 199e9ef076e..51687defccb 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -525,7 +525,7 @@ mir_lower_special_reads(compiler_context *ctx)
                                 } else {
                                         idx = spill_idx++;
                                         m = v_mov(i, blank_alu_src, idx);
-                                        m.mask = mir_mask_of_read_components(pre_use, i);
+                                        m.mask = mir_from_bytemask(mir_bytemask_of_read_components(pre_use, i), midgard_reg_mode_32);
                                         mir_insert_instruction_before(ctx, pre_use, m);
                                         mir_rewrite_index_src_single(pre_use, i, idx);
                                 }
diff --git a/src/panfrost/midgard/midgard_ra_pipeline.c b/src/panfrost/midgard/midgard_ra_pipeline.c
index 3fa3e9fbecd..a85383fc249 100644
--- a/src/panfrost/midgard/midgard_ra_pipeline.c
+++ b/src/panfrost/midgard/midgard_ra_pipeline.c
@@ -54,11 +54,11 @@ mir_pipeline_ins(
         unsigned node = ins->dest;
         unsigned read_mask = 0;
 
-        /* Analyze the bundle for a read mask */
+        /* Analyze the bundle for a per-byte read mask */
 
         for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                 midgard_instruction *q = bundle->instructions[i];
-                read_mask |= mir_mask_of_read_components(q, node);
+                read_mask |= mir_bytemask_of_read_components(q, node);
 
                 /* The fragment colour can't be pipelined (well, it is
                  * pipelined in r0, but this is a delicate dance with
@@ -74,7 +74,7 @@ mir_pipeline_ins(
                 if (q->dest != node) continue;
 
                 /* Remove the written mask from the read requirements */
-                read_mask &= ~q->mask;
+                read_mask &= ~mir_bytemask(q);
         }
 
         /* Check for leftovers */
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 5b1daed6d7e..341a746a8f4 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -125,7 +125,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                         unsigned src = instructions[i]->src[s];
 
                         if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                 add_dependency(last_write, src, readmask, instructions, i);
                         }
                 }
@@ -140,7 +140,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                         unsigned src = instructions[i]->src[s];
 
                         if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                 mark_access(last_read, src, readmask, i);
                         }
                 }
@@ -388,7 +388,7 @@ mir_adjust_constants(midgard_instruction *ins,
                 uint32_t *bundles = (uint32_t *) pred->constants;
                 uint32_t *constants = (uint32_t *) ins->constants;
                 unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
-                unsigned mask = mir_mask_of_read_components(ins, r_constant);
+                unsigned mask = mir_from_bytemask(mir_bytemask_of_read_components(ins, r_constant), midgard_reg_mode_32);
 
                 /* First, check if it fits */
                 unsigned count = DIV_ROUND_UP(pred->constant_count, sizeof(uint32_t));
@@ -1290,11 +1290,11 @@ static void mir_spill_register(
                 }
         }
 
-        /* For special reads, figure out how many components we need */
-        unsigned read_mask = 0;
+        /* For special reads, figure out how many bytes we need */
+        unsigned read_bytemask = 0;
 
         mir_foreach_instr_global_safe(ctx, ins) {
-                read_mask |= mir_mask_of_read_components(ins, spill_node);
+                read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
         }
 
         /* Insert a load from TLS before the first consecutive
@@ -1349,7 +1349,7 @@ static void mir_spill_register(
                                 /* Mask the load based on the component count
                                  * actually needed to prvent RA loops */
 
-                                st.mask = read_mask;
+                                st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32);
 
                                 mir_insert_instruction_before_scheduled(ctx, block, before, st);
                                // consecutive_skip = true;
diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c
index e660ca25aa6..09868302a2e 100644
--- a/src/panfrost/midgard/mir.c
+++ b/src/panfrost/midgard/mir.c
@@ -533,19 +533,19 @@ mir_bytemask(midgard_instruction *ins)
  * will return a mask of Z/Y for r2
  */
 
-static unsigned
-mir_mask_of_read_components_single(unsigned swizzle, unsigned outmask)
+static uint16_t
+mir_bytemask_of_read_components_single(unsigned swizzle, unsigned inmask, midgard_reg_mode mode)
 {
-        unsigned mask = 0;
+        unsigned cmask = 0;
 
         for (unsigned c = 0; c < 4; ++c) {
-                if (!(outmask & (1 << c))) continue;
+                if (!(inmask & (1 << c))) continue;
 
                 unsigned comp = (swizzle >> (2*c)) & 3;
-                mask |= (1 << comp);
+                cmask |= (1 << comp);
         }
 
-        return mask;
+        return mir_to_bytemask(mode, cmask);
 }
 
 static unsigned
@@ -565,40 +565,39 @@ mir_source_count(midgard_instruction *ins)
         }
 }
 
-unsigned
-mir_mask_of_read_components(midgard_instruction *ins, unsigned node)
+uint16_t
+mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
 {
-        unsigned mask = 0;
+        uint16_t mask = 0;
 
         for (unsigned i = 0; i < mir_source_count(ins); ++i) {
                 if (ins->src[i] != node) continue;
 
                 /* Branch writeout uses all components */
                 if (ins->compact_branch && ins->writeout && (i == 0))
-                        return 0xF;
+                        return 0xFFFF;
 
-                /* Conditional branches read one component (TODO: multi branch??) */
+                /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
                 if (ins->compact_branch && !ins->prepacked_branch && ins->branch.conditional && (i == 0))
-                        return 0x1;
+                        return 0xF;
 
                 /* ALU ops act componentwise so we need to pay attention to
                  * their mask. Texture/ldst does not so we don't clamp source
                  * readmasks based on the writemask */
-                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : 0xF;
+                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : ~0;
 
                 /* Handle dot products and things */
                 if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
-                        unsigned channel_override =
-                                GET_CHANNEL_COUNT(alu_opcode_props[ins->alu.op].props);
+                        unsigned props = alu_opcode_props[ins->alu.op].props;
+
+                        unsigned channel_override = GET_CHANNEL_COUNT(props);
 
                         if (channel_override)
                                 qmask = mask_of(channel_override);
                 }
 
                 unsigned swizzle = mir_get_swizzle(ins, i);
-                unsigned m = mir_mask_of_read_components_single(swizzle, qmask);
-
-               mask |= m;
+                mask |= mir_bytemask_of_read_components_single(swizzle, qmask, mir_srcsize(ins, i));
         }
 
         return mask;