pan/midgard: Report byte masks for read components
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 16 Oct 2019 16:25:32 +0000 (12:25 -0400)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Sun, 20 Oct 2019 12:02:31 +0000 (12:02 +0000)
Read component masks don't have a particular type associated, since the
type of the ALU operation may not match the type of the operands in
question. So let's generate byte masks instead, and update the rest of
the compiler to use byte masks when analyzing reads.

Preparation for mixed types.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_liveness.c
src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/midgard_ra_pipeline.c
src/panfrost/midgard/midgard_schedule.c
src/panfrost/midgard/mir.c

index 4b51aabb0962369bed4d4e2f1b311e1d5e120b03..425a4dd85f1c0547e0d24e2d147c480cb0d9dacf 100644 (file)
@@ -512,7 +512,7 @@ bool mir_single_use(compiler_context *ctx, unsigned value);
 bool mir_special_index(compiler_context *ctx, unsigned idx);
 unsigned mir_use_count(compiler_context *ctx, unsigned value);
 bool mir_is_written_before(compiler_context *ctx, midgard_instruction *ins, unsigned node);
-unsigned mir_mask_of_read_components(midgard_instruction *ins, unsigned node);
+uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node);
 unsigned mir_ubo_shift(midgard_load_store_op op);
 midgard_reg_mode mir_typesize(midgard_instruction *ins);
 uint16_t mir_from_bytemask(uint16_t bytemask, midgard_reg_mode mode);
index 8320b918ed2863a2b535185db9954ca243d5886e..1aa4158e35359532a90114c4693320c3c99668aa 100644 (file)
@@ -64,7 +64,8 @@ mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max)
 
         mir_foreach_src(ins, src) {
                 unsigned node = ins->src[src];
-                unsigned mask = mir_mask_of_read_components(ins, node);
+                unsigned bytemask = mir_bytemask_of_read_components(ins, node);
+                unsigned mask = mir_from_bytemask(bytemask, midgard_reg_mode_32);
 
                 liveness_gen(live, node, max, mask);
         }
index 199e9ef076ef526d6a49b1934f6a60a98828916f..51687defccbb92bb3fa66aebb7a95ac825e5fc41 100644 (file)
@@ -525,7 +525,7 @@ mir_lower_special_reads(compiler_context *ctx)
                                 } else {
                                         idx = spill_idx++;
                                         m = v_mov(i, blank_alu_src, idx);
-                                        m.mask = mir_mask_of_read_components(pre_use, i);
+                                        m.mask = mir_from_bytemask(mir_bytemask_of_read_components(pre_use, i), midgard_reg_mode_32);
                                         mir_insert_instruction_before(ctx, pre_use, m);
                                         mir_rewrite_index_src_single(pre_use, i, idx);
                                 }
index 3fa3e9fbecd4055dbfc85a5247cc930900f48f2b..a85383fc249c5f17c4d6b526e60fbe166d05dd70 100644 (file)
@@ -54,11 +54,11 @@ mir_pipeline_ins(
         unsigned node = ins->dest;
         unsigned read_mask = 0;
 
-        /* Analyze the bundle for a read mask */
+        /* Analyze the bundle for a per-byte read mask */
 
         for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                 midgard_instruction *q = bundle->instructions[i];
-                read_mask |= mir_mask_of_read_components(q, node);
+                read_mask |= mir_bytemask_of_read_components(q, node);
 
                 /* The fragment colour can't be pipelined (well, it is
                  * pipelined in r0, but this is a delicate dance with
@@ -74,7 +74,7 @@ mir_pipeline_ins(
                 if (q->dest != node) continue;
 
                 /* Remove the written mask from the read requirements */
-                read_mask &= ~q->mask;
+                read_mask &= ~mir_bytemask(q);
         }
 
         /* Check for leftovers */
index 5b1daed6d7e25ab733b642100b2ebb12519116fe..341a746a8f4202ed17b247b18f7046ca25c6aefa 100644 (file)
@@ -125,7 +125,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                         unsigned src = instructions[i]->src[s];
 
                         if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                 add_dependency(last_write, src, readmask, instructions, i);
                         }
                 }
@@ -140,7 +140,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                         unsigned src = instructions[i]->src[s];
 
                         if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                 mark_access(last_read, src, readmask, i);
                         }
                 }
@@ -388,7 +388,7 @@ mir_adjust_constants(midgard_instruction *ins,
                 uint32_t *bundles = (uint32_t *) pred->constants;
                 uint32_t *constants = (uint32_t *) ins->constants;
                 unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
-                unsigned mask = mir_mask_of_read_components(ins, r_constant);
+                unsigned mask = mir_from_bytemask(mir_bytemask_of_read_components(ins, r_constant), midgard_reg_mode_32);
 
                 /* First, check if it fits */
                 unsigned count = DIV_ROUND_UP(pred->constant_count, sizeof(uint32_t));
@@ -1290,11 +1290,11 @@ static void mir_spill_register(
                 }
         }
 
-        /* For special reads, figure out how many components we need */
-        unsigned read_mask = 0;
+        /* For special reads, figure out how many bytes we need */
+        unsigned read_bytemask = 0;
 
         mir_foreach_instr_global_safe(ctx, ins) {
-                read_mask |= mir_mask_of_read_components(ins, spill_node);
+                read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
         }
 
         /* Insert a load from TLS before the first consecutive
@@ -1349,7 +1349,7 @@ static void mir_spill_register(
                                 /* Mask the load based on the component count
                                  * actually needed to prvent RA loops */
 
-                                st.mask = read_mask;
+                                st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32);
 
                                 mir_insert_instruction_before_scheduled(ctx, block, before, st);
                                // consecutive_skip = true;
index e660ca25aa633f469631a7ad534595e20d0fb49c..09868302a2ece9228ca0ee6410866ab71c728b05 100644 (file)
@@ -533,19 +533,19 @@ mir_bytemask(midgard_instruction *ins)
  * will return a mask of Z/Y for r2
  */
 
-static unsigned
-mir_mask_of_read_components_single(unsigned swizzle, unsigned outmask)
+static uint16_t
+mir_bytemask_of_read_components_single(unsigned swizzle, unsigned inmask, midgard_reg_mode mode)
 {
-        unsigned mask = 0;
+        unsigned cmask = 0;
 
         for (unsigned c = 0; c < 4; ++c) {
-                if (!(outmask & (1 << c))) continue;
+                if (!(inmask & (1 << c))) continue;
 
                 unsigned comp = (swizzle >> (2*c)) & 3;
-                mask |= (1 << comp);
+                cmask |= (1 << comp);
         }
 
-        return mask;
+        return mir_to_bytemask(mode, cmask);
 }
 
 static unsigned
@@ -565,40 +565,39 @@ mir_source_count(midgard_instruction *ins)
         }
 }
 
-unsigned
-mir_mask_of_read_components(midgard_instruction *ins, unsigned node)
+uint16_t
+mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
 {
-        unsigned mask = 0;
+        uint16_t mask = 0;
 
         for (unsigned i = 0; i < mir_source_count(ins); ++i) {
                 if (ins->src[i] != node) continue;
 
                 /* Branch writeout uses all components */
                 if (ins->compact_branch && ins->writeout && (i == 0))
-                        return 0xF;
+                        return 0xFFFF;
 
-                /* Conditional branches read one component (TODO: multi branch??) */
+                /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
                 if (ins->compact_branch && !ins->prepacked_branch && ins->branch.conditional && (i == 0))
-                        return 0x1;
+                        return 0xF;
 
                 /* ALU ops act componentwise so we need to pay attention to
                  * their mask. Texture/ldst does not so we don't clamp source
                  * readmasks based on the writemask */
-                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : 0xF;
+                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : ~0;
 
                 /* Handle dot products and things */
                 if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
-                        unsigned channel_override =
-                                GET_CHANNEL_COUNT(alu_opcode_props[ins->alu.op].props);
+                        unsigned props = alu_opcode_props[ins->alu.op].props;
+
+                        unsigned channel_override = GET_CHANNEL_COUNT(props);
 
                         if (channel_override)
                                 qmask = mask_of(channel_override);
                 }
 
                 unsigned swizzle = mir_get_swizzle(ins, i);
-                unsigned m = mir_mask_of_read_components_single(swizzle, qmask);
-
-               mask |= m;
+                mask |= mir_bytemask_of_read_components_single(swizzle, qmask, mir_srcsize(ins, i));
         }
 
         return mask;