pan/mdg: Streamline dest_override handling
author: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 8 May 2020 21:41:49 +0000 (17:41 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 21 May 2020 17:49:14 +0000 (17:49 +0000)
We can pass it all off to emit time, and let the types in the IR do the
heavy lifting in the meantime, which is a lot easier to get right.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5151>

src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_emit.c
src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/midgard_schedule.c
src/panfrost/midgard/mir.c

index 751df1b108a3c755c5414230a7e9620d97fc5b4d..9de53918644522e4c2f5cbb58f7cbc741773f397 100644 (file)
@@ -500,7 +500,7 @@ uint16_t mir_from_bytemask(uint16_t bytemask, unsigned bits);
 uint16_t mir_bytemask(midgard_instruction *ins);
 uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits);
 void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask);
-unsigned mir_upper_override(midgard_instruction *ins);
+signed mir_upper_override(midgard_instruction *ins, unsigned inst_size);
 
 /* MIR printing */
 
@@ -534,7 +534,6 @@ v_mov(unsigned src, unsigned dest)
                 .alu = {
                         .op = midgard_alu_op_imov,
                         .reg_mode = midgard_reg_mode_32,
-                        .dest_override = midgard_dest_override_none,
                         .outmod = midgard_outmod_int_wrap
                 },
         };
index 070b6e32c2d53b18d918bdacd5daf7eef83563d3..a8a400cfd6d1d5d86a0c371d244997a713b9e472 100644 (file)
@@ -535,20 +535,17 @@ nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components)
                 assert(src_bitsize == dst_bitsize); \
                break;
 
+#define ALU_CHECK_CMP(sext) \
+                assert(src_bitsize == 16 || src_bitsize == 32); \
+                assert(dst_bitsize == 16 || dst_bitsize == 32); \
+
 #define ALU_CASE_BCAST(nir, _op, count) \
         case nir_op_##nir: \
                 op = midgard_alu_op_##_op; \
                 broadcast_swizzle = count; \
-                assert(src_bitsize == dst_bitsize); \
+                ALU_CHECK_CMP(true); \
                 break;
 
-#define ALU_CHECK_CMP(sext) \
-               if (src_bitsize == 16 && dst_bitsize == 32) { \
-                       /* inferred */ \
-                } else { \
-                        assert(src_bitsize == dst_bitsize); \
-                } \
-
 #define ALU_CASE_CMP(nir, _op, sext) \
        case nir_op_##nir: \
                op = midgard_alu_op_##_op; \
@@ -719,12 +716,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
         midgard_reg_mode reg_mode =
                 reg_mode_for_nir(instr);
 
-        /* Do we need a destination override? Used for inline
-         * type conversion */
-
-        midgard_dest_override dest_override =
-                midgard_dest_override_none;
-
         /* Should we swap arguments? */
         bool flip_src12 = false;
 
@@ -819,19 +810,19 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
                 ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2);
                 ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3);
-                ALU_CASE(b32all_fequal4, fball_eq);
+                ALU_CASE_CMP(b32all_fequal4, fball_eq, true);
 
                 ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2);
                 ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3);
-                ALU_CASE(b32any_fnequal4, fbany_neq);
+                ALU_CASE_CMP(b32any_fnequal4, fbany_neq, true);
 
                 ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2);
                 ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3);
-                ALU_CASE(b32all_iequal4, iball_eq);
+                ALU_CASE_CMP(b32all_iequal4, iball_eq, true);
 
                 ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2);
                 ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3);
-                ALU_CASE(b32any_inequal4, ibany_neq);
+                ALU_CASE_CMP(b32any_inequal4, ibany_neq, true);
 
                 /* Source mods will be shoved in later */
                 ALU_CASE(fabs, fmov);
@@ -864,13 +855,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 else
                         op = midgard_alu_op_imov;
 
-                if (dst_bitsize == (src_bitsize * 2)) {
-                        /* inferred */
-                } else if (src_bitsize == (dst_bitsize * 2)) {
-                        /* Converting down */
-                        dest_override = midgard_dest_override_lower;
-                }
-
                 break;
         }
 
@@ -1020,7 +1004,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
         midgard_vector_alu alu = {
                 .op = op,
                 .reg_mode = reg_mode,
-                .dest_override = dest_override,
                 .outmod = outmod,
         };
 
index f26685f6d02e1f7e60f20889ded27e570f467f2b..e9a67811aa8829c5d5a659eb52ec4ed15101c93a 100644 (file)
@@ -171,11 +171,16 @@ mir_pack_mask_alu(midgard_instruction *ins)
          * override to the lower or upper half, shifting the effective mask in
          * the latter, so AAAA.... becomes AAAA */
 
-        unsigned upper_shift = mir_upper_override(ins);
+        unsigned inst_size = 8 << ins->alu.reg_mode;
+        signed upper_shift = mir_upper_override(ins, inst_size);
 
-        if (upper_shift) {
+        if (upper_shift >= 0) {
                 effective >>= upper_shift;
-                ins->alu.dest_override = midgard_dest_override_upper;
+                ins->alu.dest_override = upper_shift ?
+                        midgard_dest_override_upper :
+                        midgard_dest_override_lower;
+        } else {
+                ins->alu.dest_override = midgard_dest_override_none;
         }
 
         if (ins->alu.reg_mode == midgard_reg_mode_32)
@@ -590,7 +595,13 @@ emit_binary_bundle(compiler_context *ctx,
 
                 ins->texture.type = bundle->tag;
                 ins->texture.next_type = next_tag;
-                ins->texture.mask = ins->mask;
+
+                signed override = mir_upper_override(ins, 32);
+
+                ins->texture.mask = override > 0 ?
+                        ins->mask >> override :
+                        ins->mask;
+
                 mir_pack_swizzle_tex(ins);
 
                 unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
@@ -600,6 +611,7 @@ emit_binary_bundle(compiler_context *ctx,
                 assert(isz == 32 || isz == 16);
 
                 ins->texture.out_full = (osz == 32);
+                ins->texture.out_upper = override > 0;
                 ins->texture.in_reg_full = (isz == 32);
                 ins->texture.sampler_type = midgard_sampler_type(ins->dest_type);
 
index e293fb43164910ee662d38eff27ccb381bd83ad0..ecc9d727e2be2f4e1569f99ce4e6d9fd28a5db0a 100644 (file)
@@ -723,16 +723,14 @@ install_registers_instr(
                 struct phys_reg offset = index_to_reg(ctx, l, ins->src[3], src_size[3]);
 
                 /* First, install the texture coordinate */
-                ins->texture.in_reg_full = 1;
-                ins->texture.in_reg_upper = 0;
                 ins->texture.in_reg_select = coord.reg & 1;
                 offset_swizzle(ins->swizzle[1], coord.offset, coord.size, dest.size, 0);
 
                 /* Next, install the destination */
-                ins->texture.out_full = 1;
-                ins->texture.out_upper = 0;
                 ins->texture.out_reg_select = dest.reg & 1;
-                offset_swizzle(ins->swizzle[0], 0, 4, dest.size, dest.offset);
+                offset_swizzle(ins->swizzle[0], 0, 4, dest.size,
+                                dest_size == 2 ? dest.offset % 8 :
+                                dest.offset);
                 mir_set_bytemask(ins, mir_bytemask(ins) << dest.offset);
 
                 /* If there is a register LOD/bias, use it */
index 51f628f42d0da841ed08639ed11c8d90fd2b59ea..72eabf340311e073922a912b5a87f4d6261c19dd 100644 (file)
@@ -204,7 +204,6 @@ mir_is_scalar(midgard_instruction *ains)
         /* Only 16/32-bit can run on a scalar unit */
         could_scalar &= ains->alu.reg_mode != midgard_reg_mode_8;
         could_scalar &= ains->alu.reg_mode != midgard_reg_mode_64;
-        could_scalar &= ains->alu.dest_override == midgard_dest_override_none;
 
         if (ains->src[0] != ~0)
                 could_scalar &= (sz0 == 16) || (sz0 == 32);
index 2e0960148eb75ba552b9c4fe818e0f49e900c118..b7e2917c4651f4d59929905b1ac41a0806e79f71 100644 (file)
@@ -202,22 +202,20 @@ mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
 /* Checks if we should use an upper destination override, rather than the lower
  * one in the IR. Returns zero if no, returns the bytes to shift otherwise */
 
-unsigned
-mir_upper_override(midgard_instruction *ins)
+signed
+mir_upper_override(midgard_instruction *ins, unsigned inst_size)
 {
-        /* If there is no override, there is no upper override, tautology */
-        if (ins->alu.dest_override == midgard_dest_override_none)
-                return 0;
+        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
 
-        /* Make sure we didn't already lower somehow */
-        assert(ins->alu.dest_override == midgard_dest_override_lower);
+        /* If the sizes are the same, there's nothing to override */
+        if (type_size == inst_size)
+                return -1;
 
         /* There are 16 bytes per vector, so there are (16/bytes)
          * components per vector. So the magic half is half of
          * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits
          * */
 
-        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
         unsigned threshold = 64 / type_size;
 
         /* How many components did we shift over? */