pan/midgard: Fix masks/alignment for 64-bit loads
author    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
          Fri, 15 Nov 2019 20:16:53 +0000 (15:16 -0500)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
          Mon, 18 Nov 2019 03:19:31 +0000 (22:19 -0500)
64-bit loads need special care. Each enabled component covers eight
bytes instead of four, so copying the NIR component mask straight into
ins->mask undercounts what the destination actually touches. Instead,
normalize the mask to a bytemask (mir_to_bytemask/mir_set_bytemask) and
flag the instruction as load_64 so register allocation can raise the
destination's minimum alignment from 4 to 8 bytes. Promoted uniforms
get the same treatment: the replacement mov switches to
midgard_reg_mode_64 and inherits the bytemask rather than the raw mask.
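
As a rough sketch of that normalization (a hypothetical standalone
helper for illustration, not the mir_to_bytemask() in the tree):

    /* Sketch only: expand a per-component mask into a per-byte mask.
     * A vec2 64-bit load (comp_mask 0x3, 8 bytes per component) and a
     * vec4 32-bit load (comp_mask 0xF, 4 bytes per component) both
     * cover the full 16-byte register, i.e. bytemask 0xFFFF. */
    static unsigned
    bytemask_sketch(unsigned comp_mask, unsigned bytes_per_comp)
    {
            unsigned comp_bytes = (1u << bytes_per_comp) - 1;
            unsigned bytemask = 0;

            for (unsigned c = 0; c < 4; ++c) {
                    if (comp_mask & (1u << c))
                            bytemask |= comp_bytes << (c * bytes_per_comp);
            }

            return bytemask;
    }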

Oh, Midgard, you're *extra* special.
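
On the register allocation side the constraint becomes a per-node
minimum instead of a blanket lcra_set_alignment() call. Illustrative
values only (log2 bytes, matching the "(1 << 2) = 4" style comments;
needs_64bit is a hypothetical stand-in for the ins->load_64 and
reg_mode checks):

    /* 32-bit destinations keep 4-byte alignment; 64-bit destinations
     * get 8-byte alignment so each component sits on a natural 64-bit
     * boundary within the 16-byte register. */
    unsigned align_log2 = needs_64bit ? 3 : 2; /* 1 << 3 = 8, 1 << 2 = 4 */
    lcra_set_alignment(l, dest, align_log2);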

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/compiler.h
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/mir_promote_uniforms.c

diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 3c70f071814e8862a11a56d4c73edd28400793ce..3c1730143e2a40c09e087e5b7950d3915fb21884 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -144,6 +144,9 @@ typedef struct midgard_instruction {
         unsigned nr_dependencies;
         BITSET_WORD *dependents;
 
+        /* For load/store ops... force a 64-bit destination */
+        bool load_64;
+
         union {
                 midgard_load_store_word load_store;
                 midgard_vector_alu alu;
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 19573900bd9aa03639643b1c67fa315e5473a051..a187beaab7c9647fbb1ba60a7852720ccf011195 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1104,15 +1104,27 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 
 #undef ALU_CASE
 
-static unsigned
-mir_mask_for_intr(nir_instr *instr, bool is_read)
+static void
+mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read)
 {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+        unsigned nir_mask = 0;
+        unsigned dsize = 0;
 
-        if (is_read)
-                return mask_of(nir_intrinsic_dest_components(intr));
-        else
-                return nir_intrinsic_write_mask(intr);
+        if (is_read) {
+                nir_mask = mask_of(nir_intrinsic_dest_components(intr));
+                dsize = nir_dest_bit_size(intr->dest);
+        } else {
+                nir_mask = nir_intrinsic_write_mask(intr);
+                dsize = 32;
+        }
+
+        /* Once we have the NIR mask, we need to normalize to work in 32-bit space */
+        unsigned bytemask = mir_to_bytemask(mir_mode_for_destsize(dsize), nir_mask);
+        mir_set_bytemask(ins, bytemask);
+
+        if (dsize == 64)
+                ins->load_64 = true;
 }
 
 /* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
@@ -1134,7 +1146,7 @@ emit_ubo_read(
         /* TODO: Don't split */
         ins.load_store.varying_parameters = (offset & 0x7F) << 3;
         ins.load_store.address = offset >> 7;
-        ins.mask = mir_mask_for_intr(instr, true);
+        mir_set_intr_mask(instr, &ins, true);
 
         if (indirect_offset) {
                 ins.src[2] = nir_src_index(ctx, indirect_offset);
@@ -1204,7 +1216,7 @@ emit_ssbo_access(
 
         ins.load_store.varying_parameters = (offset & 0x1FF) << 1;
         ins.load_store.address = (offset >> 9);
-        ins.mask = mir_mask_for_intr(instr, is_read);
+        mir_set_intr_mask(instr, &ins, is_read);
 
         emit_mir_instruction(ctx, ins);
 }
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 85ad96ff312151ca925abd5bec414527e4dad9fa..c2c1874aa821209d21a0a1052ec1e5f9d8bb781f 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -452,6 +452,7 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         }
 
         unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count);
+        unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count);
 
         mir_foreach_instr_global(ctx, ins) {
                 if (ins->dest >= SSA_FIXED_MINIMUM) continue;
@@ -465,17 +466,21 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                 int dest = ins->dest;
                 found_class[dest] = MAX2(found_class[dest], class);
 
-                lcra_set_alignment(l, dest, 2); /* (1 << 2) = 4 */
-
                 /* XXX: Ensure swizzles align the right way with more LCRA constraints? */
                 if (ins->type == TAG_ALU_4 && ins->alu.reg_mode != midgard_reg_mode_32)
-                        lcra_set_alignment(l, dest, 3); /* (1 << 3) = 8 */
+                        min_alignment[dest] = 3; /* (1 << 3) = 8 */
+
+                if (ins->type == TAG_LOAD_STORE_4 && ins->load_64)
+                        min_alignment[dest] = 3;
         }
 
-        for (unsigned i = 0; i < ctx->temp_count; ++i)
+        for (unsigned i = 0; i < ctx->temp_count; ++i) {
+                lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2);
                 lcra_restrict_range(l, i, (found_class[i] + 1) * 4);
+        }
         
         free(found_class);
+        free(min_alignment);
 
         /* Next, we'll determine semantic class. We default to zero (work).
          * But, if we're used with a special operation, that will force us to a
diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index b10717852e5008a7d6c3913eefb92d53bdb4edbe..8d887a615fb26ded5bcb7bed4afee640ae2f92c5 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -89,7 +89,11 @@ midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
 
                 if (needs_move) {
                         midgard_instruction mov = v_mov(promoted, ins->dest);
-                        mov.mask = ins->mask;
+
+                        if (ins->load_64)
+                                mov.alu.reg_mode = midgard_reg_mode_64;
+
+                        mir_set_bytemask(&mov, mir_bytemask(ins));
                         mir_insert_instruction_before(ctx, ins, mov);
                 } else {
                         mir_rewrite_index_src(ctx, ins->dest, promoted);