panfrost/midgard: Apply code styling
[mesa.git] / src / gallium / drivers / panfrost / midgard / midgard_ra.c
index 7aa2932b806dd522c214a010c141b054b932f712..cfe091326ed13579b41b1c216ea273953ffe7087 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2019 Collabora, Ltd.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -48,8 +49,8 @@
 static unsigned reg_type_to_mask[WORK_STRIDE] = {
         0xF,                                    /* xyzw */
         0x7, 0x7 << 1,                          /* xyz */
-        0x3, 0x3 << 1, 0x3 << 2,                /* xy */
-        0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3       /* x */
+                 0x3, 0x3 << 1, 0x3 << 2,                /* xy */
+                 0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3       /* x */
 };
 
 static unsigned reg_type_to_swizzle[WORK_STRIDE] = {
@@ -88,16 +89,10 @@ compose_writemask(unsigned mask, struct phys_reg reg)
 }
 
 static unsigned
-compose_swizzle(unsigned swizzle, unsigned mask, struct phys_reg reg, struct phys_reg dst)
+compose_swizzle(unsigned swizzle, unsigned mask,
+                struct phys_reg reg, struct phys_reg dst)
 {
-        unsigned out = 0;
-
-        for (unsigned c = 0; c < 4; ++c) {
-                unsigned s = (swizzle >> (2*c)) & 0x3;
-                unsigned q = (reg.swizzle >> (2*s)) & 0x3;
-
-                out |= (q << (2*c));
-        }
+        unsigned out = pan_compose_swizzle(swizzle, reg.swizzle);
 
         /* Based on the register mask, we need to adjust over. E.g if we're
          * writing to yz, a base swizzle of xy__ becomes _xy_. Save the
@@ -127,7 +122,8 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
         if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
                 return hash;
 
-        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
+        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(
+                                ctx->hash_to_temp, hash + 1);
 
         if (temp)
                 return temp - 1;
@@ -136,7 +132,8 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
         temp = ctx->temp_count++;
         ctx->max_hash = MAX2(ctx->max_hash, hash);
 
-        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
+        _mesa_hash_table_u64_insert(ctx->hash_to_temp,
+                                    hash + 1, (void *) ((uintptr_t) temp + 1));
 
         return temp;
 }
@@ -146,7 +143,7 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
 static unsigned int
 midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
 {
-        /* Choose the first available register to minimise reported register pressure */
+        /* Choose the first available register to minimise register pressure */
 
         for (int i = 0; i < (16 * WORK_STRIDE); ++i) {
                 if (BITSET_TEST(regs, i)) {
@@ -231,7 +228,7 @@ allocate_registers(compiler_context *ctx)
         };
 
         /* Add the full set of work registers */
-        for (int i = 0; i < work_count; ++i) {
+        for (unsigned i = 0; i < work_count; ++i) {
                 int base = WORK_STRIDE * i;
 
                 /* Build a full set of subdivisions */
@@ -246,13 +243,15 @@ allocate_registers(compiler_context *ctx)
                 ra_class_add_reg(regs, work_vec1, base + 8);
                 ra_class_add_reg(regs, work_vec1, base + 9);
 
-                for (unsigned i = 0; i < 10; ++i) {
-                        for (unsigned j = 0; j < 10; ++j) {
-                                unsigned mask1 = reg_type_to_mask[i];
-                                unsigned mask2 = reg_type_to_mask[j];
+                for (unsigned a = 0; a < 10; ++a) {
+                        unsigned mask1 = reg_type_to_mask[a];
+
+                        for (unsigned b = 0; b < 10; ++b) {
+                                unsigned mask2 = reg_type_to_mask[b];
 
                                 if (mask1 & mask2)
-                                        ra_add_reg_conflict(regs, base + i, base + j);
+                                        ra_add_reg_conflict(regs,
+                                                            base + a, base + b);
                         }
                 }
         }
@@ -294,16 +293,7 @@ allocate_registers(compiler_context *ctx)
                         if (ins->ssa_args.dest < 0) continue;
                         if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
 
-                        /* Default to vec4 if we're not sure */
-
-                        int mask = 0xF;
-
-                        if (ins->type == TAG_ALU_4)
-                                mask = squeeze_writemask(ins->alu.mask);
-                        else if (ins->type == TAG_LOAD_STORE_4)
-                                mask = ins->load_store.mask;
-
-                        int class = util_logbase2(mask) + 1;
+                        int class = util_logbase2(ins->mask) + 1;
 
                         /* Use the largest class if there's ambiguity, this
                          * handles partial writes */
@@ -344,7 +334,8 @@ allocate_registers(compiler_context *ctx)
                         if (ins->ssa_args.dest < 0) continue;
 
                         if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
-                                /* If this destination is not yet live, it is now since we just wrote it */
+                                /* If this destination is not yet live, it is
+                                 * now since we just wrote it */
 
                                 int dest = ins->ssa_args.dest;
 
@@ -357,7 +348,9 @@ allocate_registers(compiler_context *ctx)
                          * invocations, and if there are none, the source dies
                          * */
 
-                        int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
+                        int sources[2] = {
+                                ins->ssa_args.src0, ins->ssa_args.src1
+                        };
 
                         for (int src = 0; src < 2; ++src) {
                                 int s = sources[src];
@@ -388,7 +381,10 @@ allocate_registers(compiler_context *ctx)
 
         for (int i = 0; i < nodes; ++i) {
                 for (int j = i + 1; j < nodes; ++j) {
-                        if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
+                        bool j_overlaps_i = live_start[j] < live_end[i];
+                        bool i_overlaps_j = live_end[j] < live_start[i];
+
+                        if (i_overlaps_j || j_overlaps_i)
                                 ra_add_node_interference(g, i, j);
                 }
         }
@@ -412,9 +408,9 @@ allocate_registers(compiler_context *ctx)
 
 static void
 install_registers_instr(
-                compiler_context *ctx,
-                struct ra_graph *g,
-                midgard_instruction *ins)
+        compiler_context *ctx,
+        struct ra_graph *g,
+        midgard_instruction *ins)
 {
         ssa_args args = ins->ssa_args;
 
@@ -425,16 +421,16 @@ install_registers_instr(
                 struct phys_reg src2 = index_to_reg(ctx, g, adjusted_src);
                 struct phys_reg dest = index_to_reg(ctx, g, args.dest);
 
-                unsigned mask = squeeze_writemask(ins->alu.mask);
-                ins->alu.mask = expand_writemask(compose_writemask(mask, dest));
+                unsigned uncomposed_mask = ins->mask;
+                ins->mask = compose_writemask(uncomposed_mask, dest);
 
                 /* Adjust the dest mask if necessary. Mostly this is a no-op
                  * but it matters for dot products */
-                dest.mask = effective_writemask(&ins->alu);
+                dest.mask = effective_writemask(&ins->alu, ins->mask);
 
                 midgard_vector_alu_src mod1 =
                         vector_alu_from_unsigned(ins->alu.src1);
-                mod1.swizzle = compose_swizzle(mod1.swizzle, mask, src1, dest);
+                mod1.swizzle = compose_swizzle(mod1.swizzle, uncomposed_mask, src1, dest);
                 ins->alu.src1 = vector_alu_srco_unsigned(mod1);
 
                 ins->registers.src1_reg = src1.reg;
@@ -442,18 +438,21 @@ install_registers_instr(
                 ins->registers.src2_imm = args.inline_constant;
 
                 if (args.inline_constant) {
-                        /* Encode inline 16-bit constant as a vector by default */
+                        /* Encode inline 16-bit constant. See disassembler for
+                         * where the algorithm is from */
 
                         ins->registers.src2_reg = ins->inline_constant >> 11;
 
                         int lower_11 = ins->inline_constant & ((1 << 12) - 1);
+                        uint16_t imm = ((lower_11 >> 8) & 0x7) |
+                                       ((lower_11 & 0xFF) << 3);
 
-                        uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
                         ins->alu.src2 = imm << 2;
                 } else {
                         midgard_vector_alu_src mod2 =
                                 vector_alu_from_unsigned(ins->alu.src2);
-                        mod2.swizzle = compose_swizzle(mod2.swizzle, mask, src2, dest);
+                        mod2.swizzle = compose_swizzle(
+                                               mod2.swizzle, uncomposed_mask, src2, dest);
                         ins->alu.src2 = vector_alu_srco_unsigned(mod2);
 
                         ins->registers.src2_reg = src2.reg;
@@ -464,20 +463,26 @@ install_registers_instr(
         }
 
         case TAG_LOAD_STORE_4: {
-                if (OP_IS_STORE(ins->load_store.op)) {
+                if (OP_IS_STORE_VARY(ins->load_store.op)) {
                         /* TODO: use ssa_args for st_vary */
                         ins->load_store.reg = 0;
                 } else {
-                        struct phys_reg src = index_to_reg(ctx, g, args.dest);
+                        /* Which physical register we read off depends on
+                         * whether we are loading or storing -- think about the
+                         * logical dataflow */
+
+                        unsigned r = OP_IS_STORE(ins->load_store.op) ?
+                                     args.src0 : args.dest;
+                        struct phys_reg src = index_to_reg(ctx, g, r);
 
                         ins->load_store.reg = src.reg;
 
                         ins->load_store.swizzle = compose_swizzle(
-                                        ins->load_store.swizzle, 0xF,
-                                        default_phys_reg(0), src);
+                                                          ins->load_store.swizzle, 0xF,
+                                                          default_phys_reg(0), src);
 
-                        ins->load_store.mask = compose_writemask(
-                                        ins->load_store.mask, src);
+                        ins->mask = compose_writemask(
+                                            ins->mask, src);
                 }
 
                 break;