/*
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
static unsigned reg_type_to_mask[WORK_STRIDE] = {
0xF, /* xyzw */
0x7, 0x7 << 1, /* xyz */
- 0x3, 0x3 << 1, 0x3 << 2, /* xy */
- 0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3 /* x */
+ 0x3, 0x3 << 1, 0x3 << 2, /* xy */
+ 0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3 /* x */
};
static unsigned reg_type_to_swizzle[WORK_STRIDE] = {
}
static unsigned
-compose_swizzle(unsigned swizzle, unsigned mask, struct phys_reg reg, struct phys_reg dst)
+compose_swizzle(unsigned swizzle, unsigned mask,
+ struct phys_reg reg, struct phys_reg dst)
{
- unsigned out = 0;
-
- for (unsigned c = 0; c < 4; ++c) {
- unsigned s = (swizzle >> (2*c)) & 0x3;
- unsigned q = (reg.swizzle >> (2*s)) & 0x3;
-
- out |= (q << (2*c));
- }
+ unsigned out = pan_compose_swizzle(swizzle, reg.swizzle);
/* Based on the register mask, we need to adjust over. E.g if we're
* writing to yz, a base swizzle of xy__ becomes _xy_. Save the
if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
return hash;
- unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
+ unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(
+ ctx->hash_to_temp, hash + 1);
if (temp)
return temp - 1;
temp = ctx->temp_count++;
ctx->max_hash = MAX2(ctx->max_hash, hash);
- _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
+ _mesa_hash_table_u64_insert(ctx->hash_to_temp,
+ hash + 1, (void *) ((uintptr_t) temp + 1));
return temp;
}
static unsigned int
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
{
- /* Choose the first available register to minimise reported register pressure */
+ /* Choose the first available register to minimise register pressure */
for (int i = 0; i < (16 * WORK_STRIDE); ++i) {
if (BITSET_TEST(regs, i)) {
};
/* Add the full set of work registers */
- for (int i = 0; i < work_count; ++i) {
+ for (unsigned i = 0; i < work_count; ++i) {
int base = WORK_STRIDE * i;
/* Build a full set of subdivisions */
ra_class_add_reg(regs, work_vec1, base + 8);
ra_class_add_reg(regs, work_vec1, base + 9);
- for (unsigned i = 0; i < 10; ++i) {
- for (unsigned j = 0; j < 10; ++j) {
- unsigned mask1 = reg_type_to_mask[i];
- unsigned mask2 = reg_type_to_mask[j];
+ for (unsigned a = 0; a < 10; ++a) {
+ unsigned mask1 = reg_type_to_mask[a];
+
+ for (unsigned b = 0; b < 10; ++b) {
+ unsigned mask2 = reg_type_to_mask[b];
if (mask1 & mask2)
- ra_add_reg_conflict(regs, base + i, base + j);
+ ra_add_reg_conflict(regs,
+ base + a, base + b);
}
}
}
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
- /* Default to vec4 if we're not sure */
-
- int mask = 0xF;
-
- if (ins->type == TAG_ALU_4)
- mask = squeeze_writemask(ins->alu.mask);
- else if (ins->type == TAG_LOAD_STORE_4)
- mask = ins->load_store.mask;
-
- int class = util_logbase2(mask) + 1;
+ int class = util_logbase2(ins->mask) + 1;
/* Use the largest class if there's ambiguity, this
* handles partial writes */
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
- /* If this destination is not yet live, it is now since we just wrote it */
+ /* If this destination is not yet live, it is
+ * now since we just wrote it */
int dest = ins->ssa_args.dest;
* invocations, and if there are none, the source dies
* */
- int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
+ int sources[2] = {
+ ins->ssa_args.src0, ins->ssa_args.src1
+ };
for (int src = 0; src < 2; ++src) {
int s = sources[src];
for (int i = 0; i < nodes; ++i) {
for (int j = i + 1; j < nodes; ++j) {
- if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
+ bool j_overlaps_i = live_start[j] < live_end[i];
+ bool i_overlaps_j = live_start[i] < live_end[j];
+
+ if (i_overlaps_j && j_overlaps_i)
ra_add_node_interference(g, i, j);
}
}
static void
install_registers_instr(
- compiler_context *ctx,
- struct ra_graph *g,
- midgard_instruction *ins)
+ compiler_context *ctx,
+ struct ra_graph *g,
+ midgard_instruction *ins)
{
ssa_args args = ins->ssa_args;
struct phys_reg src2 = index_to_reg(ctx, g, adjusted_src);
struct phys_reg dest = index_to_reg(ctx, g, args.dest);
- unsigned mask = squeeze_writemask(ins->alu.mask);
- ins->alu.mask = expand_writemask(compose_writemask(mask, dest));
+ unsigned uncomposed_mask = ins->mask;
+ ins->mask = compose_writemask(uncomposed_mask, dest);
/* Adjust the dest mask if necessary. Mostly this is a no-op
* but it matters for dot products */
- dest.mask = effective_writemask(&ins->alu);
+ dest.mask = effective_writemask(&ins->alu, ins->mask);
midgard_vector_alu_src mod1 =
vector_alu_from_unsigned(ins->alu.src1);
- mod1.swizzle = compose_swizzle(mod1.swizzle, mask, src1, dest);
+ mod1.swizzle = compose_swizzle(mod1.swizzle, uncomposed_mask, src1, dest);
ins->alu.src1 = vector_alu_srco_unsigned(mod1);
ins->registers.src1_reg = src1.reg;
ins->registers.src2_imm = args.inline_constant;
if (args.inline_constant) {
- /* Encode inline 16-bit constant as a vector by default */
+ /* Encode inline 16-bit constant. See disassembler for
+ * where the algorithm is from */
ins->registers.src2_reg = ins->inline_constant >> 11;
int lower_11 = ins->inline_constant & ((1 << 12) - 1);
- uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
+ uint16_t imm = ((lower_11 >> 8) & 0x7) |
+ ((lower_11 & 0xFF) << 3);
ins->alu.src2 = imm << 2;
} else {
midgard_vector_alu_src mod2 =
vector_alu_from_unsigned(ins->alu.src2);
- mod2.swizzle = compose_swizzle(mod2.swizzle, mask, src2, dest);
+ mod2.swizzle = compose_swizzle(
+ mod2.swizzle, uncomposed_mask, src2, dest);
ins->alu.src2 = vector_alu_srco_unsigned(mod2);
ins->registers.src2_reg = src2.reg;
}
case TAG_LOAD_STORE_4: {
- if (OP_IS_STORE(ins->load_store.op)) {
+ if (OP_IS_STORE_VARY(ins->load_store.op)) {
/* TODO: use ssa_args for st_vary */
ins->load_store.reg = 0;
} else {
- struct phys_reg src = index_to_reg(ctx, g, args.dest);
+ /* Which physical register we read off depends on
+ * whether we are loading or storing -- think about the
+ * logical dataflow */
+
+ unsigned r = OP_IS_STORE(ins->load_store.op) ?
+ args.src0 : args.dest;
+ struct phys_reg src = index_to_reg(ctx, g, r);
ins->load_store.reg = src.reg;
ins->load_store.swizzle = compose_swizzle(
- ins->load_store.swizzle, 0xF,
- default_phys_reg(0), src);
+ ins->load_store.swizzle, 0xF,
+ default_phys_reg(0), src);
- ins->load_store.mask = compose_writemask(
- ins->load_store.mask, src);
+ ins->mask = compose_writemask(
+ ins->mask, src);
}
break;