pan/midgard: Remove undefined behavior

[mesa.git] / src / panfrost / midgard / midgard_schedule.c
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c

index acfc5754480d5e67b09eaead69a98d6d9a716f31..d05bdf79d4782715f66d7cc3e4dbd289e41ff925 100644 (file)
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -24,7 +24,6 @@
  #include "compiler.h"
  #include "midgard_ops.h"
  #include "util/u_memory.h"
-#include "util/register_allocate.h"
  
  /* Scheduling for Midgard is complicated, to say the least. ALU instructions
   * must be grouped into VLIW bundles according to following model:
@@ -166,6 +165,9 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                  util_dynarray_fini(&last_read[i]);
                  util_dynarray_fini(&last_write[i]);
          }
+
+        free(last_read);
+        free(last_write);
  }
  
  /* Does the mask cover more than a scalar? */
@@ -370,7 +372,7 @@ mir_adjust_constants(midgard_instruction *ins,
          if (!ins->has_constants)
                  return true;
  
-        if (ins->alu.reg_mode == midgard_reg_mode_16) {
+        if (ins->alu.reg_mode != midgard_reg_mode_32) {
                  /* TODO: 16-bit constant combining */
                  if (pred->constant_count)
                          return false;
@@ -1072,7 +1074,9 @@ schedule_block(compiler_context *ctx, midgard_block *block)
  
          /* Blend constant was backwards as well. blend_offset if set is
           * strictly positive, as an offset of zero would imply constants before
-         * any instructions which is invalid in Midgard */
+         * any instructions, which is invalid in Midgard. TODO: blend constants
+         * break if we spill, since spilling invalidates quadword_count
+         * XXX */
  
          if (blend_offset)
                  ctx->blend_constant_offset = ((ctx->quadword_count + block->quadword_count) - blend_offset - 1) * 0x10;
@@ -1090,6 +1094,9 @@ schedule_block(compiler_context *ctx, midgard_block *block)
          mir_foreach_instr_in_block_scheduled_rev(block, ins) {
                  list_add(&ins->link, &block->instructions);
          }
+
+       free(instructions); /* Allocated by flatten_mir() */
+       free(worklist);
  }
  
  /* When we're 'squeezing down' the values in the IR, we maintain a hash
@@ -1117,7 +1124,8 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
          return temp;
  }
  
-/* Reassigns numbering to get rid of gaps in the indices */
+/* Reassigns numbering to get rid of gaps in the indices and to prioritize
+ * smaller register classes */
  
  static void
  mir_squeeze_index(compiler_context *ctx)
@@ -1127,8 +1135,18 @@ mir_squeeze_index(compiler_context *ctx)
          /* TODO don't leak old hash_to_temp */
          ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
  
+        /* Texture destinations are numbered first so that, on older GPUs, RA
+         * does not fail trying to assign them to work registers r0/r1 when a
+         * work register is already there */
+
          mir_foreach_instr_global(ctx, ins) {
-                ins->dest = find_or_allocate_temp(ctx, ins->dest);
+                if (ins->type == TAG_TEXTURE_4)
+                        ins->dest = find_or_allocate_temp(ctx, ins->dest);
+        }
+
+        mir_foreach_instr_global(ctx, ins) {
+                if (ins->type != TAG_TEXTURE_4)
+                        ins->dest = find_or_allocate_temp(ctx, ins->dest);
  
                  for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i)
                          ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]);
@@ -1157,16 +1175,14 @@ v_load_store_scratch(
                          /* For register spilling - to thread local storage */
                          .arg_1 = 0xEA,
                          .arg_2 = 0x1E,
-
-                        /* Splattered across, TODO combine logically */
-                        .varying_parameters = (byte & 0x1FF) << 1,
-                        .address = (byte >> 9)
                  },
  
                  /* If we spill an unspill, RA goes into an infinite loop */
                  .no_spill = true
          };
  
+        ins.constants[0] = byte;
+
         if (is_store) {
                  /* r0 = r26, r1 = r27 */
                  assert(srcdest == SSA_FIXED_REGISTER(26) || srcdest == SSA_FIXED_REGISTER(27));
@@ -1185,7 +1201,7 @@ v_load_store_scratch(
  
  static void mir_spill_register(
                  compiler_context *ctx,
-                struct ra_graph *g,
+                struct lcra_state *l,
                  unsigned *spill_count)
  {
          unsigned spill_index = ctx->temp_count;
@@ -1194,10 +1210,21 @@ static void mir_spill_register(
           * spill node. All nodes are equal in spill cost, but we can't spill
           * nodes written to from an unspill */
  
-        for (unsigned i = 0; i < ctx->temp_count; ++i) {
-                ra_set_node_spill_cost(g, i, 1.0);
+        unsigned *cost = calloc(ctx->temp_count, sizeof(cost[0]));
+
+        mir_foreach_instr_global(ctx, ins) {
+                if (ins->dest < ctx->temp_count)
+                        cost[ins->dest]++;
+
+                mir_foreach_src(ins, s) {
+                        if (ins->src[s] < ctx->temp_count)
+                                cost[ins->src[s]]++;
+                }
          }
  
+        for (unsigned i = 0; i < ctx->temp_count; ++i)
+                lcra_set_node_spill_cost(l, i, cost[i]);
+
          /* We can't spill any bundles that contain unspills. This could be
           * optimized to allow use of r27 to spill twice per bundle, but if
           * you're at the point of optimizing spilling, it's too late.
@@ -1216,7 +1243,7 @@ static void mir_spill_register(
                                                  unsigned src = bun->instructions[i]->src[s];
  
                                                  if (src < ctx->temp_count)
-                                                        ra_set_node_spill_cost(g, src, -1.0);
+                                                        lcra_set_node_spill_cost(l, src, -1);
                                          }
                                  }
                          }
@@ -1227,12 +1254,12 @@ static void mir_spill_register(
                          for (unsigned i = 0; i < bun->instruction_count; ++i) {
                                  unsigned dest = bun->instructions[i]->dest;
                                  if (dest < ctx->temp_count)
-                                        ra_set_node_spill_cost(g, dest, -1.0);
+                                        lcra_set_node_spill_cost(l, dest, -1);
                          }
                  }
          }
  
-        int spill_node = ra_get_best_spill_node(g);
+        int spill_node = lcra_get_best_spill_node(l);
  
          if (spill_node < 0) {
                  mir_print_shader(ctx);
@@ -1243,9 +1270,8 @@ static void mir_spill_register(
           * legitimately spill to TLS, but special registers just spill to work
           * registers */
  
-        unsigned class = ra_get_node_class(g, spill_node);
-        bool is_special = (class >> 2) != REG_CLASS_WORK;
-        bool is_special_w = (class >> 2) == REG_CLASS_TEXW;
+        bool is_special = l->class[spill_node] != REG_CLASS_WORK;
+        bool is_special_w = l->class[spill_node] == REG_CLASS_TEXW;
  
          /* Allocate TLS slot (maybe) */
          unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
@@ -1366,12 +1392,14 @@ static void mir_spill_register(
          mir_foreach_instr_global(ctx, ins) {
                  ins->hint = false;
          }
+
+        free(cost);
  }
  
  void
  schedule_program(compiler_context *ctx)
  {
-        struct ra_graph *g = NULL;
+        struct lcra_state *l = NULL;
          bool spilled = false;
          int iter_count = 1000; /* max iterations */
  
@@ -1396,13 +1424,17 @@ schedule_program(compiler_context *ctx)
  
          do {
                  if (spilled) 
-                        mir_spill_register(ctx, g, &spill_count);
+                        mir_spill_register(ctx, l, &spill_count);
  
                  mir_squeeze_index(ctx);
                  mir_invalidate_liveness(ctx);
  
-                g = NULL;
-                g = allocate_registers(ctx, &spilled);
+                if (l) {
+                        lcra_free(l);
+                        l = NULL;
+                }
+
+                l = allocate_registers(ctx, &spilled);
          } while(spilled && ((iter_count--) > 0));
  
          if (iter_count <= 0) {
@@ -1415,5 +1447,7 @@ schedule_program(compiler_context *ctx)
  
          ctx->tls_size = spill_count * 16;
  
-        install_registers(ctx, g);
+        install_registers(ctx, l);
+
+        lcra_free(l);
  }