panfrost: Build midgard_pack.h via meson
[mesa.git] / src / panfrost / midgard / midgard_schedule.c
index dc127dcea9008b6b328d5d6df35e897505d07d53..3391e5360b2f340555b55ca92f334fd6c06768e0 100644 (file)
@@ -26,6 +26,7 @@
 #include "midgard_quirks.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/half_float.h"
 
 /* Scheduling for Midgard is complicated, to say the least. ALU instructions
  * must be grouped into VLIW bundles according to following model:
@@ -198,12 +199,12 @@ mir_is_scalar(midgard_instruction *ains)
 
         /* Otherwise, check mode hazards */
         bool could_scalar = true;
+        unsigned szd = nir_alu_type_get_type_size(ains->dest_type);
         unsigned sz0 = nir_alu_type_get_type_size(ains->src_types[0]);
         unsigned sz1 = nir_alu_type_get_type_size(ains->src_types[1]);
 
         /* Only 16/32-bit can run on a scalar unit */
-        could_scalar &= ains->alu.reg_mode != midgard_reg_mode_8;
-        could_scalar &= ains->alu.reg_mode != midgard_reg_mode_64;
+        could_scalar &= (szd == 16) || (szd == 32);
 
         if (ains->src[0] != ~0)
                 could_scalar &= (sz0 == 16) || (sz0 == 32);
@@ -224,7 +225,7 @@ bytes_for_instruction(midgard_instruction *ains)
         else if (ains->unit == ALU_ENAB_BRANCH)
                 return sizeof(midgard_branch_extended);
         else if (ains->compact_branch)
-                return sizeof(ains->br_compact);
+                return sizeof(uint16_t);
         else
                 return sizeof(midgard_reg_info) + sizeof(midgard_scalar_alu);
 }
@@ -333,12 +334,17 @@ struct midgard_predicate {
          * scheduled one). Excludes conditional branches and csel */
         bool no_cond;
 
-        /* Require a minimal mask and (if nonzero) given destination. Used for
-         * writeout optimizations */
+        /* Require (or reject) a minimal mask and (if nonzero) given
+         * destination. Used for writeout optimizations */
 
         unsigned mask;
+        unsigned no_mask;
         unsigned dest;
 
+        /* Whether to not-care about / never / only schedule imov/fmov
+         * instructions (move_mode 0/1/2 respectively). This allows non-move
+         * instructions to get priority on each unit */
+        unsigned move_mode;
+
         /* For load/store: how many pipeline registers are in use? The two
         * scheduled instructions cannot use more than the 256 bits of pipeline
          * space available or RA will fail (as it would run out of pipeline
@@ -508,9 +514,114 @@ mir_pipeline_count(midgard_instruction *ins)
         return DIV_ROUND_UP(bytecount, 16);
 }
 
+/* Matches FADD x, x with compatible modifiers. Since x + x = x * 2, this
+ * holds for any x, including x of the form f(y) for some swizzle/abs/neg
+ * function f */
+
+static bool
+mir_is_add_2(midgard_instruction *ins)
+{
+        if (ins->op != midgard_alu_op_fadd)
+                return false;
+
+        if (ins->src[0] != ins->src[1])
+                return false;
+
+        if (ins->src_types[0] != ins->src_types[1])
+                return false;
+
+        for (unsigned i = 0; i < MIR_VEC_COMPONENTS; ++i) {
+                if (ins->swizzle[0][i] != ins->swizzle[1][i])
+                        return false;
+        }
+
+        if (ins->src_abs[0] != ins->src_abs[1])
+                return false;
+
+        if (ins->src_neg[0] != ins->src_neg[1])
+                return false;
+
+        return true;
+}
+
+static void
+mir_adjust_unit(midgard_instruction *ins, unsigned unit)
+{
+        /* FADD x, x = FMUL x, #2 */
+        if (mir_is_add_2(ins) && (unit & (UNITS_MUL | UNIT_VLUT))) {
+                ins->op = midgard_alu_op_fmul;
+
+                ins->src[1] = ~0;
+                ins->src_abs[1] = false;
+                ins->src_neg[1] = false;
+
+                ins->has_inline_constant = true;
+                ins->inline_constant = _mesa_float_to_half(2.0);
+        }
+}
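+
+/* As a sketch (register names illustrative, not from the source), the
+ * rewrite above turns
+ *
+ *    fadd r0.xyzw, r4.xxxx, r4.xxxx
+ *
+ * into
+ *
+ *    fmul r0.xyzw, r4.xxxx, #2.0
+ *
+ * where the inline constant is encoded as a half-float, hence the
+ * _mesa_float_to_half(2.0) above. */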
+
+static bool
+mir_has_unit(midgard_instruction *ins, unsigned unit)
+{
+        if (alu_opcode_props[ins->op].props & unit)
+                return true;
+
+        /* FADD x, x can run on any adder or any multiplier */
+        if (mir_is_add_2(ins))
+                return true;
+
+        return false;
+}
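+
+/* So, for instance, an FADD matching mir_is_add_2 reports itself
+ * schedulable on any unit even though alu_opcode_props only lists adders
+ * for it; mir_adjust_unit then rewrites it to FMUL if it actually lands on
+ * a multiplier. */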
+
+/* Net change in liveness if an instruction were scheduled. Loosely based on
+ * ir3's scheduler. */
+
+static int
+mir_live_effect(uint16_t *liveness, midgard_instruction *ins, bool destructive)
+{
+        /* TODO: what if dest is used multiple times? */
+        int free_live = 0;
+
+        if (ins->dest < SSA_FIXED_MINIMUM) {
+                unsigned bytemask = mir_bytemask(ins);
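+                /* Round up to a contiguous byte mask, e.g. 0x5 (bytes 0 and
+                 * 2) becomes 0x7 (bytes 0-2) */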
+                bytemask = util_next_power_of_two(bytemask + 1) - 1;
+                free_live += util_bitcount(liveness[ins->dest] & bytemask);
+
+                if (destructive)
+                        liveness[ins->dest] &= ~bytemask;
+        }
+
+        int new_live = 0;
+
+        mir_foreach_src(ins, s) {
+                unsigned S = ins->src[s];
+
+                bool dupe = false;
+
+                for (unsigned q = 0; q < s; ++q)
+                        dupe |= (ins->src[q] == S);
+
+                if (dupe)
+                        continue;
+
+                if (S < SSA_FIXED_MINIMUM) {
+                        unsigned bytemask = mir_bytemask_of_read_components(ins, S);
+                        bytemask = util_next_power_of_two(bytemask + 1) - 1;
+
+                        /* Count only the new components */
+                        new_live += util_bitcount(bytemask & ~(liveness[S]));
+
+                        if (destructive)
+                                liveness[S] |= bytemask;
+                }
+        }
+
+        return new_live - free_live;
+}
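+
+/* Worked example for mir_live_effect (values illustrative): scheduling an
+ * instruction whose 4-byte destination is currently live frees those 4
+ * bytes (the scheduler walks the block in reverse, so past the writer the
+ * value is dead), while a source not yet live adds its bytes. One new
+ * 4-byte source against a 4-byte dead destination nets 4 - 4 = 0. */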
+
 static midgard_instruction *
 mir_choose_instruction(
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned count,
                 struct midgard_predicate *predicate)
 {
@@ -531,6 +642,7 @@ mir_choose_instruction(
         unsigned i;
 
         signed best_index = -1;
+        signed best_effect = INT_MAX;
         bool best_conditional = false;
 
         /* Enforce a simple metric limiting distance to keep down register
@@ -538,13 +650,17 @@ mir_choose_instruction(
          * results */
 
         unsigned max_active = 0;
-        unsigned max_distance = 6;
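+        /* Presumably a wider window is affordable now that pressure is
+         * modelled explicitly by mir_live_effect */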
+        unsigned max_distance = 36;
 
         BITSET_FOREACH_SET(i, worklist, count) {
                 max_active = MAX2(max_active, i);
         }
 
         BITSET_FOREACH_SET(i, worklist, count) {
+                bool is_move = alu &&
+                        (instructions[i]->op == midgard_alu_op_imov ||
+                         instructions[i]->op == midgard_alu_op_fmov);
+
                 if ((max_active - i) >= max_distance)
                         continue;
 
@@ -554,7 +670,11 @@ mir_choose_instruction(
                 if (predicate->exclude != ~0 && instructions[i]->dest == predicate->exclude)
                         continue;
 
-                if (alu && !branch && !(alu_opcode_props[instructions[i]->alu.op].props & unit))
+                if (alu && !branch && !(mir_has_unit(instructions[i], unit)))
+                        continue;
+
+                /* 0: don't care, 1: no moves, 2: only moves */
+                if (predicate->move_mode && ((predicate->move_mode - 1) != is_move))
                         continue;
 
                 if (branch && !instructions[i]->compact_branch)
@@ -572,24 +692,31 @@ mir_choose_instruction(
                 if (mask && ((~instructions[i]->mask) & mask))
                         continue;
 
+                if (instructions[i]->mask & predicate->no_mask)
+                        continue;
+
                 if (ldst && mir_pipeline_count(instructions[i]) + predicate->pipeline_count > 2)
                         continue;
 
-                bool conditional = alu && !branch && OP_IS_CSEL(instructions[i]->alu.op);
+                bool conditional = alu && !branch && OP_IS_CSEL(instructions[i]->op);
                 conditional |= (branch && instructions[i]->branch.conditional);
 
                 if (conditional && no_cond)
                         continue;
 
-                /* Simulate in-order scheduling */
-                if ((signed) i < best_index)
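+                /* Prefer whichever candidate frees the most (or adds the
+                 * fewest) live bytes; ties fall back to the old in-order
+                 * heuristic */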
+                int effect = mir_live_effect(liveness, instructions[i], false);
+
+                if (effect > best_effect)
+                        continue;
+
+                if (effect == best_effect && (signed) i < best_index)
                         continue;
 
+                best_effect = effect;
                 best_index = i;
                 best_conditional = conditional;
         }
 
-
         /* Did we find anything?  */
 
         if (best_index < 0)
@@ -607,8 +734,12 @@ mir_choose_instruction(
                 if (ldst)
                         predicate->pipeline_count += mir_pipeline_count(instructions[best_index]);
 
+                if (alu)
+                        mir_adjust_unit(instructions[best_index], unit);
+
                 /* Once we schedule a conditional, we can't again */
                 predicate->no_cond |= best_conditional;
+                mir_live_effect(liveness, instructions[best_index], true);
         }
 
         return instructions[best_index];
@@ -620,6 +751,7 @@ mir_choose_instruction(
 static unsigned
 mir_choose_bundle(
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned count)
 {
         /* At the moment, our algorithm is very simple - use the bundle of the
@@ -632,7 +764,7 @@ mir_choose_bundle(
                 .exclude = ~0
         };
 
-        midgard_instruction *chosen = mir_choose_instruction(instructions, worklist, count, &predicate);
+        midgard_instruction *chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate);
 
         if (chosen)
                 return chosen->type;
@@ -644,6 +776,7 @@ mir_choose_bundle(
 static void
 mir_choose_alu(midgard_instruction **slot,
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned len,
                 struct midgard_predicate *predicate,
                 unsigned unit)
@@ -654,7 +787,7 @@ mir_choose_alu(midgard_instruction **slot,
 
         /* Try to schedule something, if not */
         predicate->unit = unit;
-        *slot = mir_choose_instruction(instructions, worklist, len, predicate);
+        *slot = mir_choose_instruction(instructions, liveness, worklist, len, predicate);
 
         /* Store unit upon scheduling */
         if (*slot && !((*slot)->compact_branch))
@@ -697,13 +830,13 @@ mir_comparison_mobile(
                         return ~0;
 
                 /* If it would itself require a condition, that's recursive */
-                if (OP_IS_CSEL(instructions[i]->alu.op))
+                if (OP_IS_CSEL(instructions[i]->op))
                         return ~0;
 
                 /* We'll need to rewrite to .w but that doesn't work for vector
                  * ops that don't replicate (ball/bany), so bail there */
 
-                if (GET_CHANNEL_COUNT(alu_opcode_props[instructions[i]->alu.op].props))
+                if (GET_CHANNEL_COUNT(alu_opcode_props[instructions[i]->op].props))
                         return ~0;
 
                 /* Ensure it will fit with constants */
@@ -775,7 +908,7 @@ mir_schedule_condition(compiler_context *ctx,
         unsigned condition_index = branch ? 0 : 2;
 
         /* csel_v is vector; otherwise, conditions are scalar */
-        bool vector = !branch && OP_IS_CSEL_V(last->alu.op);
+        bool vector = !branch && OP_IS_CSEL_V(last->op);
 
         /* Grab the conditional instruction */
 
@@ -821,6 +954,7 @@ mir_schedule_condition(compiler_context *ctx,
 static midgard_bundle
 mir_schedule_texture(
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned len,
                 bool is_vertex)
 {
@@ -831,13 +965,14 @@ mir_schedule_texture(
         };
 
         midgard_instruction *ins =
-                mir_choose_instruction(instructions, worklist, len, &predicate);
+                mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
 
         mir_update_worklist(worklist, len, instructions, ins);
 
         struct midgard_bundle out = {
-                .tag = ins->texture.op == TEXTURE_OP_BARRIER ?
-                        TAG_TEXTURE_4_BARRIER :  is_vertex ?
+                .tag = ins->op == TEXTURE_OP_BARRIER ?
+                        TAG_TEXTURE_4_BARRIER :
+                        (ins->op == TEXTURE_OP_TEXEL_FETCH) || is_vertex ?
                         TAG_TEXTURE_4_VTX : TAG_TEXTURE_4,
                 .instruction_count = 1,
                 .instructions = { ins }
@@ -849,6 +984,7 @@ mir_schedule_texture(
 static midgard_bundle
 mir_schedule_ldst(
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned len)
 {
         struct midgard_predicate predicate = {
@@ -860,10 +996,10 @@ mir_schedule_ldst(
        /* Try to pick two load/store ops. Second not guaranteed to exist */
 
         midgard_instruction *ins =
-                mir_choose_instruction(instructions, worklist, len, &predicate);
+                mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
 
         midgard_instruction *pair =
-                mir_choose_instruction(instructions, worklist, len, &predicate);
+                mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
 
         struct midgard_bundle out = {
                 .tag = TAG_LOAD_STORE_4,
@@ -880,10 +1016,79 @@ mir_schedule_ldst(
         return out;
 }
 
+static void
+mir_schedule_zs_write(
+                compiler_context *ctx,
+                struct midgard_predicate *predicate,
+                midgard_instruction **instructions,
+                uint16_t *liveness,
+                BITSET_WORD *worklist, unsigned len,
+                midgard_instruction *branch,
+                midgard_instruction **smul,
+                midgard_instruction **vadd,
+                midgard_instruction **vlut,
+                bool stencil)
+{
+        bool success = false;
+        unsigned idx = stencil ? 3 : 2;
+        unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[idx];
+
+        predicate->dest = src;
+        predicate->mask = 0x1;
+
+        midgard_instruction **units[] = { smul, vadd, vlut };
+        unsigned unit_names[] = { UNIT_SMUL, UNIT_VADD, UNIT_VLUT };
+
+        for (unsigned i = 0; i < 3; ++i) {
+                if (*(units[i]))
+                        continue;
+
+                predicate->unit = unit_names[i];
+                midgard_instruction *ins =
+                        mir_choose_instruction(instructions, liveness, worklist, len, predicate);
+
+                if (ins) {
+                        ins->unit = unit_names[i];
+                        *(units[i]) = ins;
+                        success = true;
+                        break;
+                }
+        }
+
+        predicate->dest = predicate->mask = 0;
+
+        if (success)
+                return;
+
+        midgard_instruction *mov = ralloc(ctx, midgard_instruction);
+        *mov = v_mov(src, make_compiler_temp(ctx));
+        mov->mask = 0x1;
+
+        branch->src[idx] = mov->dest;
+
+        if (stencil) {
+                unsigned swizzle = (branch->src[0] == ~0) ? COMPONENT_Y : COMPONENT_X;
+
+                for (unsigned c = 0; c < 16; ++c)
+                        mov->swizzle[1][c] = swizzle;
+        }
+
+        for (unsigned i = 0; i < 3; ++i) {
+                if (!(*(units[i]))) {
+                        *(units[i]) = mov;
+                        mov->unit = unit_names[i];
+                        return;
+                }
+        }
+
+        unreachable("Could not schedule Z/S move to any unit");
+}
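+
+/* For example (a sketch): a depth write sourced from ssa_12 first tries to
+ * reschedule the producer of ssa_12 straight into SMUL/VADD/VLUT with mask
+ * 0x1; failing that, a single-component mov of ssa_12 into a fresh temp is
+ * synthesized, the branch is rewritten to read that temp, and the mov is
+ * placed in whichever of the three units is still free. */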
+
 static midgard_bundle
 mir_schedule_alu(
                 compiler_context *ctx,
                 midgard_instruction **instructions,
+                uint16_t *liveness,
                 BITSET_WORD *worklist, unsigned len)
 {
         struct midgard_bundle bundle = {};
@@ -904,10 +1109,9 @@ mir_schedule_alu(
         midgard_instruction *sadd = NULL;
         midgard_instruction *branch = NULL;
 
-        mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
+        mir_choose_alu(&branch, instructions, liveness, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
         mir_update_worklist(worklist, len, instructions, branch);
-        bool writeout = branch && branch->writeout;
-        bool zs_writeout = writeout && (branch->writeout_depth | branch->writeout_stencil);
+        unsigned writeout = branch ? branch->writeout : 0;
 
         if (branch && branch->branch.conditional) {
                 midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch);
@@ -939,21 +1143,24 @@ mir_schedule_alu(
                 predicate.no_cond = true;
         }
 
-        mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
-
-        if (!writeout) {
-                mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
-        } else {
+        if (writeout) {
                 /* Propagate up */
                 bundle.last_writeout = branch->last_writeout;
         }
 
-        if (writeout && !zs_writeout) {
+        /* When MRT is in use, writeout loops require r1.w to be filled (with a
+         * return address? by symmetry with Bifrost, etc), at least for blend
+         * shaders to work properly. When MRT is not in use (including on SFBD
+         * GPUs), this is not needed. Blend shaders themselves don't know if
+         * they are paired with MRT or not, so they always need this, at least
+         * on MFBD GPUs. */
+
+        if (writeout && (ctx->is_blend || ctx->writeout_branch[1])) {
                 vadd = ralloc(ctx, midgard_instruction);
                 *vadd = v_mov(~0, make_compiler_temp(ctx));
 
                 if (!ctx->is_blend) {
-                        vadd->alu.op = midgard_alu_op_iadd;
+                        vadd->op = midgard_alu_op_iadd;
                         vadd->src[0] = SSA_FIXED_REGISTER(31);
                         vadd->src_types[0] = nir_type_uint32;
 
@@ -976,14 +1183,31 @@ mir_schedule_alu(
                 branch->dest_type = vadd->dest_type;
         }
 
-        mir_choose_alu(&vadd, instructions, worklist, len, &predicate, UNIT_VADD);
-        
+        if (writeout & PAN_WRITEOUT_Z)
+                mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, false);
+
+        if (writeout & PAN_WRITEOUT_S)
+                mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, true);
+
+        mir_choose_alu(&smul, instructions, liveness, worklist, len, &predicate, UNIT_SMUL);
+
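+        /* Two passes: pass one (move_mode = 1) excludes imov/fmov so real
+         * work claims VLUT/VADD first; pass two (move_mode = 2) lets moves
+         * fill what is left. During writeout, no_mask keeps .w writes off
+         * VLUT (presumably to protect the r1.w fill described above). */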
+        for (unsigned mode = 1; mode < 3; ++mode) {
+                predicate.move_mode = mode;
+                predicate.no_mask = writeout ? (1 << 3) : 0;
+                mir_choose_alu(&vlut, instructions, liveness, worklist, len, &predicate, UNIT_VLUT);
+                predicate.no_mask = 0;
+                mir_choose_alu(&vadd, instructions, liveness, worklist, len, &predicate, UNIT_VADD);
+        }
+
+        /* Reset */
+        predicate.move_mode = 0;
+
         mir_update_worklist(worklist, len, instructions, vlut);
         mir_update_worklist(worklist, len, instructions, vadd);
         mir_update_worklist(worklist, len, instructions, smul);
 
-        bool vadd_csel = vadd && OP_IS_CSEL(vadd->alu.op);
-        bool smul_csel = smul && OP_IS_CSEL(smul->alu.op);
+        bool vadd_csel = vadd && OP_IS_CSEL(vadd->op);
+        bool smul_csel = smul && OP_IS_CSEL(smul->op);
 
         if (vadd_csel || smul_csel) {
                 midgard_instruction *ins = vadd_csel ? vadd : smul;
@@ -998,13 +1222,13 @@ mir_schedule_alu(
         }
 
         /* Stage 2, let's schedule sadd before vmul for writeout */
-        mir_choose_alu(&sadd, instructions, worklist, len, &predicate, UNIT_SADD);
+        mir_choose_alu(&sadd, instructions, liveness, worklist, len, &predicate, UNIT_SADD);
 
         /* Check if writeout reads its own register */
 
         if (writeout) {
-                midgard_instruction *stages[] = { sadd, vadd, smul };
-                unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : branch->src[0];
+                midgard_instruction *stages[] = { sadd, vadd, smul, vlut };
+                unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0];
                 unsigned writeout_mask = 0x0;
                 bool bad_writeout = false;
 
@@ -1020,12 +1244,10 @@ mir_schedule_alu(
                 }
 
                 /* It's possible we'll be able to schedule something into vmul
-                 * to fill r0/r1. Let's peak into the future, trying to schedule
+                 * to fill r0. Let's peek into the future, trying to schedule
                  * vmul specially that way. */
 
-                unsigned full_mask = zs_writeout ?
-                        (1 << (branch->writeout_depth + branch->writeout_stencil)) - 1 :
-                        0xF;
+                unsigned full_mask = 0xF;
 
                 if (!bad_writeout && writeout_mask != full_mask) {
                         predicate.unit = UNIT_VMUL;
@@ -1033,7 +1255,7 @@ mir_schedule_alu(
                         predicate.mask = writeout_mask ^ full_mask;
 
                         struct midgard_instruction *peaked =
-                                mir_choose_instruction(instructions, worklist, len, &predicate);
+                                mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
 
                         if (peaked) {
                                 vmul = peaked;
@@ -1048,7 +1270,7 @@ mir_schedule_alu(
 
                 /* Finally, add a move if necessary */
                 if (bad_writeout || writeout_mask != full_mask) {
-                        unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : make_compiler_temp(ctx);
+                        unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx);
 
                         vmul = ralloc(ctx, midgard_instruction);
                         *vmul = v_mov(src, temp);
@@ -1066,7 +1288,7 @@ mir_schedule_alu(
                 }
         }
 
-        mir_choose_alu(&vmul, instructions, worklist, len, &predicate, UNIT_VMUL);
+        mir_choose_alu(&vmul, instructions, liveness, worklist, len, &predicate, UNIT_VMUL);
 
         mir_update_worklist(worklist, len, instructions, vmul);
         mir_update_worklist(worklist, len, instructions, sadd);
@@ -1109,8 +1331,11 @@ mir_schedule_alu(
         /* Size ALU instruction for tag */
         bundle.tag = (TAG_ALU_4) + (bytes_emitted / 16) - 1;
 
+        bool tilebuf_wait = branch && branch->compact_branch &&
+           branch->branch.target_type == TARGET_TILEBUF_WAIT;
+
         /* MRT capable GPUs use a special writeout procedure */
-        if (writeout && !(ctx->quirks & MIDGARD_NO_UPPER_ALU))
+        if ((writeout || tilebuf_wait) && !(ctx->quirks & MIDGARD_NO_UPPER_ALU))
                 bundle.tag += 4;
 
         bundle.padding = padding;
@@ -1140,6 +1365,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
         /* Allocate the worklist */
         size_t sz = BITSET_WORDS(len) * sizeof(BITSET_WORD);
         BITSET_WORD *worklist = calloc(sz, 1);
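+        /* One 16-bit mask of live bytes per node */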
+        uint16_t *liveness = calloc(node_count, 2);
         mir_initialize_worklist(worklist, instructions, len);
 
         struct util_dynarray bundles;
@@ -1149,15 +1375,15 @@ schedule_block(compiler_context *ctx, midgard_block *block)
         unsigned blend_offset = 0;
 
         for (;;) {
-                unsigned tag = mir_choose_bundle(instructions, worklist, len);
+                unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len);
                 midgard_bundle bundle;
 
                 if (tag == TAG_TEXTURE_4)
-                        bundle = mir_schedule_texture(instructions, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
+                        bundle = mir_schedule_texture(instructions, liveness, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
                 else if (tag == TAG_LOAD_STORE_4)
-                        bundle = mir_schedule_ldst(instructions, worklist, len);
+                        bundle = mir_schedule_ldst(instructions, liveness, worklist, len);
                 else if (tag == TAG_ALU_4)
-                        bundle = mir_schedule_alu(ctx, instructions, worklist, len);
+                        bundle = mir_schedule_alu(ctx, instructions, liveness, worklist, len);
                 else
                         break;
 
@@ -1202,6 +1428,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
 
        free(instructions); /* Allocated by flatten_mir() */
        free(worklist);
+        free(liveness);
 }
 
 void