#define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));
#define SWIZZLE_XYZW SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W)
+#define SWIZZLE_XYXX SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X)
+#define SWIZZLE_XXXX SWIZZLE(COMPONENT_X, COMPONENT_X, COMPONENT_X, COMPONENT_X)
+#define SWIZZLE_WWWW SWIZZLE(COMPONENT_W, COMPONENT_W, COMPONENT_W, COMPONENT_W)
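+
+/* Illustrative note, assuming the usual Midgard packing (COMPONENT_X..W are
+ * 0..3, stored as four 2-bit selectors from low to high): SWIZZLE_XYZW is
+ * then 0xE4, so shifting right by two per component, as emit_varying_read
+ * does later, peels off leading components, e.g. 0xE4 >> 2 selects
+ * (Y, Z, W, X) */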
#define M_LOAD_STORE(name, rname, uname) \
static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
NIR_PASS(progress, nir, midgard_nir_lower_fdot2);
+ NIR_PASS(progress, nir, nir_lower_idiv);
nir_lower_tex_options lower_tex_options = {
- .lower_rect = true
+ .lower_rect = true,
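+ /* lower_txp is a bitmask of sampler dims whose projective
+ * texturing should be lowered; ~0 lowers txp for everything */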
+ .lower_txp = ~0
};
NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
.unit = for_branch ? UNIT_SMUL : UNIT_SADD,
.ssa_args = {
-
.src0 = condition,
.src1 = condition,
.dest = SSA_FIXED_REGISTER(31),
},
+
.alu = {
.op = midgard_alu_op_iand,
- .outmod = midgard_outmod_int,
+ .outmod = midgard_outmod_int_wrap,
.reg_mode = midgard_reg_mode_32,
.dest_override = midgard_dest_override_none,
.mask = (0x3 << 6), /* w */
},
.alu = {
.op = midgard_alu_op_iand,
- .outmod = midgard_outmod_int,
+ .outmod = midgard_outmod_int_wrap,
.reg_mode = midgard_reg_mode_32,
.dest_override = midgard_dest_override_none,
.mask = expand_writemask((1 << nr_comp) - 1),
},
.alu = {
.op = midgard_alu_op_imov,
- .outmod = midgard_outmod_int,
+ .outmod = midgard_outmod_int_wrap,
.reg_mode = midgard_reg_mode_32,
.dest_override = midgard_dest_override_none,
.mask = (0x3 << 6), /* w */
ALU_CASE(iadd, iadd);
ALU_CASE(isub, isub);
ALU_CASE(imul, imul);
- ALU_CASE(iabs, iabs);
+
+ /* iabs becomes iabsdiff, with zero shoved in as the second argument */
+ ALU_CASE(iabs, iabsdiff);
+
ALU_CASE(mov, imov);
ALU_CASE(feq32, feq);
ALU_CASE(fsin, fsin);
ALU_CASE(fcos, fcos);
+ /* Second operand is an implicit #0 */
+ ALU_CASE(inot, inor);
ALU_CASE(iand, iand);
ALU_CASE(ior, ior);
ALU_CASE(ixor, ixor);
- ALU_CASE(inot, inand);
ALU_CASE(ishl, ishl);
ALU_CASE(ishr, iasr);
ALU_CASE(ushr, ilsr);
}
/* Midgard can perform certain modifiers on output of an ALU op */
- midgard_outmod outmod =
- midgard_is_integer_out_op(op) ? midgard_outmod_int :
- instr->dest.saturate ? midgard_outmod_sat : midgard_outmod_none;
+ unsigned outmod;
- if (instr->op == nir_op_fsat)
- outmod = midgard_outmod_sat;
+ if (midgard_is_integer_out_op(op)) {
+ outmod = midgard_outmod_int_wrap;
+ } else {
+ bool sat = instr->dest.saturate || instr->op == nir_op_fsat;
+ outmod = sat ? midgard_outmod_sat : midgard_outmod_none;
+ }
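+
+ /* int_wrap (plain wrapping arithmetic) plays the same identity
+ * role for integer ops that outmod_none plays for floats, which
+ * is what mir_nontrivial_outmod below relies on */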
/* fmax(a, 0.0) can turn into a .pos modifier as an optimization */
}
ins.alu.src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx);
- } else if (instr->op == nir_op_f2b32 || instr->op == nir_op_i2b32) {
+ } else if (nr_inputs == 1 && !quirk_flipped_r24) {
+ /* Lots of instructions need a 0 plonked in */
ins.ssa_args.inline_constant = false;
ins.ssa_args.src1 = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
ins.has_constants = true;
}
}
+static void
+emit_varying_read(
+ compiler_context *ctx,
+ unsigned dest, unsigned offset,
+ unsigned nr_comp, unsigned component,
+ nir_src *indirect_offset)
+{
+ /* XXX: Half-floats? */
+ /* TODO: swizzle, mask */
+
+ midgard_instruction ins = m_ld_vary_32(dest, offset);
+ ins.load_store.mask = (1 << nr_comp) - 1;
+ ins.load_store.swizzle = SWIZZLE_XYZW >> (2 * component);
+
+ midgard_varying_parameter p = {
+ .is_varying = 1,
+ .interpolation = midgard_interp_default,
+ .flat = /*var->data.interpolation == INTERP_MODE_FLAT*/ 0
+ };
+
+ unsigned u;
+ memcpy(&u, &p, sizeof(p));
+ ins.load_store.varying_parameters = u;
+
+ if (indirect_offset) {
+ /* We need to add in the dynamic index, moved to r27.w */
+ emit_indirect_offset(ctx, indirect_offset);
+ ins.load_store.unknown = 0x79e; /* xxx: what is this? */
+ } else {
+ /* Just a direct load */
+ ins.load_store.unknown = 0x1e9e; /* xxx: what is this? */
+ }
+
+ emit_mir_instruction(ctx, ins);
+}
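+
+/* Illustrative usage with hypothetical values: reading component 1 of a
+ * three-component varying at offset 2 is emit_varying_read(ctx, dest, 2, 3,
+ * 1, NULL), yielding mask 0x7 and swizzle SWIZZLE_XYZW >> 2, so the load
+ * starts at the y component */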
+
static void
emit_sysval_read(compiler_context *ctx, nir_intrinsic_instr *instr)
{
offset += nir_src_as_uint(instr->src[0]);
}
+ /* We may need to apply a fractional offset, i.e. the component
+ * within the vec4 slot */
+ int component = instr->intrinsic == nir_intrinsic_load_input ?
+ nir_intrinsic_component(instr) : 0;
reg = nir_dest_index(ctx, &instr->dest);
if (instr->intrinsic == nir_intrinsic_load_uniform && !ctx->is_blend) {
emit_uniform_read(ctx, reg, ctx->sysval_count + offset, !direct ? &instr->src[0] : NULL);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
- /* XXX: Half-floats? */
- /* TODO: swizzle, mask */
-
- midgard_instruction ins = m_ld_vary_32(reg, offset);
- ins.load_store.mask = (1 << nr_comp) - 1;
-
- midgard_varying_parameter p = {
- .is_varying = 1,
- .interpolation = midgard_interp_default,
- .flat = /*var->data.interpolation == INTERP_MODE_FLAT*/ 0
- };
-
- unsigned u;
- memcpy(&u, &p, sizeof(p));
- ins.load_store.varying_parameters = u;
-
- if (direct) {
- /* We have the offset totally ready */
- ins.load_store.unknown = 0x1e9e; /* xxx: what is this? */
- } else {
- /* We have it partially ready, but we need to
- * add in the dynamic index, moved to r27.w */
- emit_indirect_offset(ctx, &instr->src[0]);
- ins.load_store.unknown = 0x79e; /* xxx: what is this? */
- }
-
- emit_mir_instruction(ctx, ins);
+ emit_varying_read(ctx, reg, offset, nr_comp, component, !direct ? &instr->src[0] : NULL);
} else if (ctx->is_blend) {
/* For blend shaders, load the input color, which is
* preloaded to r0 */
ctx->fragment_output = reg;
} else if (ctx->stage == MESA_SHADER_VERTEX) {
/* Varyings are written into one of two special
- * varying register, r26 or r27. The register itself is selected as the register
- * in the st_vary instruction, minus the base of 26. E.g. write into r27 and then call st_vary(1)
- *
- * Normally emitting fmov's is frowned upon,
- * but due to unique constraints of
- * REGISTER_VARYING, fmov emission + a
- * dedicated cleanup pass is the only way to
- * guarantee correctness when considering some
- * (common) edge cases XXX: FIXME */
-
- /* If this varying corresponds to a constant (why?!),
- * emit that now since it won't get picked up by
- * hoisting (since there is no corresponding move
- * emitted otherwise) */
-
- void *constant_value = _mesa_hash_table_u64_search(ctx->ssa_constants, reg + 1);
-
- if (constant_value) {
- /* Special case: emit the varying write
- * directly to r26 (looks funny in asm but it's
- * fine) and emit the store _now_. Possibly
- * slightly slower, but this is a really stupid
- * special case anyway (why on earth would you
- * have a constant varying? Your own fault for
- * slightly worse perf :P) */
-
- midgard_instruction ins = v_fmov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), blank_alu_src, SSA_FIXED_REGISTER(26));
- attach_constants(ctx, &ins, constant_value, reg + 1);
- emit_mir_instruction(ctx, ins);
+ * varying registers, r26 or r27. The register itself is
+ * selected as the register in the st_vary instruction,
+ * minus the base of 26. E.g. write into r27 and then
+ * call st_vary(1) */
- midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
- st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
- emit_mir_instruction(ctx, st);
- } else {
- /* Do not emit the varying yet -- instead, just mark down that we need to later */
+ midgard_instruction ins = v_fmov(reg, blank_alu_src, SSA_FIXED_REGISTER(26));
+ emit_mir_instruction(ctx, ins);
- _mesa_hash_table_u64_insert(ctx->ssa_varyings, reg + 1, (void *) ((uintptr_t) (offset + 1)));
- }
+ /* We should have been vectorized already, which also lets
+ * us ignore the mask, because the mask component on st_vary
+ * is (as far as I can tell) ignored [the blob sets it to
+ * zero] */
+ assert(nir_intrinsic_component(instr) == 0);
+
+ midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
+ st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
+ emit_mir_instruction(ctx, st);
} else {
DBG("Unknown store\n");
assert(0);
}
}
+static unsigned
+midgard_tex_op(nir_texop op)
+{
+ switch (op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ return TEXTURE_OP_NORMAL;
+ case nir_texop_txl:
+ return TEXTURE_OP_LOD;
+ default:
+ unreachable("Unhandled texture op");
+ }
+}
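+
+/* Note that txb and txl both feed their extra parameter through the
+ * lod_register path set up in emit_tex below; the opcode alone then
+ * distinguishes bias (TEXTURE_OP_NORMAL) from explicit LOD (TEXTURE_OP_LOD) */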
+
static void
emit_tex(compiler_context *ctx, nir_tex_instr *instr)
{
/* TODO */
//assert (!instr->sampler);
//assert (!instr->texture_array_size);
- assert (instr->op == nir_texop_tex);
/* Allocate registers via a round-robin scheme, alternating between the two texture registers */
int reg = ctx->texture_op_count & 1;
int sampler_index = texture_index;
for (unsigned i = 0; i < instr->num_srcs; ++i) {
+ int reg = SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + in_reg);
+ int index = nir_src_index(ctx, &instr->src[i].src);
+ midgard_vector_alu_src alu_src = blank_alu_src;
+
switch (instr->src[i].src_type) {
case nir_tex_src_coord: {
- int index = nir_src_index(ctx, &instr->src[i].src);
-
- midgard_vector_alu_src alu_src = blank_alu_src;
-
- int reg = SSA_FIXED_REGISTER(REGISTER_TEXTURE_BASE + in_reg);
-
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
/* For cubemaps, we need to load coords into
* special r27, and then use a special ld/st op
- * to copy into the texture register */
+ * to select the face and copy the xy into the
+ * texture register */
alu_src.swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_X);
midgard_instruction st = m_st_cubemap_coords(reg, 0);
st.load_store.unknown = 0x24; /* XXX: What is this? */
- st.load_store.mask = 0x3; /* xy? */
+ st.load_store.mask = 0x3; /* xy */
st.load_store.swizzle = alu_src.swizzle;
emit_mir_instruction(ctx, st);
alu_src.swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_X, COMPONENT_X);
midgard_instruction ins = v_fmov(index, alu_src, reg);
+ ins.alu.mask = expand_writemask(0x3); /* xy */
emit_mir_instruction(ctx, ins);
}
break;
}
+ case nir_tex_src_bias:
+ case nir_tex_src_lod: {
+ /* To keep RA simple, we put the bias/LOD into the w
+ * component of the input source, which is otherwise in xy */
+
+ alu_src.swizzle = SWIZZLE_XXXX;
+
+ midgard_instruction ins = v_fmov(index, alu_src, reg);
+ ins.alu.mask = expand_writemask(1 << COMPONENT_W);
+ emit_mir_instruction(ctx, ins);
+ break;
+ }
+
default: {
DBG("Unknown source type\n");
//assert(0);
midgard_instruction ins = {
.type = TAG_TEXTURE_4,
.texture = {
- .op = TEXTURE_OP_NORMAL,
+ .op = midgard_tex_op(instr->op),
.format = midgard_tex_format(instr->sampler_dim),
.texture_handle = texture_index,
.sampler_handle = sampler_index,
- /* TODO: Don't force xyzw */
- .swizzle = SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ /* TODO: Let regalloc pick this swizzle rather than forcing xyzw */
+ .swizzle = SWIZZLE_XYZW,
.mask = 0xF,
/* TODO: half */
- //.in_reg_full = 1,
+ .in_reg_full = 1,
+ .in_reg_swizzle = SWIZZLE_XYXX,
.out_full = 1,
- .filter = 1,
-
/* Always 1 */
.unknown7 = 1,
-
- /* Assume we can continue; hint it out later */
- .cont = 1,
}
};
ins.texture.in_reg_select = in_reg;
ins.texture.out_reg_select = out_reg;
- /* TODO: Dynamic swizzle input selection, half-swizzles? */
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_3D) {
- ins.texture.in_reg_swizzle_right = COMPONENT_X;
- ins.texture.in_reg_swizzle_left = COMPONENT_Y;
- //ins.texture.in_reg_swizzle_third = COMPONENT_Z;
- } else {
- ins.texture.in_reg_swizzle_left = COMPONENT_X;
- ins.texture.in_reg_swizzle_right = COMPONENT_Y;
- //ins.texture.in_reg_swizzle_third = COMPONENT_X;
+ /* Set up the bias/LOD if necessary; only register mode is
+ * supported right now. TODO: Immediate mode for performance gains */
+
+ if (instr->op == nir_texop_txb || instr->op == nir_texop_txl) {
+ ins.texture.lod_register = true;
+
+ midgard_tex_register_select sel = {
+ .select = in_reg,
+ .full = 1,
+
+ /* w */
+ .component_lo = 1,
+ .component_hi = 1
+ };
+
+ uint8_t packed;
+ memcpy(&packed, &sel, sizeof(packed));
+ ins.texture.bias = packed;
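+
+ /* As far as we can tell, component_lo/component_hi concatenate
+ * into the 2-bit component index, so 0b11 = COMPONENT_W here,
+ * matching the fmov above that stashed the bias/LOD in w */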
}
emit_mir_instruction(ctx, ins);
static void
map_ssa_to_alias(compiler_context *ctx, int *ref)
{
+ /* The sign bit deliberately marks unused sources */
+ if (*ref < 0)
+ return;
+
unsigned int alias = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_alias, *ref + 1);
if (alias) {
return progress;
}
+/* Dead code elimination for branches at the end of a block - only one branch
+ * per block is legal semantically */
+
+static void
+midgard_opt_cull_dead_branch(compiler_context *ctx, midgard_block *block)
+{
+ bool branched = false;
+
+ mir_foreach_instr_in_block_safe(block, ins) {
+ if (!midgard_is_branch_unit(ins->unit)) continue;
+
+ /* We ignore prepacked branches since the fragment epilogue is
+ * just generally special */
+ if (ins->prepacked_branch) continue;
+
+ /* Discards are similarly special and may not correspond to the
+ * end of a block */
+
+ if (ins->branch.target_type == TARGET_DISCARD) continue;
+
+ if (branched) {
+ /* We already branched, so this is dead */
+ mir_remove_instruction(ins);
+ }
+
+ branched = true;
+ }
+}
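+
+/* For instance (illustrative MIR shapes, not actual compiler output), a block
+ * left over from nested control flow might end with:
+ *
+ *    branch.cond   -> block_2   <-- kept: the first branch
+ *    branch.uncond -> block_3   <-- culled: control already left the block
+ */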
+
static bool
mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask)
{
return false;
}
+static bool
+mir_nontrivial_source2_mod(midgard_instruction *ins)
+{
+ unsigned mask = squeeze_writemask(ins->alu.mask);
+ bool is_int = midgard_is_integer_op(ins->alu.op);
+
+ midgard_vector_alu_src src2 =
+ vector_alu_from_unsigned(ins->alu.src2);
+
+ return mir_nontrivial_mod(src2, is_int, mask);
+}
+
+static bool
+mir_nontrivial_outmod(midgard_instruction *ins)
+{
+ bool is_int = midgard_is_integer_op(ins->alu.op);
+ unsigned mod = ins->alu.outmod;
+
+ if (is_int)
+ return mod != midgard_outmod_int_wrap;
+ else
+ return mod != midgard_outmod_none;
+}
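+
+/* Together these predicates guard copy propagation: a move whose source
+ * modifier (e.g. abs/neg or a swizzle under the written mask) or output
+ * modifier does real work cannot be rewritten away without changing results,
+ * so only the identity outmods (none for floats, int_wrap for ints) count
+ * as trivial */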
+
static bool
midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block)
{
if (ins->ssa_args.inline_constant) continue;
if (ins->has_constants) continue;
- /* Also, if the move has side effects, we're helpless */
-
- midgard_vector_alu_src src =
- vector_alu_from_unsigned(ins->alu.src2);
- unsigned mask = squeeze_writemask(ins->alu.mask);
- bool is_int = midgard_is_integer_op(ins->alu.op);
-
- if (mir_nontrivial_mod(src, is_int, mask)) continue;
- if (ins->alu.outmod != midgard_outmod_none) continue;
+ if (mir_nontrivial_source2_mod(ins)) continue;
+ if (mir_nontrivial_outmod(ins)) continue;
/* We're clear -- rewrite */
mir_rewrite_index_src(ctx, to, from);
return progress;
}
+/* fmov.pos is an idiom for fpos. Propagate the .pos up to the source, so that
+ * the move itself can then be propagated away entirely */
+
+static bool
+mir_compose_float_outmod(midgard_outmod_float *outmod, midgard_outmod_float comp)
+{
+ /* Nothing to do */
+ if (comp == midgard_outmod_none)
+ return true;
+
+ if (*outmod == midgard_outmod_none) {
+ *outmod = comp;
+ return true;
+ }
+
+ /* TODO: Compose rules */
+ return false;
+}
+
+static bool
+midgard_opt_pos_propagate(compiler_context *ctx, midgard_block *block)
+{
+ bool progress = false;
+
+ mir_foreach_instr_in_block_safe(block, ins) {
+ if (ins->type != TAG_ALU_4) continue;
+ if (ins->alu.op != midgard_alu_op_fmov) continue;
+ if (ins->alu.outmod != midgard_outmod_pos) continue;
+
+ /* TODO: Registers? */
+ unsigned src = ins->ssa_args.src1;
+ if (src >= ctx->func->impl->ssa_alloc) continue;
+ assert(!mir_has_multiple_writes(ctx, src));
+
+ /* There might be a source modifier, too */
+ if (mir_nontrivial_source2_mod(ins)) continue;
+
+ /* Backpropagate the modifier */
+ mir_foreach_instr_in_block_from_rev(block, v, mir_prev_op(ins)) {
+ if (v->type != TAG_ALU_4) continue;
+ if (v->ssa_args.dest != src) continue;
+
+ /* Can we even take a float outmod? */
+ if (midgard_is_integer_out_op(v->alu.op)) continue;
+
+ midgard_outmod_float temp = v->alu.outmod;
+
+ /* Throw in the towel if the modifiers don't compose. Note
+ * we check the compose result directly rather than OR-ing
+ * into `progress`, which could hide a failure once an
+ * earlier iteration had already made progress */
+ if (!mir_compose_float_outmod(&temp, ins->alu.outmod))
+ break;
+
+ /* Otherwise, transfer the modifier */
+ v->alu.outmod = temp;
+ ins->alu.outmod = midgard_outmod_none;
+ progress = true;
+
+ break;
+ }
+ }
+
+ return progress;
+}
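+
+/* Sketch of the transform on hypothetical temporaries:
+ *
+ *    fadd      t0, a, b           fadd.pos  t0, a, b
+ *    fmov.pos  t1, t0      ==>    fmov      t1, t0
+ *
+ * leaving a plain fmov that midgard_opt_copy_prop can then eliminate */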
+
static bool
midgard_opt_copy_prop_tex(compiler_context *ctx, midgard_block *block)
{
}
}
-/* Emit varying stores late */
-
-static void
-midgard_emit_store(compiler_context *ctx, midgard_block *block) {
- /* Iterate in reverse to get the final write, rather than the first */
-
- mir_foreach_instr_in_block_safe_rev(block, ins) {
- /* Check if what we just wrote needs a store */
- int idx = ins->ssa_args.dest;
- uintptr_t varying = ((uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_varyings, idx + 1));
-
- if (!varying) continue;
-
- varying -= 1;
-
- /* We need to store to the appropriate varying, so emit the
- * move/store */
-
- /* TODO: Integrate with special purpose RA (and scheduler?) */
- bool high_varying_register = false;
-
- midgard_instruction mov = v_fmov(idx, blank_alu_src, SSA_FIXED_REGISTER(REGISTER_VARYING_BASE + high_varying_register));
-
- midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
- st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
-
- mir_insert_instruction_before(mir_next_op(ins), st);
- mir_insert_instruction_before(mir_next_op(ins), mov);
-
- /* We no longer need to store this varying */
- _mesa_hash_table_u64_remove(ctx->ssa_varyings, idx + 1);
- }
-}
-
/* If there are leftovers after the below pass, emit actual fmov
* instructions for the slow-but-correct path */
.op = midgard_alu_op_imov,
.reg_mode = midgard_reg_mode_8,
.dest_override = midgard_dest_override_none,
- .outmod = midgard_outmod_int,
+ .outmod = midgard_outmod_int_wrap,
.mask = 0xFF,
.src1 = vector_alu_srco_unsigned(blank_alu_src),
.src2 = vector_alu_srco_unsigned(blank_alu_src),
/* Perform heavylifting for aliasing */
actualise_ssa_to_alias(ctx);
- midgard_emit_store(ctx, this_block);
midgard_pair_load_store(ctx, this_block);
/* Append fragment shader epilogue (value writeout) */
/* TODO: Decide this at runtime */
ctx->uniform_cutoff = 8;
- /* Assign var locations early, so the epilogue can use them if necessary */
-
- nir_assign_var_locations(&nir->outputs, &nir->num_outputs, glsl_type_size);
- nir_assign_var_locations(&nir->inputs, &nir->num_inputs, glsl_type_size);
- nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, glsl_type_size);
-
/* Initialize at a global (not block) level hash tables */
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
- ctx->ssa_varyings = _mesa_hash_table_u64_create(NULL);
ctx->ssa_to_alias = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
struct exec_list *varyings =
ctx->stage == MESA_SHADER_VERTEX ? &nir->outputs : &nir->inputs;
+ unsigned max_varying = 0;
nir_foreach_variable(var, varyings) {
unsigned loc = var->data.driver_location;
unsigned sz = glsl_type_size(var->type, FALSE);
- for (int c = 0; c < sz; ++c) {
- program->varyings[loc + c] = var->data.location;
+ for (int c = loc; c < (loc + sz); ++c) {
+ program->varyings[c] = var->data.location;
+ max_varying = MAX2(max_varying, c);
}
}
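+
+ /* E.g. (hypothetical layout) a mat4 varying with driver_location 1 and
+ * glsl_type_size 4 fills varyings[1..4] and raises max_varying to 4, so
+ * varying_count below becomes 5 */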
- /* Lower gl_Position pre-optimisation */
+ /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+ * (so we don't accidentally duplicate the epilogue since mesa/st has
+ * messed with our I/O quite a bit already) */
+
+ NIR_PASS_V(nir, nir_lower_vars_to_ssa);
if (ctx->stage == MESA_SHADER_VERTEX)
NIR_PASS_V(nir, nir_lower_viewport_transform);
memcpy(program->sysvals, ctx->sysvals, sizeof(ctx->sysvals[0]) * ctx->sysval_count);
program->attribute_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_inputs : 0;
- program->varying_count = (ctx->stage == MESA_SHADER_VERTEX) ? nir->num_outputs : ((ctx->stage == MESA_SHADER_FRAGMENT) ? nir->num_inputs : 0);
+ program->varying_count = max_varying + 1; /* Fencepost: highest index plus one */
nir_foreach_function(func, nir) {
if (!func->impl)
progress = false;
mir_foreach_block(ctx, block) {
+ progress |= midgard_opt_pos_propagate(ctx, block);
progress |= midgard_opt_copy_prop(ctx, block);
progress |= midgard_opt_copy_prop_tex(ctx, block);
progress |= midgard_opt_dead_code_eliminate(ctx, block);
}
} while (progress);
+ /* Nested control-flow can result in dead branches at the end of the
+ * block. This messes with our analysis and is just dead code, so cull
+ * them */
+ mir_foreach_block(ctx, block) {
+ midgard_opt_cull_dead_branch(ctx, block);
+ }
+
/* Schedule! */
schedule_program(ctx);
* last is an ALU, then it's also 1... */
mir_foreach_block(ctx, block) {
- util_dynarray_foreach(&block->bundles, midgard_bundle, bundle) {
+ mir_foreach_bundle_in_block(block, bundle) {
int lookahead = 1;
if (current_bundle + 1 < bundle_count) {