{
assert(start_idx < nir_op_infos[vec->op].num_inputs);
- nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+ nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
* can then call insert_mov as normal.
*/
static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx)
{
assert(start_idx < nir_op_infos[vec->op].num_inputs);
/* If we are going to do a reswizzle, then the vecN operation must be the
* only use of the source value. We also can't have any source modifiers.
*/
- nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+ nir_foreach_use(src, vec->src[start_idx].src.ssa) {
if (src->parent_instr != &vec->instr)
return 0;
return 0;
}
- if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+ if (!list_is_empty(&vec->src[start_idx].src.ssa->if_uses))
return 0;
if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
continue; /* The loop */
}
+ bool vec_had_ssa_dest = vec->dest.dest.is_ssa;
if (vec->dest.dest.is_ssa) {
/* Since we insert multiple MOVs, we have a register destination. */
nir_register *reg = nir_local_reg_create(impl);
if (!(vec->dest.write_mask & (1 << i)))
continue;
- if (!(finished_write_mask & (1 << i)))
- finished_write_mask |= try_coalesce(vec, i, shader);
+ /* Coalescing moves the register writes from the vec up to the ALU
+ * instruction in the source. We can only do this if the original
+ * vecN had an SSA destination.
+ */
+ if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
+ finished_write_mask |= try_coalesce(vec, i);
if (!(finished_write_mask & (1 << i)))
finished_write_mask |= insert_mov(vec, i, shader);