* moves with partial writes.
*/
-struct vec_to_movs_state {
- nir_function_impl *impl;
- bool progress;
-};
-
static bool
src_matches_dest_reg(nir_dest *dest, nir_src *src)
{
}
}
+ unsigned channels_handled = mov->dest.write_mask;
+
/* In some situations (if the vecN is involved in a phi-web), we can end
* up with a mov from a register to itself. Some of those channels may end
* up doing nothing and there's no reason to have them as part of the mov.
ralloc_free(mov);
}
- return mov->dest.write_mask;
+ return channels_handled;
}
static bool
* can then call insert_mov as normal.
*/
static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx)
{
assert(start_idx < nir_op_infos[vec->op].num_inputs);
/* If we are going to do a reswizzle, then the vecN operation must be the
* only use of the source value. We also can't have any source modifiers.
*/
- nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+ nir_foreach_use(src, vec->src[start_idx].src.ssa) {
if (src->parent_instr != &vec->instr)
return 0;
}
static bool
-lower_vec_to_movs_block(nir_block *block, void *void_state)
+lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
{
- struct vec_to_movs_state *state = void_state;
- nir_function_impl *impl = state->impl;
+ bool progress = false;
nir_shader *shader = impl->function->shader;
- nir_foreach_instr_safe(block, instr) {
+ nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_alu)
continue;
continue; /* The loop */
}
+ bool vec_had_ssa_dest = vec->dest.dest.is_ssa;
if (vec->dest.dest.is_ssa) {
/* Since we insert multiple MOVs, we have a register destination. */
nir_register *reg = nir_local_reg_create(impl);
reg->num_components = vec->dest.dest.ssa.num_components;
+ reg->bit_size = vec->dest.dest.ssa.bit_size;
nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
if (!(vec->dest.write_mask & (1 << i)))
continue;
- if (!(finished_write_mask & (1 << i)))
- finished_write_mask |= try_coalesce(vec, i, shader);
+ /* Coalescing moves the register writes from the vec up to the ALU
+ * instruction in the source. That is only safe when the original vecN
+ * had an SSA destination, because the register replacing it is fresh.
+ */
+ if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
+ finished_write_mask |= try_coalesce(vec, i);
if (!(finished_write_mask & (1 << i)))
finished_write_mask |= insert_mov(vec, i, shader);
nir_instr_remove(&vec->instr);
ralloc_free(vec);
- state->progress = true;
+ progress = true;
}
- return true;
+ return progress;
}
static bool
nir_lower_vec_to_movs_impl(nir_function_impl *impl)
{
- struct vec_to_movs_state state = { impl, false };
+ bool progress = false;
- nir_foreach_block(impl, lower_vec_to_movs_block, &state);
+ nir_foreach_block(block, impl) {
+ progress |= lower_vec_to_movs_block(block, impl);
+ }
- if (state.progress) {
+ if (progress) {
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
}
- return state.progress;
+ return progress;
}
bool
{
bool progress = false;
- nir_foreach_function(shader, function) {
+ nir_foreach_function(function, shader) {
if (function->impl)
progress = nir_lower_vec_to_movs_impl(function->impl) || progress;
}