X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_lower_vec_to_movs.c;h=eec994f15e53881580659ecfcbce91713b0f5db2;hb=7f106a2b5d0b27c1ce47a4b335c4cc8ae9cd460b;hp=0ba374937e8cd7a19cb19936b0860ead790617a9;hpb=1557344c81c78802ec3325271eecce6cfae1540b;p=mesa.git diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c index 0ba374937e8..eec994f15e5 100644 --- a/src/compiler/nir/nir_lower_vec_to_movs.c +++ b/src/compiler/nir/nir_lower_vec_to_movs.c @@ -57,7 +57,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) { assert(start_idx < nir_op_infos[vec->op].num_inputs); - nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov); + nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov); nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov); nir_alu_dest_copy(&mov->dest, &vec->dest, mov); @@ -118,7 +118,7 @@ has_replicated_dest(nir_alu_instr *alu) * can then call insert_mov as normal. */ static unsigned -try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) +try_coalesce(nir_alu_instr *vec, unsigned start_idx) { assert(start_idx < nir_op_infos[vec->op].num_inputs); @@ -131,7 +131,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) /* If we are going to do a reswizzle, then the vecN operation must be the * only use of the source value. We also can't have any source modifiers. */ - nir_foreach_use(vec->src[start_idx].src.ssa, src) { + nir_foreach_use(src, vec->src[start_idx].src.ssa) { if (src->parent_instr != &vec->instr) return 0; @@ -140,7 +140,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) return 0; } - if (!list_empty(&vec->src[start_idx].src.ssa->if_uses)) + if (!list_is_empty(&vec->src[start_idx].src.ssa->if_uses)) return 0; if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu) @@ -215,7 +215,7 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl) bool progress = false; nir_shader *shader = impl->function->shader; - nir_foreach_instr_safe(block, instr) { + nir_foreach_instr_safe(instr, block) { if (instr->type != nir_instr_type_alu) continue; @@ -230,6 +230,7 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl) continue; /* The loop */ } + bool vec_had_ssa_dest = vec->dest.dest.is_ssa; if (vec->dest.dest.is_ssa) { /* Since we insert multiple MOVs, we have a register destination. */ nir_register *reg = nir_local_reg_create(impl); @@ -263,8 +264,12 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl) if (!(vec->dest.write_mask & (1 << i))) continue; - if (!(finished_write_mask & (1 << i))) - finished_write_mask |= try_coalesce(vec, i, shader); + /* Coalescing moves the register writes from the vec up to the ALU + * instruction in the source. We can only do this if the original + * vecN had an SSA destination. + */ + if (vec_had_ssa_dest && !(finished_write_mask & (1 << i))) + finished_write_mask |= try_coalesce(vec, i); if (!(finished_write_mask & (1 << i))) finished_write_mask |= insert_mov(vec, i, shader); @@ -300,7 +305,7 @@ nir_lower_vec_to_movs(nir_shader *shader) { bool progress = false; - nir_foreach_function(shader, function) { + nir_foreach_function(function, shader) { if (function->impl) progress = nir_lower_vec_to_movs_impl(function->impl) || progress; }