* creating duplicate variants..
*/
- if (ir3_key_lowers_nir(&so->key)) {
- nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
- ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
- } else {
- /* fast-path for shader key that lowers nothing in NIR: */
- ctx->s = nir_shader_clone(ctx, so->shader->nir);
- }
+ ctx->s = nir_shader_clone(ctx, so->shader->nir);
+ if (ir3_key_lowers_nir(&so->key))
+ ir3_optimize_nir(so->shader, ctx->s, &so->key);
/* this needs to be the last pass run, so do this here instead of
* in ir3_optimize_nir():
*/
NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
+
+ /* We want to lower nir_op_imul as late as possible, to catch also
+ * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
+ * However, we want a final swing of a few passes to have a chance
+ * at optimizing the result.
+ */
+ bool progress = false;
+ NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
+ if (progress) {
+ NIR_PASS_V(ctx->s, nir_opt_algebraic);
+ NIR_PASS_V(ctx->s, nir_opt_copy_prop_vars);
+ NIR_PASS_V(ctx->s, nir_opt_dead_write_vars);
+ NIR_PASS_V(ctx->s, nir_opt_dce);
+ NIR_PASS_V(ctx->s, nir_opt_constant_folding);
+ }
+
NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
if (ir3_shader_debug & IR3_DBG_DISASM) {
/* if not relative store, don't create an extra mov, since that
* ends up being difficult for cp to remove.
+ *
+ * Also, don't skip the mov if the src is meta (like fanout/split),
+ * since that creates a situation that RA can't really handle properly.
*/
- if (!address) {
+ if (!address && !is_meta(src)) {
dst = src->regs[0];
src->barrier_class |= IR3_BARRIER_ARRAY_W;