From e581ddeeeecf9475d0634794ee126096d0f23135 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 27 Apr 2020 15:31:12 -0500 Subject: [PATCH] intel/fs: Don't delete coalesced MOVs if they have a cmod Shader-db results on ICL: total instructions in shared programs: 17133088 -> 17133287 (<.01%) instructions in affected programs: 61300 -> 61499 (0.32%) helped: 0 HURT: 199 This means it's likely fixing 199 bugs. :-) All the changed shaders are in Mad Max. It's surprisingly difficult to get the back-end compiler to generate a pattern that hits this because we don't tend to emit a lot of coalescable MOVs. The pattern in Mad Max that's able to hit this is fsign(fsat(x)) under the right conditions. Closes: #2820 Cc: mesa-stable@lists.freedesktop.org Tested-by: Ian Romanick Reviewed-by: Ian Romanick Part-of: --- src/intel/compiler/brw_fs_register_coalesce.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index 5d24240c393..671ced5bc95 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -277,13 +277,26 @@ fs_visitor::register_coalesce() progress = true; for (int i = 0; i < src_size; i++) { - if (mov[i]) { + if (!mov[i]) + continue; + + if (mov[i]->conditional_mod == BRW_CONDITIONAL_NONE) { mov[i]->opcode = BRW_OPCODE_NOP; - mov[i]->conditional_mod = BRW_CONDITIONAL_NONE; mov[i]->dst = reg_undef; for (int j = 0; j < mov[i]->sources; j++) { mov[i]->src[j] = reg_undef; } + } else { + /* If we have a conditional modifier, rewrite the MOV to be a + * MOV.cmod from the coalesced register. Hopefully, cmod + * propagation will clean this up and move it to the instruction + * that writes the register. If not, this keeps things correct + * while still letting us coalesce. 
+ */ + assert(mov[i]->opcode == BRW_OPCODE_MOV); + assert(mov[i]->sources == 1); + mov[i]->src[0] = mov[i]->dst; + mov[i]->dst = retype(brw_null_reg(), mov[i]->dst.type); } } -- 2.30.2