From 54b1b71e7347fdca7900e11328d09931aec51c40 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 30 Dec 2019 00:37:35 -0800 Subject: [PATCH] intel/fs: Allow limited copy propagation of a LOAD_PAYLOAD into another. This is particularly useful in cases where register coalescing is unlikely to succeed because the LOAD_PAYLOAD isn't a plain copy -- E.g. when a LOAD_PAYLOAD is shuffling the contents of a barycentric vector in order to transform it into the PLN layout. This prevents the following shader-db regressions (including SIMD32 programs) in combination with the interpolation rework part of this series. On SKL: total instructions in shared programs: 18596672 -> 18976097 (2.04%) instructions in affected programs: 7937041 -> 8316466 (4.78%) helped: 39 HURT: 67427 LOST: 466 GAINED: 220 On SNB: total instructions in shared programs: 13993866 -> 14202963 (1.49%) instructions in affected programs: 7611309 -> 7820406 (2.75%) helped: 624 HURT: 52943 LOST: 6 GAINED: 18 Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs_copy_propagation.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 4f5493e4f43..03882b38dfb 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -454,8 +454,22 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) assert(entry->src.file == VGRF || entry->src.file == UNIFORM || entry->src.file == ATTR || entry->src.file == FIXED_GRF); + /* Avoid propagating a LOAD_PAYLOAD instruction into another if there is a + * good chance that we'll be able to eliminate the latter through register + * coalescing. If only part of the sources of the second LOAD_PAYLOAD can + * be simplified through copy propagation we would be making register + * coalescing impossible, ending up with unnecessary copies in the program. 
+ * This is also the case for is_multi_copy_payload() copies that can only + * be coalesced when the instruction is lowered into a sequence of MOVs. + * + * Worse -- In cases where the ACP entry was the result of CSE combining + * multiple LOAD_PAYLOAD subexpressions, propagating the first LOAD_PAYLOAD + * into the second would undo the work of CSE, leading to an infinite + * optimization loop. Avoid this by detecting LOAD_PAYLOAD copies from CSE + * temporaries which should match is_coalescing_payload(). + */ if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD && - inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) + (is_coalescing_payload(alloc, inst) || is_multi_copy_payload(inst))) return false; assert(entry->dst.file == VGRF); -- 2.30.2