From f60226fd72331398dd5fd239e9d1b4feccc91988 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 12 Feb 2021 15:54:48 +0000 Subject: [PATCH] df: Record all definitions in DF_LR_BB_INFO->def [PR98863] df_lr_bb_local_compute has: FOR_EACH_INSN_INFO_DEF (def, insn_info) /* If the def is to only part of the reg, it does not kill the other defs that reach here. */ if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) However, as noted in the comment in the patch and below, almost all partial definitions have an associated use. This means that the confluence function: IN = (OUT & ~DEF) | USE is unaffected by whether partial definitions are in DEF or not. Even though the choice doesn't matter for the LR problem itself, it's IMO much more convenient for consumers if DEF contains all the definitions in the block. The only pre-RTL-SSA code that tries to consume DEF directly is shrink-wrap.c, which already has to work around the incompleteness of the information: /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e. at -O1, just give up searching NEXT_BLOCK. */ I hit the same problem when trying to fix the RTL-SSA part of PR98863. This patch treats partial definitions as both a def and a use, just like the df_ref records almost always do. To show that partial definitions almost always have uses: DF_REF_CONDITIONAL: Added by: case COND_EXEC: df_defs_record (collection_rec, COND_EXEC_CODE (x), bb, insn_info, DF_REF_CONDITIONAL); break; Later, df_get_conditional_uses creates uses for all DF_REF_CONDITIONAL definitions. DF_REF_PARTIAL: In total, there are 4 locations at which we add partial definitions. Case 1: if (GET_CODE (dst) == STRICT_LOW_PART) { flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | DF_REF_STRICT_LOW_PART; loc = &XEXP (dst, 0); dst = *loc; } Corresponding use: case STRICT_LOW_PART: { rtx *temp = &XEXP (dst, 0); /* A strict_low_part uses the whole REG and not just the SUBREG. */ dst = XEXP (dst, 0); df_uses_record (collection_rec, (GET_CODE (dst) == SUBREG) ? &SUBREG_REG (dst) : temp, DF_REF_REG_USE, bb, insn_info, DF_REF_READ_WRITE | DF_REF_STRICT_LOW_PART); } break; Case 2: if (GET_CODE (dst) == ZERO_EXTRACT) { flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | DF_REF_ZERO_EXTRACT; loc = &XEXP (dst, 0); dst = *loc; } Corresponding use: case ZERO_EXTRACT: { df_uses_record (collection_rec, &XEXP (dst, 1), DF_REF_REG_USE, bb, insn_info, flags); df_uses_record (collection_rec, &XEXP (dst, 2), DF_REF_REG_USE, bb, insn_info, flags); if (GET_CODE (XEXP (dst,0)) == MEM) df_uses_record (collection_rec, &XEXP (dst, 0), DF_REF_REG_USE, bb, insn_info, flags); else df_uses_record (collection_rec, &XEXP (dst, 0), DF_REF_REG_USE, bb, insn_info, DF_REF_READ_WRITE | DF_REF_ZERO_EXTRACT); ----------------------------^^^^^^^^^^^^^^^^^ } break; Case 3: else if (GET_CODE (dst) == SUBREG && REG_P (SUBREG_REG (dst))) { if (read_modify_subreg_p (dst)) flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL; flags |= DF_REF_SUBREG; df_ref_record (DF_REF_REGULAR, collection_rec, dst, loc, bb, insn_info, DF_REF_REG_DEF, flags); } Corresponding use: case SUBREG: if (read_modify_subreg_p (dst)) { df_uses_record (collection_rec, &SUBREG_REG (dst), DF_REF_REG_USE, bb, insn_info, flags | DF_REF_READ_WRITE | DF_REF_SUBREG); break; } Case 4: /* If this is a multiword hardreg, we create some extra datastructures that will enable us to easily build REG_DEAD and REG_UNUSED notes. */ if (collection_rec && (endregno != regno + 1) && insn_info) { /* Sets to a subreg of a multiword register are partial. Sets to a non-subreg of a multiword register are not. */ if (GET_CODE (reg) == SUBREG) ref_flags |= DF_REF_PARTIAL; ref_flags |= DF_REF_MW_HARDREG; Corresponding use: None. However, this case should be rare to non-existent on most targets, and the current handling seems suspect. See the comment in the patch for more details. gcc/ * df-problems.c (df_lr_bb_local_compute): Treat partial definitions as read-modify operations. gcc/testsuite/ * gcc.dg/rtl/aarch64/multi-subreg-1.c: New test. --- gcc/df-problems.c | 54 ++++++++++++++++--- .../gcc.dg/rtl/aarch64/multi-subreg-1.c | 19 +++++++ 2 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c diff --git a/gcc/df-problems.c b/gcc/df-problems.c index 8fe58581f32..83b200baf9d 100644 --- a/gcc/df-problems.c +++ b/gcc/df-problems.c @@ -842,14 +842,54 @@ df_lr_bb_local_compute (unsigned int bb_index) df_insn_info *insn_info = DF_INSN_INFO_GET (insn); FOR_EACH_INSN_INFO_DEF (def, insn_info) - /* If the def is to only part of the reg, it does - not kill the other defs that reach here. */ - if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) - { - unsigned int dregno = DF_REF_REGNO (def); - bitmap_set_bit (&bb_info->def, dregno); + { + /* If the definition is to only part of the register, it will + usually have a corresponding use. For example, stores to one + word of a multiword register R have both a use and a partial + definition of R. + + In those cases, the LR confluence function: + + IN = (OUT & ~DEF) | USE + + is unaffected by whether we count the partial definition or not. + However, it's more convenient for consumers if DEF contains + *all* the registers defined in a block. + + The only current case in which we record a partial definition + without a corresponding use is if the destination is the + multi-register subreg of a hard register. An artificial + example of this is: + + (set (subreg:TI (reg:V8HI x0) 0) (const_int -1)) + + on AArch64. This is described as a DF_REF_PARTIAL + definition of x0 and x1 with no corresponding uses. + In previous versions of GCC, the instruction had no + effect on LR (that is, LR acted as though the instruction + didn't exist). + + It seems suspect that this case is treated differently. + Either the instruction should be a full definition of x0 and x1, + or the definition should be treated in the same way as other + partial definitions, such as strict_lowparts or subregs that + satisfy read_modify_subreg_p. + + Fortunately, multi-register subregs of hard registers should + be rare. They should be folded into a plain REG if the target + allows that (as AArch64 does for example above). + + Here we treat the cases alike by forcing a use even in the rare + case that no DF_REF_REG_USE is recorded. That is, we model all + partial definitions as both a use and a definition of the + register. */ + unsigned int dregno = DF_REF_REGNO (def); + bitmap_set_bit (&bb_info->def, dregno); + if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)) + bitmap_set_bit (&bb_info->use, dregno); + else bitmap_clear_bit (&bb_info->use, dregno); - } + } FOR_EACH_INSN_INFO_USE (use, insn_info) /* Add use to set of uses in this BB. */ diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c b/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c new file mode 100644 index 00000000000..d7be5592e78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c @@ -0,0 +1,19 @@ +/* { dg-additional-options "-O -fdump-rtl-cse1-all" } */ + +__int128 __RTL (startwith ("vregs")) foo (void) +{ +(function "foo" + (insn-chain + (block 2 + (edge-from entry (flags "FALLTHRU")) + (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK) + (cnote 2 NOTE_INSN_FUNCTION_BEG) + (cinsn 3 (set (subreg:TI (reg:V8HI x0) 0) (const_int -1))) + (edge-to exit (flags "FALLTHRU")) + ) + ) + (crtl (return_rtx (reg/i:TI x0))) +) +} + +/* { dg-final { scan-rtl-dump {(?n)lr *def.*\[x0\].*\[x1\]} cse1 } } */ -- 2.30.2