From: Jakub Jelinek Date: Mon, 8 Apr 2019 12:35:22 +0000 (+0200) Subject: re PR rtl-optimization/89865 (FAIL: gcc.target/i386/pr49095.c scan-assembler-times... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8ad68a6d87ff1978a7ef9228e79795711aed4d61;p=gcc.git re PR rtl-optimization/89865 (FAIL: gcc.target/i386/pr49095.c scan-assembler-times \\\\), % 45) PR rtl-optimization/89865 * config/i386/i386.md (SWI12 peephole for mem {+,-,&,|,^}= x; mem != 0): Fix up operand numbers not to clash with the additional operands[4]. (peepholes for mem {+,-,&,|,^}= x; mem != 0): New peephole2s with extra register copy in the middle. * gcc.target/i386/pr49095.c: Adjust number of expected RMW spots on ia32. From-SVN: r270205 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b991f600481..5ee20a9bbaa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-04-08 Jakub Jelinek + + PR rtl-optimization/89865 + * config/i386/i386.md + (SWI12 peephole for mem {+,-,&,|,^}= x; mem != 0): Fix up operand + numbers not to clash with the additional operands[4]. + (peepholes for mem {+,-,&,|,^}= x; mem != 0): New peephole2s + with extra register copy in the middle. + 2019-04-08 Martin Liska PR gcov-profile/89961 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0fe778fda92..b797e406a92 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18804,17 +18804,130 @@ (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) ? 
CCGOCmode : CCNOmode)" - [(parallel [(set (match_dup 4) (match_dup 6)) - (set (match_dup 1) (match_dup 5))])] + [(parallel [(set (match_dup 5) (match_dup 7)) + (set (match_dup 1) (match_dup 6))])] { - operands[4] = SET_DEST (PATTERN (peep2_next_insn (3))); - operands[5] + operands[5] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, copy_rtx (operands[1]), gen_lowpart (<MODE>mode, operands[2])); + operands[7] + = gen_rtx_COMPARE (GET_MODE (operands[5]), + copy_rtx (operands[6]), + const0_rtx); +}) + +;; peephole2 comes before regcprop, so deal also with a case that +;; would be cleaned up by regcprop. +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_dup 0) + (match_operator:SWI 3 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 2 "<nonmemory_operand>")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand:SWI 4 "register_operand") (match_dup 0)) + (set (match_dup 1) (match_dup 4)) + (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && peep2_reg_dead_p (5, operands[4]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[4], operands[1]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode)) + && ix86_match_ccmode (peep2_next_insn (4), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 5) (match_dup 7)) + (set (match_dup 1) (match_dup 6))])] +{ + operands[5] = SET_DEST (PATTERN (peep2_next_insn (4))); operands[6] - = gen_rtx_COMPARE (GET_MODE (operands[4]), - copy_rtx (operands[5]), + = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + copy_rtx (operands[1]), + operands[2]); + operands[7] + = gen_rtx_COMPARE (GET_MODE (operands[5]), + copy_rtx (operands[6]), + const0_rtx); +}) + +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand") + (match_operand:SWI12 1 "memory_operand")) + (parallel [(set (match_operand:SI 4 "register_operand") + (match_operator:SI 3 "plusminuslogic_operator" + [(match_dup 4) + (match_operand:SI 2 "nonmemory_operand")])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand:SWI12 5 "register_operand") (match_dup 0)) + (set (match_dup 1) (match_dup 5)) + (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[0]) == REGNO (operands[4]) + && peep2_reg_dead_p (3, operands[0]) + && peep2_reg_dead_p (5, operands[5]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], SImode) + || any_QIreg_operand (operands[2], SImode)) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[5], operands[1]) + && ix86_match_ccmode (peep2_next_insn (4), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 6) (match_dup 8)) + (set (match_dup 1) (match_dup 7))])] +{ + operands[6] = SET_DEST (PATTERN (peep2_next_insn (4))); + operands[7] + = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode, + copy_rtx (operands[1]), + gen_lowpart (<MODE>mode, operands[2])); + operands[8] + = gen_rtx_COMPARE (GET_MODE (operands[6]), + copy_rtx (operands[7]), + const0_rtx); +}) + +;; Likewise for cmpelim optimized pattern. 
+(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 3 "plusminuslogic_operator" + [(match_dup 0) + (match_operand:SWI 2 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_dup 0) (match_dup 3))]) + (set (match_operand:SWI 4 "register_operand") (match_dup 0)) + (set (match_dup 1) (match_dup 4))] + "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && peep2_reg_dead_p (4, operands[4]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && !reg_overlap_mentioned_p (operands[4], operands[1]) + && ix86_match_ccmode (peep2_next_insn (1), + (GET_CODE (operands[3]) == PLUS + || GET_CODE (operands[3]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 5) (match_dup 7)) + (set (match_dup 1) (match_dup 6))])] +{ + operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0)); + operands[6] + = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + copy_rtx (operands[1]), operands[2]); + operands[7] + = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]), const0_rtx); }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c8f538b26e6..087c58b2e4f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-04-08 Jakub Jelinek + + PR rtl-optimization/89865 + * gcc.target/i386/pr49095.c: Adjust number of expected RMW spots + on ia32. 
+ 2019-04-01 Bin Cheng PR tree-optimization/89725 diff --git a/gcc/testsuite/gcc.target/i386/pr49095.c b/gcc/testsuite/gcc.target/i386/pr49095.c index 177e4ceaa62..3ae14e3d4e0 100644 --- a/gcc/testsuite/gcc.target/i386/pr49095.c +++ b/gcc/testsuite/gcc.target/i386/pr49095.c @@ -73,5 +73,5 @@ G (long) /* { dg-final { scan-assembler-not "test\[lq\]" } } */ /* The {f,h}{char,short,int,long}xor functions aren't optimized into a RMW instruction, so need load, modify and store. FIXME eventually. */ -/* { dg-final { scan-assembler-times "\\(%eax\\), %" 12 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "\\(%eax\\), %" 8 { target { ia32 } } } } */ /* { dg-final { scan-assembler-times "\\(%\[re\]di\\), %" 8 { target { ! ia32 } } } } */