From adb5b54b28383885b4a34b95698cc302af8e0415 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 16 Aug 2017 17:25:34 +0200 Subject: [PATCH] re PR target/46091 (missed optimization: x86 bt/btc/bts instructions) PR target/46091 * config/i386/i386.md (*anddi_1_btr): Change predicates of operand 0 and operand 1 to nonimmediate_operand. Add "m" constraint. Add ix86_binary_operator_ok to insn constraint. (*iordi_1_bts): Ditto. (*xordi_1_btc): Ditto. (*btsq): Change predicate of operand 0 to nonimmediate_operand. Update corresponding peephole2 pattern. (*btrq): Ditto. (*btcq): Ditto. testsuite/ChangeLog: PR target/46091 * gcc.target/i386/pr46091-1.c: Update scan-assembler-times. (testm): New test function. * gcc.target/i386/pr46091-2.c: Ditto. * gcc.target/i386/pr46091-3.c: Ditto. From-SVN: r251124 --- gcc/ChangeLog | 13 ++++++++ gcc/config/i386/i386.md | 37 ++++++++++++----------- gcc/testsuite/ChangeLog | 8 +++++ gcc/testsuite/gcc.target/i386/pr46091-1.c | 9 +++++- gcc/testsuite/gcc.target/i386/pr46091-2.c | 9 +++++- gcc/testsuite/gcc.target/i386/pr46091-3.c | 9 +++++- 6 files changed, 65 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index eb3277224b0..3eff5f5ff04 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2017-08-16 Uros Bizjak + + PR target/46091 + * config/i386/i386.md (*anddi_1_btr): Change predicates of + operand 0 and operand 1 to nonimmediate_operand. Add "m" constraint. + Add ix86_binary_operator_ok to insn constraint. + (*iordi_1_bts): Ditto. + (*xordi_1_btc): Ditto. + (*btsq): Change predicate of operand 0 to nonimmediate_operand. + Update corresponding peephole2 pattern. + (*btrq): Ditto. + (*btcq): Ditto. 
+ 2017-08-16 Bin Cheng PR tree-optimization/81832 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 059a51832de..14688a863cf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8268,12 +8268,13 @@ (set_attr "mode" "SI,DI,DI,SI")]) (define_insn_and_split "*anddi_1_btr" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (and:DI - (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "const_int_operand" "n"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_USE_BT + && ix86_binary_operator_ok (AND, DImode, operands) && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)" "#" "&& reload_completed" @@ -8813,12 +8814,13 @@ (set_attr "mode" "")]) (define_insn_and_split "*iordi_1_bts" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ior:DI - (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "const_int_operand" "n"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_USE_BT + && ix86_binary_operator_ok (IOR, DImode, operands) && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)" "#" "&& reload_completed" @@ -8834,12 +8836,13 @@ (set_attr "mode" "DI")]) (define_insn_and_split "*xordi_1_btc" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (xor:DI - (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 1 "nonimmediate_operand" "%0") (match_operand:DI 2 "const_int_operand" "n"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_USE_BT + && ix86_binary_operator_ok (XOR, DImode, operands) && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)" "#" "&& reload_completed" @@ -10996,10 +10999,10 @@ ;; Bit set / bit test instructions ;; %%% bts, btr, btc, bt. 
-;; In general these instructions are *slow* when applied to memory, -;; since they enforce atomic operation. When applied to registers, -;; it depends on the cpu implementation. They're never faster than -;; the corresponding and/ior/xor operations, so with 32-bit there's +;; In general these instructions are *slow* with variable operand +;; when applied to memory. When applied to registers, it depends +;; on the cpu implementation. They're never faster than the +;; corresponding and/ior/xor operations, so with 32-bit there's ;; no point. But in 64-bit, we can't hold the relevant immediates ;; within the instruction itself, so operating on bits in the high ;; 32-bits of a register becomes easier. @@ -11009,7 +11012,7 @@ ;; negdf respectively, so they can never be disabled entirely. (define_insn "*btsq" - [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) (const_int 1)) @@ -11022,7 +11025,7 @@ (set_attr "mode" "DI")]) (define_insn "*btrq" - [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) (const_int 0)) @@ -11035,7 +11038,7 @@ (set_attr "mode" "DI")]) (define_insn "*btcq" - [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1)))) @@ -11052,7 +11055,7 @@ (define_peephole2 [(match_scratch:DI 2 "r") (parallel [(set (zero_extract:DI - (match_operand:DI 0 "register_operand") + (match_operand:DI 0 "nonimmediate_operand") (const_int 1) (match_operand 1 "const_0_to_63_operand")) (const_int 1)) @@ -11076,7 +11079,7 @@ (define_peephole2 [(match_scratch:DI 2 
"r") (parallel [(set (zero_extract:DI - (match_operand:DI 0 "register_operand") + (match_operand:DI 0 "nonimmediate_operand") (const_int 1) (match_operand 1 "const_0_to_63_operand")) (const_int 0)) @@ -11100,7 +11103,7 @@ (define_peephole2 [(match_scratch:DI 2 "r") (parallel [(set (zero_extract:DI - (match_operand:DI 0 "register_operand") + (match_operand:DI 0 "nonimmediate_operand") (const_int 1) (match_operand 1 "const_0_to_63_operand")) (not:DI (zero_extract:DI @@ -11128,7 +11131,7 @@ (zero_extract:SWI48 (match_operand:SWI48 0 "register_operand" "r") (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "rN")) + (match_operand:SI 1 "nonmemory_operand" "r")) (const_int 0)))] "" { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 717e951a8cc..2b4c63ab712 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2017-08-16 Uros Bizjak + + PR target/46091 + * gcc.target/i386/pr46091-1.c: Update scan-assembler-times. + (testm): New test function. + * gcc.target/i386/pr46091-2.c: Ditto. + * gcc.target/i386/pr46091-3.c: Ditto. 
+ 2017-08-16 Bin Cheng PR tree-optimization/81832 diff --git a/gcc/testsuite/gcc.target/i386/pr46091-1.c b/gcc/testsuite/gcc.target/i386/pr46091-1.c index adca01f294c..74685af8184 100644 --- a/gcc/testsuite/gcc.target/i386/pr46091-1.c +++ b/gcc/testsuite/gcc.target/i386/pr46091-1.c @@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a) return a & ~(1ull << 55); } -/* { dg-final { scan-assembler "btr" } } */ +extern unsigned long long m; + +void testm (void) +{ + m &= ~(1ull << 45); +} + +/* { dg-final { scan-assembler-times "btr" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46091-2.c b/gcc/testsuite/gcc.target/i386/pr46091-2.c index 174375393cf..5b340450725 100644 --- a/gcc/testsuite/gcc.target/i386/pr46091-2.c +++ b/gcc/testsuite/gcc.target/i386/pr46091-2.c @@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a) return a | (1ull << 55); } -/* { dg-final { scan-assembler "bts" } } */ +extern unsigned long long m; + +void testm (void) +{ + m |= (1ull << 45); +} + +/* { dg-final { scan-assembler-times "bts" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46091-3.c b/gcc/testsuite/gcc.target/i386/pr46091-3.c index c8091e9f41c..3c601a3c543 100644 --- a/gcc/testsuite/gcc.target/i386/pr46091-3.c +++ b/gcc/testsuite/gcc.target/i386/pr46091-3.c @@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a) return a ^ (1ull << 55); } -/* { dg-final { scan-assembler "btc" } } */ +extern unsigned long long m; + +void testm (void) +{ + m ^= (1ull << 45); +} + +/* { dg-final { scan-assembler-times "btc" 2 } } */ -- 2.30.2