From e389ba3073f8cfaa6209c27418ad175234c15482 Mon Sep 17 00:00:00 2001 From: Andrew Burgess Date: Thu, 17 Nov 2016 22:40:05 +0000 Subject: [PATCH] arc/nps400: New peephole2 pattern to allow more cmem loads In the case where we access a single bit from a value and use this in an EQ/NE comparison, GCC will convert this into a sign-extend and GE/LT comparison. Normally this would be fine; however, if the value is in CMEM memory, then we don't have a sign-extending load available (using the special short CMEM load instructions), and instead we end up using a long-form load with LIMM, which is less efficient. This peephole optimisation looks for the sign-extend followed by GE/LT pattern and converts this back into a load and EQ/NE comparison. gcc/ChangeLog: * config/arc/arc.md (cmem bit/sign-extend peephole2): New peephole to make better use of cmem loads in the case where a single bit is being accessed. * config/arc/predicates.md (ge_lt_comparison_operator): New predicate. gcc/testsuite/ChangeLog: * gcc.target/arc/cmem-bit-1.c: New file. * gcc.target/arc/cmem-bit-2.c: New file. * gcc.target/arc/cmem-bit-3.c: New file. * gcc.target/arc/cmem-bit-4.c: New file. 
From-SVN: r242572 --- gcc/ChangeLog | 8 +++++ gcc/config/arc/arc.md | 43 +++++++++++++++++++++++ gcc/config/arc/predicates.md | 3 ++ gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/gcc.target/arc/cmem-bit-1.c | 20 +++++++++++ gcc/testsuite/gcc.target/arc/cmem-bit-2.c | 20 +++++++++++ gcc/testsuite/gcc.target/arc/cmem-bit-3.c | 20 +++++++++++ gcc/testsuite/gcc.target/arc/cmem-bit-4.c | 20 +++++++++++ 8 files changed, 141 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arc/cmem-bit-1.c create mode 100644 gcc/testsuite/gcc.target/arc/cmem-bit-2.c create mode 100644 gcc/testsuite/gcc.target/arc/cmem-bit-3.c create mode 100644 gcc/testsuite/gcc.target/arc/cmem-bit-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6132cdcf177..24ed0f89b9e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-11-17 Andrew Burgess + + * config/arc/arc.md (cmem bit/sign-extend peephole2): New peephole + to make better use of cmem loads in the case where a single bit is + being accessed. + * config/arc/predicates.md (ge_lt_comparison_operator): New + predicate. + 2016-11-17 Andrew Senkevich * config/i386/i386.c (processor_features): Add F_AVX5124VNNIW, diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index c16bf67816d..925fcd6d1ad 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1465,6 +1465,49 @@ [(set_attr "type" "cmove,cmove") (set_attr "length" "4,8")]) +;; When there's a mask of a single bit, and then a compare to 0 or 1, +;; if the single bit is the sign bit, then GCC likes to convert this +;; into a sign extend and a compare less than, or greater-or-equal to, zero. +;; This is usually fine, except for the NPS400 where we have access to +;; a bit test instruction, along with a special short load instruction +;; (from CMEM), that doesn't support sign-extension on load. +;; +;; This peephole optimisation attempts to restore the use of bit-test +;; in those cases where it is useful to do so. 
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI + (match_operand:QI 1 "any_mem_operand" ""))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_dup 0) + (const_int 0))) + (set (pc) + (if_then_else (match_operator 2 "ge_lt_comparison_operator" + [(reg:CC_ZN CC_REG) (const_int 0)]) + (match_operand 3 "" "") + (match_operand 4 "" "")))] + "TARGET_NPS_CMEM + && cmem_address (XEXP (operands[1], 0), SImode) + && peep2_reg_dead_p (2, operands[0]) + && peep2_regno_dead_p (3, CC_REG)" + [(set (match_dup 0) + (zero_extend:SI + (match_dup 1))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (zero_extract:SI + (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0))) + (set (pc) + (if_then_else (match_dup 2) + (match_dup 3) + (match_dup 4)))] + "/* GE means bit 7 is clear (EQ); LT means it is set (NE).  */ if (GET_CODE (operands[2]) == GE) + operands[2] = gen_rtx_EQ (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx); + else + operands[2] = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx);") + ; Try to generate more short moves, and/or less limms, by substituting a ; conditional move with a conditional sub. (define_peephole2 diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md index cb75dbce69b..d8c92814101 100644 --- a/gcc/config/arc/predicates.md +++ b/gcc/config/arc/predicates.md @@ -452,6 +452,9 @@ (define_predicate "equality_comparison_operator" (match_code "eq, ne")) +(define_predicate "ge_lt_comparison_operator" + (match_code "ge, lt")) + (define_predicate "brcc_nolimm_operator" (ior (match_test "REG_P (XEXP (op, 1))") (and (match_code "eq, ne, lt, ge, ltu, geu") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e84613fe5f7..1daa0aabb60 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2016-11-17 Andrew Burgess + + * gcc.target/arc/cmem-bit-1.c: New file. + * gcc.target/arc/cmem-bit-2.c: New file. + * gcc.target/arc/cmem-bit-3.c: New file. + * gcc.target/arc/cmem-bit-4.c: New file. 
+ 2016-11-17 Andrew Senkevich * gcc.target/i386/builtin_target.c: Handle new "avx5124vnniw", diff --git a/gcc/testsuite/gcc.target/arc/cmem-bit-1.c b/gcc/testsuite/gcc.target/arc/cmem-bit-1.c new file mode 100644 index 00000000000..d49ab5cd44f --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/cmem-bit-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=nps400 -mcmem -O2" } */ + +struct strange_bool +{ + unsigned char bool_bit :1; + unsigned char other_bits :7; +}; + +struct strange_bool a_strange_bool __attribute__((section(".cmem"))); + +extern void bar(); + +void foo() { + if (a_strange_bool.bool_bit) + bar(); +} + +/* { dg-final { scan-assembler "xldb r\[0-9\]+,\\\[@a_strange_bool\\\]" } } */ +/* { dg-final { scan-assembler "btst_s r\[0-9\]+,7" { target arceb-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-bit-2.c b/gcc/testsuite/gcc.target/arc/cmem-bit-2.c new file mode 100644 index 00000000000..45b49c6dfc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/cmem-bit-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=nps400 -mcmem -O2" } */ + +struct strange_bool +{ + unsigned short bool_bit :1; + unsigned short other_bits :15; +}; + +struct strange_bool a_strange_bool __attribute__((section(".cmem"))); + +extern void bar(); + +void foo() { + if (a_strange_bool.bool_bit) + bar(); +} + +/* { dg-final { scan-assembler "xldb r\[0-9\]+,\\\[@a_strange_bool\\\]" } } */ +/* { dg-final { scan-assembler "btst_s r\[0-9\]+,7" { target arceb-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-bit-3.c b/gcc/testsuite/gcc.target/arc/cmem-bit-3.c new file mode 100644 index 00000000000..371ff2bca8b --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/cmem-bit-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=nps400 -mcmem -O2" } */ + +struct strange_bool +{ + unsigned int bool_bit :1; + unsigned int other_bits :31; +}; + +struct strange_bool a_strange_bool __attribute__((section(".cmem"))); + +extern void 
bar(); + +void foo() { + if (a_strange_bool.bool_bit) + bar(); +} + +/* { dg-final { scan-assembler "xldb r\[0-9\]+,\\\[@a_strange_bool\\\]" } } */ +/* { dg-final { scan-assembler "btst_s r\[0-9\]+,7" { target arceb-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/arc/cmem-bit-4.c b/gcc/testsuite/gcc.target/arc/cmem-bit-4.c new file mode 100644 index 00000000000..a95c6ae14d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/cmem-bit-4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=nps400 -mcmem -O2" } */ + +struct strange_bool +{ + unsigned long long bool_bit :1; + unsigned long long other_bits :63; /* 1 + 63: the two fields total 64 bits.  */ +}; + +struct strange_bool a_strange_bool __attribute__((section(".cmem"))); + +extern void bar(); + +void foo() { + if (a_strange_bool.bool_bit) + bar(); +} + +/* { dg-final { scan-assembler "xldb r\[0-9\]+,\\\[@a_strange_bool\\\]" } } */ +/* { dg-final { scan-assembler "btst_s r\[0-9\]+,7" { target arceb-*-* } } } */ -- 2.30.2