With -mfpu=neon, DI mode shifts are expanded after reload.
author Wilco Dijkstra <wdijkstr@arm.com>
Tue, 25 Oct 2016 10:25:28 +0000 (10:25 +0000)
committer Wilco Dijkstra <wilco@gcc.gnu.org>
Tue, 25 Oct 2016 10:25:28 +0000 (10:25 +0000)
With -mfpu=neon, DI mode shifts are expanded after reload.  DI mode registers can
either fully or partially overlap on both ARM and Thumb-2.  However, the shift
expansion code can only deal with the full overlap case, and generates incorrect
code for partial overlaps.  The fix is to add new variants that support either
full overlap or no overlap.

    gcc/
PR target/78041
* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
Remove partial overlap check for shift by 1.
(<shift>di3_neon): Likewise.
    testsuite/
* gcc.target/arm/pr78041.c: New test.

From-SVN: r241508

gcc/ChangeLog
gcc/config/arm/neon.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/pr78041.c [new file with mode: 0644]

index c1d8f946d0d8001ab843aba080485d9585854ce2..aaf07f436d7a834a6a3af1e65ec1aeb7e553b58d 100644 (file)
@@ -1,3 +1,10 @@
+2016-10-25  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       PR target/78041
+       * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
+       Remove partial overlap check for shift by 1.
+       (<shift>di3_neon): Likewise.
+
 2016-10-25  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        * config/arm/constraints.md (Q constraint): Document its use for
index 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099 100644 (file)
 )
 
 (define_insn_and_split "ashldi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r, ?w,w")
-       (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
-                  (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,rUm,i")))
-   (clobber (match_scratch:SI 3                                    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:SI 4                                    "= X, X,?&r, X,  X,X"))
-   (clobber (match_scratch:DI 5                                    "=&w, X,  X, X, &w,X"))
+  [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r,?&r, ?w,w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w,w")
+                  (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm,i")))
+   (clobber (match_scratch:SI 3                                    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:SI 4                                    "= X, X,?&r, X,  X,  X,X"))
+   (clobber (match_scratch:DI 5                                    "=&w, X,  X, X,  X, &w,X"))
    (clobber (reg:CC_C CC_REGNUM))]
   "TARGET_NEON"
   "#"
       }
     else
       {
-       if (operands[2] == CONST1_RTX (SImode)
-           && (!reg_overlap_mentioned_p (operands[0], operands[1])
-               || REGNO (operands[0]) == REGNO (operands[1])))
+       /* The shift expanders support either full overlap or no overlap.  */
+       gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+                   || REGNO (operands[0]) == REGNO (operands[1]));
+
+       if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
        else
       }
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
 ;; ashrdi3_neon
 ;; lshrdi3_neon
 (define_insn_and_split "<shift>di3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?w,?w")
-       (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
-                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
-   (clobber (match_scratch:SI 3                                     "=2r, X, &r, X,2r, X"))
-   (clobber (match_scratch:SI 4                                     "= X, X, &r, X, X, X"))
-   (clobber (match_scratch:DI 5                                     "=&w, X,  X, X,&w, X"))
+  [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?&r,?w,?w")
+       (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
+                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
+   (clobber (match_scratch:SI 3                                     "=2r, X, &r, X,  X,2r, X"))
+   (clobber (match_scratch:SI 4                                     "= X, X, &r, X,  X, X, X"))
+   (clobber (match_scratch:DI 5                                     "=&w, X,  X, X, X,&w, X"))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
   "#"
       }
     else
       {
-       if (operands[2] == CONST1_RTX (SImode)
-           && (!reg_overlap_mentioned_p (operands[0], operands[1])
-               || REGNO (operands[0]) == REGNO (operands[1])))
+       /* The shift expanders support either full overlap or no overlap.  */
+       gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+                   || REGNO (operands[0]) == REGNO (operands[1]));
+
+       if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
        else
 
     DONE;
   }"
-  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
    (set_attr "type" "multiple")]
 )
 
index 1dc07326565d46380214914e836c64d3bb10004e..02d8ac6303d44941a0573ffdc5cc8eb695309cf4 100644 (file)
@@ -1,3 +1,8 @@
+2016-10-25  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       PR target/78041
+       * gcc.target/arm/pr78041.c: New test.
+
 2016-10-25  Jakub Jelinek  <jakub@redhat.com>
 
        * g++.dg/cpp1z/launder1.C: New test.
diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c b/gcc/testsuite/gcc.target/arm/pr78041.c
new file mode 100644 (file)
index 0000000..340ab5c
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
+
+extern void abort (void);
+
+register long long x asm ("r1");
+
+long long f (void)
+{
+  return x << 5;
+}
+
+int main ()
+{
+  x = 0x0100000001;
+  if (f () != 0x2000000020)
+    abort ();
+  return 0;
+}