re PR target/55073 (Wrong Neon code generation at -O2 caused by -fschedule-insns)
authorRichard Earnshaw <rearnsha@arm.com>
Thu, 29 Nov 2012 17:51:40 +0000 (17:51 +0000)
committerRichard Earnshaw <rearnsha@gcc.gnu.org>
Thu, 29 Nov 2012 17:51:40 +0000 (17:51 +0000)
PR target/55073
* arm/neon.md (neon_vtrn<mode>_internal): Split into expand
and insn patterns.  Re-order insn arguments to tie inputs to
outputs.
(neon_vzip<mode>_internal): Likewise.
(neon_vuzp<mode>_internal): Likewise.

* gcc.target/arm/pr55073.C: New test.

From-SVN: r193943

gcc/ChangeLog
gcc/config/arm/neon.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/pr55073.C [new file with mode: 0644]

index 8ca3588b3eeffdf0ac3bef024a5fec9942528892..7560e28eb8768d0b94e8cf832b01ec4d339a96ec 100644 (file)
@@ -1,3 +1,12 @@
+2012-11-29  Richard Earnshaw  <rearnsha@arm.com>
+
+       PR target/55073
+       * arm/neon.md (neon_vtrn<mode>_internal): Split into expand
+       and insn patterns.  Re-order insn arguments to tie inputs to
+       outputs.
+       (neon_vzip<mode>_internal): Likewise.
+       (neon_vuzp<mode>_internal): Likewise.
+
 2012-11-29  Marc Glisse  <marc.glisse@inria.fr>
 
        PR c++/53094
index 8f84795334b7ccc9b811f7cd8c086394b30e3090..0822049a3e9387f16a43a0f6b64c1c6cbe69bdab 100644 (file)
   [(set_attr "neon_type" "neon_bp_3cycle")]
 )
 
-(define_insn "neon_vtrn<mode>_internal"
+(define_expand "neon_vtrn<mode>_internal"
+  [(parallel
+    [(set (match_operand:VDQW 0 "s_register_operand" "")
+         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
+                       (match_operand:VDQW 2 "s_register_operand" "")]
+          UNSPEC_VTRN1))
+     (set (match_operand:VDQW 3 "s_register_operand" "")
+          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
+  "TARGET_NEON"
+  ""
+)
+
+;; Note: Different operand numbering to handle tied registers correctly.
+(define_insn "*neon_vtrn<mode>_insn"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                      (match_operand:VDQW 3 "s_register_operand" "2")]
                      UNSPEC_VTRN1))
-   (set (match_operand:VDQW 3 "s_register_operand" "=2")
-         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+   (set (match_operand:VDQW 2 "s_register_operand" "=w")
+         (unspec:VDQW [(match_dup 1) (match_dup 3)]
                      UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set (attr "neon_type")
       (if_then_else (match_test "<Is_d_reg>")
                     (const_string "neon_bp_simple")
   DONE;
 })
 
-(define_insn "neon_vzip<mode>_internal"
+(define_expand "neon_vzip<mode>_internal"
+  [(parallel
+    [(set (match_operand:VDQW 0 "s_register_operand" "")
+         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
+                       (match_operand:VDQW 2 "s_register_operand" "")]
+                      UNSPEC_VZIP1))
+    (set (match_operand:VDQW 3 "s_register_operand" "")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
+  "TARGET_NEON"
+  ""
+)
+
+;; Note: Different operand numbering to handle tied registers correctly.
+(define_insn "*neon_vzip<mode>_insn"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                      (match_operand:VDQW 3 "s_register_operand" "2")]
                      UNSPEC_VZIP1))
-   (set (match_operand:VDQW 3 "s_register_operand" "=2")
-        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+   (set (match_operand:VDQW 2 "s_register_operand" "=w")
+        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                      UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set (attr "neon_type")
       (if_then_else (match_test "<Is_d_reg>")
                     (const_string "neon_bp_simple")
   DONE;
 })
 
-(define_insn "neon_vuzp<mode>_internal"
+(define_expand "neon_vuzp<mode>_internal"
+  [(parallel
+    [(set (match_operand:VDQW 0 "s_register_operand" "")
+         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
+                       (match_operand:VDQW 2 "s_register_operand" "")]
+          UNSPEC_VUZP1))
+     (set (match_operand:VDQW 3 "s_register_operand" "")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
+  "TARGET_NEON"
+  ""
+)
+
+;; Note: Different operand numbering to handle tied registers correctly.
+(define_insn "*neon_vuzp<mode>_insn"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                      (match_operand:VDQW 3 "s_register_operand" "2")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 3 "s_register_operand" "=2")
-        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+   (set (match_operand:VDQW 2 "s_register_operand" "=w")
+        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                      UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set (attr "neon_type")
       (if_then_else (match_test "<Is_d_reg>")
                     (const_string "neon_bp_simple")
index 4e623fb7df262dececaf804492084039d28b8314..996347b764f7a926c2ef3aef3d61f2e5801d00e1 100644 (file)
@@ -1,3 +1,8 @@
+2012-11-29  Richard Earnshaw  <rearnsha@arm.com>
+
+       PR target/55073
+       * gcc.target/arm/pr55073.C: New test.
+
 2012-11-29  Marc Glisse  <marc.glisse@inria.fr>
 
        PR c++/53094
diff --git a/gcc/testsuite/gcc.target/arm/pr55073.C b/gcc/testsuite/gcc.target/arm/pr55073.C
new file mode 100644 (file)
index 0000000..5575cf7
--- /dev/null
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+#include <stdlib.h>
+
+struct __attribute__((aligned(16))) _v16u8_ {
+       uint8x16_t val;
+       _v16u8_() { }
+
+       _v16u8_( const  uint8x16_t &src) { val = src; }
+       _v16u8_( const   int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
+       _v16u8_( const  uint32x4_t &src) { val = vreinterpretq_u8_u32(src); }
+
+       operator  uint8x16_t () const { return val; }
+       operator   int8x16_t () const { return vreinterpretq_s8_u8 (val); }
+       operator   int16x8_t () const { return vreinterpretq_s16_u8(val); }
+       operator  uint32x4_t () const { return vreinterpretq_u32_u8(val); }
+       operator   int32x4_t () const { return vreinterpretq_s32_u8(val); }
+};
+typedef struct _v16u8_ v16u8;
+typedef const v16u8 cv16u8;
+
+typedef v16u8 v16i8;
+typedef v16u8 v8i16;
+typedef v16u8 v4u32;
+
+inline v16u8 __attribute__((always_inline)) mergelo( const v16u8 & s, const v16u8 & t )
+{
+       uint8x8x2_t r = vzip_u8( vget_low_u8(s), vget_low_u8(t) );
+       return vcombine_u8( r.val[0], r.val[1] );
+}
+
+inline v8i16 __attribute__((always_inline)) unpacklo(const v16i8 & s)
+{
+       return vmovl_s8( vget_low_s8( s ) );
+}
+
+const uint32_t __attribute__((aligned(16))) _InA [4] = { 0xFF020001, 0xFF020001, 0xFF000101, 0xFF000101 } ;
+const uint32_t __attribute__((aligned(16))) _InB [4] = { 0xFF050002, 0xFF050002, 0xFF000303, 0xFF000203 } ;
+
+__attribute__((noinline)) v16i8 test_func(void)
+{
+       v16u8 A = vld1q_u8( (uint8_t*) _InA );
+       v16u8 B = vld1q_u8( (uint8_t*) _InB );
+       v8i16 r   = vdupq_n_s16(2);
+
+       v16u8 _0 = mergelo( A, B );
+       v16u8 _1 = mergelo( B, A );
+
+       v16u8 _2 = mergelo( _0, _1 );
+       v16u8 _3 = mergelo( _1, _0 );
+
+       v8i16 _4 = vsubq_s16( unpacklo( _2 ), r );
+       v8i16 _5 = vsubq_s16( unpacklo( _3 ), r );
+
+       v8i16 ret = vaddq_s16( _4, _5 );
+
+       return ( ret );
+}
+
+int main (int argc, char **argv)
+{
+       v16u8 val = test_func();
+
+       if (vgetq_lane_u32( val, 0 ) != 0xffffffff
+           || vgetq_lane_u32( val, 1 ) != 0xffffffff
+           || vgetq_lane_u32( val, 2 ) != 0xfffcfffc
+           || vgetq_lane_u32( val, 3 ) != 0xfffcfffc)
+         abort ();
+       exit (0);
+}