[ARM] Turning off 64-bit ops in Neon
author     Christophe Lyon <christophe.lyon@linaro.org>
           Thu, 21 Mar 2013 14:26:23 +0000 (14:26 +0000)
committer  Christophe Lyon <clyon@gcc.gnu.org>
           Thu, 21 Mar 2013 14:26:23 +0000 (15:26 +0100)
2013-03-21  Christophe Lyon  <christophe.lyon@linaro.org>

gcc/
* config/arm/arm-protos.h (tune_params): Add
prefer_neon_for_64bits field.
* config/arm/arm.c (prefer_neon_for_64bits): New variable.
(arm_slowmul_tune): Default prefer_neon_for_64bits to false.
(arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
(arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
(arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
(arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
(arm_option_override): Handle new -mneon-for-64bits option.
* config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
(prefer_neon_for_64bits): Declare new variable.
* config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
nota8.
(arch_enabled): Handle new arch types. Remove support for onlya8
and nota8.
(one_cmpldi2): Use new arch names.
* config/arm/arm.opt (mneon-for-64bits): Add option.
* config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
(anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
of onlya8.
* doc/invoke.texi (-mneon-for-64bits): Document.

gcc/testsuite:
* gcc.target/arm/neon-for-64bits-1.c: New tests.
* gcc.target/arm/neon-for-64bits-2.c: Likewise.

From-SVN: r196876

gcc/ChangeLog
gcc/config/arm/arm-protos.h
gcc/config/arm/arm.c
gcc/config/arm/arm.h
gcc/config/arm/arm.md
gcc/config/arm/arm.opt
gcc/config/arm/neon.md
gcc/doc/invoke.texi
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c [new file with mode: 0644]

index be4b87fea2ac789c3f29731b68c3ea6ab9a55955..822e68dd3fd44b26ab0c3e55f820b41d2f5ca682 100644 (file)
@@ -1,3 +1,29 @@
+2013-03-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * config/arm/arm-protos.h (tune_params): Add
+       prefer_neon_for_64bits field.
+       * config/arm/arm.c (prefer_neon_for_64bits): New variable.
+       (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
+       (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
+       (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
+       (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
+       (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
+       (arm_option_override): Handle new -mneon-for-64bits option.
+       * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
+       (prefer_neon_for_64bits): Declare new variable.
+       * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
+       avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
+       nota8.
+       (arch_enabled): Handle new arch types. Remove support for onlya8
+       and nota8.
+       (one_cmpldi2): Use new arch names.
+       * config/arm/arm.opt (mneon-for-64bits): Add option.
+       * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
+       (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
+       neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
+       of onlya8.
+       * doc/invoke.texi (-mneon-for-64bits): Document.
+
 2013-03-21  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/39326
index ffa00c0f7b74cdb296158f8459af992f21273843..694aa2802ae553328633b64cf738734106d5ce17 100644 (file)
@@ -269,6 +269,8 @@ struct tune_params
   bool logical_op_non_short_circuit[2];
   /* Vectorizer costs.  */
   const struct cpu_vec_costs* vec_costs;
+  /* Prefer Neon for 64-bit bitops.  */
+  bool prefer_neon_for_64bits;
 };
 
 extern const struct tune_params *current_tune;
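
The new member is appended at the end of struct tune_params, so every positional initializer in arm.c (see the hunks below) has to gain a trailing value for it. A trimmed, standalone C model of that relationship (an illustration only, not GCC code; the real struct has many more members and cpu_vec_costs is merely stubbed out here):

#include <stdbool.h>
#include <stddef.h>

struct cpu_vec_costs_model;              /* stand-in for cpu_vec_costs */

struct tune_params_model
{
  bool logical_op_non_short_circuit[2];
  const struct cpu_vec_costs_model *vec_costs;
  bool prefer_neon_for_64bits;           /* new field: per-CPU default */
};

/* Mirrors what each arm.c initializer below now does: keep the old
   behaviour by supplying a trailing "false".  */
static const struct tune_params_model example_tune =
{
  { true, true },                        /* Prefer non short circuit.  */
  NULL,                                  /* Vectorizer costs (stubbed).  */
  false                                  /* Prefer Neon for 64-bit bitops.  */
};
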
index ca367608c96c81ca4c7e0145bbf4ab40ad70206e..0c48d6ee49c0c3d4f6bf723659970cdfba947318 100644 (file)
@@ -839,6 +839,10 @@ int arm_arch_thumb2;
 int arm_arch_arm_hwdiv;
 int arm_arch_thumb_hwdiv;
 
+/* Nonzero if we should use Neon to handle 64-bit operations rather
+   than core registers.  */
+int prefer_neon_for_64bits = 0;
+
 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
    we must report the mode of the memory reference from
    TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
@@ -936,6 +940,7 @@ const struct tune_params arm_slowmul_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -950,6 +955,7 @@ const struct tune_params arm_fastmul_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -967,6 +973,7 @@ const struct tune_params arm_strongarm_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -981,6 +988,7 @@ const struct tune_params arm_xscale_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -995,6 +1003,7 @@ const struct tune_params arm_9e_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -1009,6 +1018,7 @@ const struct tune_params arm_v6t2_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1024,6 +1034,7 @@ const struct tune_params arm_cortex_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_cortex_a15_tune =
@@ -1038,6 +1049,7 @@ const struct tune_params arm_cortex_a15_tune =
   true,                                                /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1055,6 +1067,7 @@ const struct tune_params arm_cortex_a5_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {false, false},                              /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -1069,6 +1082,7 @@ const struct tune_params arm_cortex_a9_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1085,6 +1099,7 @@ const struct tune_params arm_v6m_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {false, false},                              /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1099,6 +1114,7 @@ const struct tune_params arm_fa726te_tune =
   false,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                        /* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false                                         /* Prefer Neon for 64-bit bitops.  */
 };
 
 
@@ -2129,6 +2145,12 @@ arm_option_override (void)
                            global_options.x_param_values,
                            global_options_set.x_param_values);
 
+  /* Use Neon to perform 64-bit operations rather than core
+     registers.  */
+  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
+  if (use_neon_for_64bits == 1)
+     prefer_neon_for_64bits = true;
+
   /* Use the alternative scheduling-pressure algorithm by default.  */
   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                          global_options.x_param_values,
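
A compact way to read the override logic above: the per-CPU tuning supplies the default, and -mneon-for-64bits (declared RejectNegative in arm.opt below) can only switch the preference on, never off. A standalone sketch of that decision, not GCC code, reusing the hunk's variable names:

#include <stdbool.h>
#include <stdio.h>

/* tune_default models current_tune->prefer_neon_for_64bits;
   use_neon_for_64bits models the variable behind -mneon-for-64bits.  */
static bool
resolve_prefer_neon (bool tune_default, int use_neon_for_64bits)
{
  bool prefer = tune_default;
  if (use_neon_for_64bits == 1)   /* option given: force the preference on */
    prefer = true;
  return prefer;
}

int
main (void)
{
  printf ("%d\n", resolve_prefer_neon (false, 0));  /* 0: default stays off */
  printf ("%d\n", resolve_prefer_neon (false, 1));  /* 1: forced on by option */
  return 0;
}
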
index 05aea3577d70ceedc2780bbb03f3d11499091f91..04bff13368eda2a6f8ba582838f11c75e12fa868 100644 (file)
@@ -354,6 +354,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
 #define TARGET_IDIV            ((TARGET_ARM && arm_arch_arm_hwdiv) \
                                 || (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
 
+/* Should NEON be used for 64-bit bitops.  */
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
+
 /* True iff the full BPABI is being used.  If TARGET_BPABI is true,
    then TARGET_AAPCS_BASED must be true -- but the converse does not
    hold.  TARGET_BPABI implies the use of the BPABI runtime library,
@@ -539,6 +542,10 @@ extern int arm_arch_arm_hwdiv;
 /* Nonzero if chip supports integer division instruction in Thumb mode.  */
 extern int arm_arch_thumb_hwdiv;
 
+/* Nonzero if we should use Neon to handle 64-bit operations rather
+   than core registers.  */
+extern int prefer_neon_for_64bits;
+
 #ifndef TARGET_DEFAULT
 #define TARGET_DEFAULT  (MASK_APCS_FRAME)
 #endif
index f3c59f37c8596e32b831b19a347d6a890b230262..d48bc8c01239b3c8c8e470b9220727535310cc14 100644 (file)
@@ -94,7 +94,7 @@
 ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
 ; arm_arch6.  This attribute is used to compute attribute "enabled",
 ; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
   (const_string "any"))
 
 (define_attr "arch_enabled" "no,yes"
              (match_test "TARGET_32BIT && !arm_arch6"))
         (const_string "yes")
 
-        (and (eq_attr "arch" "onlya8")
-             (eq_attr "tune" "cortexa8"))
+        (and (eq_attr "arch" "avoid_neon_for_64bits")
+             (match_test "TARGET_NEON")
+             (not (match_test "TARGET_PREFER_NEON_64BITS")))
         (const_string "yes")
 
-        (and (eq_attr "arch" "neon_onlya8")
-             (eq_attr "tune" "cortexa8")
-             (match_test "TARGET_NEON"))
-        (const_string "yes")
-
-        (and (eq_attr "arch" "nota8")
-             (not (eq_attr "tune" "cortexa8")))
-        (const_string "yes")
-
-        (and (eq_attr "arch" "neon_nota8")
-             (not (eq_attr "tune" "cortexa8"))
-             (match_test "TARGET_NEON"))
+        (and (eq_attr "arch" "neon_for_64bits")
+             (match_test "TARGET_NEON")
+             (match_test "TARGET_PREFER_NEON_64BITS"))
         (const_string "yes")
 
         (and (eq_attr "arch" "iwmmxt2")
   [(set_attr "length" "*,8,8,*")
    (set_attr "predicable" "no,yes,yes,no")
    (set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
 )
 
 (define_expand "one_cmplsi2"
   "TARGET_32BIT <qhs_zextenddi_cond>"
   "#"
   [(set_attr "length" "8,4,8,8")
-   (set_attr "arch" "neon_nota8,*,*,neon_onlya8")
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
    (set_attr "ce_count" "2")
    (set_attr "predicable" "yes")]
 )
    (set_attr "ce_count" "2")
    (set_attr "shift" "1")
    (set_attr "predicable" "yes")
-   (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
+   (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
 )
 
 ;; Splits for all extensions to DImode
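
The renamed tags replace the old Cortex-A8 tune checks with checks on the new preference. A standalone C sketch of how the rewritten "arch_enabled" tests choose between the two tags (not GCC code; the enum and function names are invented for illustration):

#include <stdbool.h>

enum arch_tag { ARCH_ANY, ARCH_NEON_FOR_64BITS, ARCH_AVOID_NEON_FOR_64BITS };

/* neon_for_64bits alternatives need Neon AND the preference;
   avoid_neon_for_64bits alternatives need Neon WITHOUT the preference.
   With no Neon at all, neither tag fires and a pattern's plain "*"
   alternatives are used instead.  */
static bool
alternative_enabled (enum arch_tag tag, bool target_neon,
                     bool prefer_neon_for_64bits)
{
  switch (tag)
    {
    case ARCH_NEON_FOR_64BITS:
      return target_neon && prefer_neon_for_64bits;
    case ARCH_AVOID_NEON_FOR_64BITS:
      return target_neon && !prefer_neon_for_64bits;
    default:
      return true;   /* "any" and the remaining arch values are not modelled */
    }
}
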
index e778407ab4c5112ac498c8b853d409fe68bcc5a3..afb42421c0656b924592b8c9d4a7401e0fc45c4a 100644 (file)
@@ -247,3 +247,7 @@ that may trigger Cortex-M3 errata.
 munaligned-access
 Target Report Var(unaligned_access) Init(2)
 Enable unaligned word and halfword accesses to packed data.
+
+mneon-for-64bits
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
+Use Neon to perform 64-bit operations rather than core registers.
index 79b3f667c1727dd424ecf20950618b352cd128d3..153475202d6dd6e124fe59036144350cab6eabe8 100644 (file)
   [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
    (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
    (set_attr "length" "*,8,8,*,8,8,8")
-   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
 )
 
 (define_insn "*sub<mode>3_neon"
   [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
    (set_attr "conds" "*,clob,clob,clob,*")
    (set_attr "length" "*,8,8,8,*")
-   (set_attr "arch" "nota8,*,*,*,onlya8")]
+   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
 )
 
 (define_insn "*mul<mode>3_neon"
 }
   [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
    (set_attr "length" "*,*,8,8,*,*")
-   (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+   (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
 )
 
 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
 }
   [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
    (set_attr "length" "*,*,8,8,*,*")
-   (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+   (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
 )
 
 (define_insn "orn<mode>3_neon"
    veor\t%P0, %P1, %P2"
   [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
    (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
 )
 
 (define_insn "one_cmpl<mode>2"
       }
     DONE;
   }"
-  [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
    (set_attr "opt" "*,*,speed,speed,*,*")]
 )
 
 
     DONE;
   }"
-  [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
    (set_attr "opt" "*,*,speed,speed,*,*")]
 )
 
index 6cf142f81b061bd81d61a92a5d49e09133ef0b13..9b8b36a52e3a386dc4cc28d758498d27c30aafe3 100644 (file)
@@ -510,7 +510,8 @@ Objective-C and Objective-C++ Dialects}.
 -mtp=@var{name} -mtls-dialect=@var{dialect} @gol
 -mword-relocations @gol
 -mfix-cortex-m3-ldrd @gol
--munaligned-access}
+-munaligned-access @gol
+-mneon-for-64bits}
 
 @emph{AVR Options}
 @gccoptlist{-mmcu=@var{mcu} -maccumulate-args -mbranch-cost=@var{cost} @gol
@@ -11530,6 +11531,11 @@ setting of this option.  If unaligned access is enabled then the
 preprocessor symbol @code{__ARM_FEATURE_UNALIGNED} will also be
 defined.
 
+@item -mneon-for-64bits
+@opindex mneon-for-64bits
+Enables using Neon to handle scalar 64-bit operations.  This is
+disabled by default, since the cost of moving data from core registers
+to Neon is high.
 @end table
 
 @node AVR Options
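
For illustration, one plausible way to exercise the new option from the command line (the cross-compiler name and the -mfpu/-mfloat-abi flags are an assumption, not part of this patch; exact code generation also depends on the CPU and tuning):

/* t.c: with
     arm-none-eabi-gcc -O2 -mfpu=neon -mfloat-abi=softfp -mneon-for-64bits -S t.c
   a 64-bit scalar bitwise operation like this one may be emitted as a single
   Neon instruction on a D register (e.g. veor) instead of a pair of
   core-register instructions.  Without -mneon-for-64bits it stays in core
   registers, as the new tests below check.  */
unsigned long long
xor64 (unsigned long long a, unsigned long long b)
{
  return a ^ b;
}
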
index 61729b0fc6e656f4897bfdd4260214ffbb1e3aba..eaa6bdcf1a7e2059c15bffd91ea51046f9daaea4 100644 (file)
@@ -1,3 +1,8 @@
+2013-03-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+       * gcc.target/arm/neon-for-64bits-1.c: New tests.
+       * gcc.target/arm/neon-for-64bits-2.c: Likewise.
+
 2013-03-21  Richard Biener  <rguenther@suse.de>
 
        * gcc.dg/vect/vect-outer-3a-big-array.c: Adjust.
@@ -19,7 +24,6 @@
 2013-03-20  Jeff Law  <law@redhat.com>
 
        * g++.dg/tree-ssa/ssa-dom.C: New test.
-       
 
 2013-03-20  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
new file mode 100644 (file)
index 0000000..a2a4103
--- /dev/null
@@ -0,0 +1,54 @@
+/* Check that Neon is *not* used by default to handle 64-bit scalar
+   operations.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+typedef long long i64;
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef int i32;
+
+/* Unary operators */
+#define UNARY_OP(name, op) \
+  void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
+
+/* Binary operators */
+#define BINARY_OP(name, op) \
+  void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
+
+/* Unsigned shift */
+#define SHIFT_U(name, op, amount) \
+  void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
+
+/* Signed shift */
+#define SHIFT_S(name, op, amount) \
+  void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
+
+UNARY_OP(not, ~)
+
+BINARY_OP(add, +)
+BINARY_OP(sub, -)
+BINARY_OP(and, &)
+BINARY_OP(or, |)
+BINARY_OP(xor, ^)
+
+SHIFT_U(right1, >>, 1)
+SHIFT_U(right2, >>, 2)
+SHIFT_U(right5, >>, 5)
+SHIFT_U(rightn, >>, c)
+
+SHIFT_S(right1, >>, 1)
+SHIFT_S(right2, >>, 2)
+SHIFT_S(right5, >>, 5)
+SHIFT_S(rightn, >>, c)
+
+/* { dg-final {scan-assembler-times "vmvn" 0} }  */
+/* { dg-final {scan-assembler-times "vadd" 0} }  */
+/* { dg-final {scan-assembler-times "vsub" 0} }  */
+/* { dg-final {scan-assembler-times "vand" 0} }  */
+/* { dg-final {scan-assembler-times "vorr" 0} }  */
+/* { dg-final {scan-assembler-times "veor" 0} }  */
+/* { dg-final {scan-assembler-times "vshr" 0} }  */
diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
new file mode 100644 (file)
index 0000000..035bfb7
--- /dev/null
@@ -0,0 +1,57 @@
+/* Check that Neon is used to handle 64-bit scalar operations.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mneon-for-64bits" } */
+/* { dg-add-options arm_neon } */
+
+typedef long long i64;
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef int i32;
+
+/* Unary operators */
+#define UNARY_OP(name, op) \
+  void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
+
+/* Binary operators */
+#define BINARY_OP(name, op) \
+  void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
+
+/* Unsigned shift */
+#define SHIFT_U(name, op, amount) \
+  void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
+
+/* Signed shift */
+#define SHIFT_S(name, op, amount) \
+  void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
+
+UNARY_OP(not, ~)
+
+BINARY_OP(add, +)
+BINARY_OP(sub, -)
+BINARY_OP(and, &)
+BINARY_OP(or, |)
+BINARY_OP(xor, ^)
+
+SHIFT_U(right1, >>, 1)
+SHIFT_U(right2, >>, 2)
+SHIFT_U(right5, >>, 5)
+SHIFT_U(rightn, >>, c)
+
+SHIFT_S(right1, >>, 1)
+SHIFT_S(right2, >>, 2)
+SHIFT_S(right5, >>, 5)
+SHIFT_S(rightn, >>, c)
+
+/* { dg-final {scan-assembler-times "vmvn" 1} }  */
+/* Two vadd: 1 in unary_not, 1 in binary_add */
+/* { dg-final {scan-assembler-times "vadd" 2} }  */
+/* { dg-final {scan-assembler-times "vsub" 1} }  */
+/* { dg-final {scan-assembler-times "vand" 1} }  */
+/* { dg-final {scan-assembler-times "vorr" 1} }  */
+/* { dg-final {scan-assembler-times "veor" 1} }  */
+/* 6 vshr for right shifts by constant, and variable right shift uses
+   vshl with a negative amount in register.  */
+/* { dg-final {scan-assembler-times "vshr" 6} }  */
+/* { dg-final {scan-assembler-times "vshl" 2} }  */