arm.c (neon_builtin_type_mode): Add T_V4HF.

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 022036f8842f7f3688bf6a39d6c2d04e667518a7..e4d576778c64dbc8e4e7be9cbf61e0f1fd5f04b8 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,43 @@
+2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+            Julian Brown  <julian@codesourcery.com>
+
+       * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
+       (TB_DREG): Add T_V4HF.
+       (v4hf_UP): New macro.
+       (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
+       (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
+       NEON_FLOAT_NARROW.
+       Handle initialisation of V4HF. Adjust initialisation of reinterpret
+       built-ins.
+       (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
+       NEON_FLOAT_NARROW.
+       (arm_vector_mode_supported_p): Handle V4HF.
+       (arm_mangle_map): Handle V4HFmode.
+       * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
+       * config/arm/arm_neon_builtins.def: Add entries for
+       vcvtv4hfv4sf, vcvtv4sfv4hf.
+       * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
+       (neon_vcvtv4hfv4sf): Likewise.
+       * config/arm/neon-gen.ml: Handle half-precision floating point
+       features.
+       * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
+       * config/arm/arm_neon.h: Regenerate.
+       * config/arm/neon.ml (type elts): Add F16.
+       (type vectype): Add T_float16x4, T_floatHF.
+       (type vecmode): Add V4HF.
+       (type features): Add Requires_FP_bit feature.
+       (elt_width): Handle F16.
+       (elt_class): Likewise.
+       (elt_of_class_width): Likewise.
+       (mode_of_elt): Refactor.
+       (type_for_elt): Handle F16, fix error messages.
+       (vectype_size): Handle T_float16x4.
+       (vcvt_sh): New function.
+       (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
+       (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
+       (string_of_mode): Handle V4HF.
+       * doc/arm-neon-intrinsics.texi: Regenerate.
+
  2013-04-25  James Greenhalgh  <james.greenhalgh@arm.com>
  
         * config/aarch64/aarch64.c (aarch64_print_operand): Fix asm_fprintf
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index 464d91c88ab4ad7d30acdccef83c72d6bbd5f788..bc4247358ebad52e4db2c77364fc4f5b4355875e 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19940,6 +19940,7 @@ arm_debugger_arg_offset (int value, rtx addr)
  typedef enum {
    T_V8QI,
    T_V4HI,
+  T_V4HF,
    T_V2SI,
    T_V2SF,
    T_DI,
@@ -19957,14 +19958,15 @@ typedef enum {
  #define TYPE_MODE_BIT(X) (1 << (X))
  
  #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)       \
-                | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF)      \
-                | TYPE_MODE_BIT (T_DI))
+                | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI)      \
+                | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
  #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)      \
                  | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)      \
                  | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
  
  #define v8qi_UP  T_V8QI
  #define v4hi_UP  T_V4HI
+#define v4hf_UP  T_V4HF
  #define v2si_UP  T_V2SI
  #define v2sf_UP  T_V2SF
  #define di_UP    T_DI
@@ -20000,6 +20002,8 @@ typedef enum {
    NEON_SCALARMULH,
    NEON_SCALARMAC,
    NEON_CONVERT,
+  NEON_FLOAT_WIDEN,
+  NEON_FLOAT_NARROW,
    NEON_FIXCONV,
    NEON_SELECT,
    NEON_RESULTPAIR,
@@ -20393,6 +20397,7 @@ arm_init_neon_builtins (void)
  
    tree neon_intQI_type_node;
    tree neon_intHI_type_node;
+  tree neon_floatHF_type_node;
    tree neon_polyQI_type_node;
    tree neon_polyHI_type_node;
    tree neon_intSI_type_node;
@@ -20419,6 +20424,7 @@ arm_init_neon_builtins (void)
  
    tree V8QI_type_node;
    tree V4HI_type_node;
+  tree V4HF_type_node;
    tree V2SI_type_node;
    tree V2SF_type_node;
    tree V16QI_type_node;
@@ -20473,6 +20479,9 @@ arm_init_neon_builtins (void)
    neon_float_type_node = make_node (REAL_TYPE);
    TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
    layout_type (neon_float_type_node);
+  neon_floatHF_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+  layout_type (neon_floatHF_type_node);
  
    /* Define typedefs which exactly correspond to the modes we are basing vector
       types on.  If you change these names you'll need to change
@@ -20481,6 +20490,8 @@ arm_init_neon_builtins (void)
                                              "__builtin_neon_qi");
    (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
                                              "__builtin_neon_hi");
+  (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
+                                            "__builtin_neon_hf");
    (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
                                              "__builtin_neon_si");
    (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
@@ -20522,6 +20533,8 @@ arm_init_neon_builtins (void)
      build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
    V4HI_type_node =
      build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+  V4HF_type_node =
+    build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
    V2SI_type_node =
      build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
    V2SF_type_node =
@@ -20644,7 +20657,7 @@ arm_init_neon_builtins (void)
        neon_builtin_datum *d = &neon_builtin_data[i];
  
        const char* const modenames[] = {
-       "v8qi", "v4hi", "v2si", "v2sf", "di",
+       "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
         "v16qi", "v8hi", "v4si", "v4sf", "v2di",
         "ti", "ei", "oi"
        };
@@ -20847,8 +20860,9 @@ arm_init_neon_builtins (void)
         case NEON_REINTERP:
           {
             /* We iterate over 5 doubleword types, then 5 quadword
-              types.  */
-           int rhs = d->mode % 5;
+              types. V4HF is not a type used in reinterpret, so we translate
+              d->mode to the correct index in reinterp_ftype_dreg.  */
+           int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
             switch (insn_data[d->code].operand[0].mode)
               {
               case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
@@ -20865,7 +20879,38 @@ arm_init_neon_builtins (void)
               }
           }
           break;
+       case NEON_FLOAT_WIDEN:
+         {
+           tree eltype = NULL_TREE;
+           tree return_type = NULL_TREE;
  
+           switch (insn_data[d->code].operand[1].mode)
+           {
+             case V4HFmode:
+               eltype = V4HF_type_node;
+               return_type = V4SF_type_node;
+               break;
+             default: gcc_unreachable ();
+           }
+           ftype = build_function_type_list (return_type, eltype, NULL);
+           break;
+         }
+       case NEON_FLOAT_NARROW:
+         {
+           tree eltype = NULL_TREE;
+           tree return_type = NULL_TREE;
+
+           switch (insn_data[d->code].operand[1].mode)
+           {
+             case V4SFmode:
+               eltype = V4SF_type_node;
+               return_type = V4HF_type_node;
+               break;
+             default: gcc_unreachable ();
+           }
+           ftype = build_function_type_list (return_type, eltype, NULL);
+           break;
+         }
         default:
           gcc_unreachable ();
         }
@@ -21862,6 +21907,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
      case NEON_DUP:
      case NEON_RINT:
      case NEON_SPLIT:
+    case NEON_FLOAT_WIDEN:
+    case NEON_FLOAT_NARROW:
      case NEON_REINTERP:
        return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
          NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
@@ -25517,7 +25564,7 @@ arm_vector_mode_supported_p (enum machine_mode mode)
  {
    /* Neon also supports V2SImode, etc. listed in the clause below.  */
    if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
-      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
+      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
      return true;
  
    if ((TARGET_NEON || TARGET_IWMMXT)
@@ -26361,6 +26408,7 @@ static arm_mangle_map_entry arm_mangle_map[] = {
    { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
    { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
    { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
+  { V4HFmode,  "__builtin_neon_hf",     "18__simd64_float16_t" },
    { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
    { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
    { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

index cc1774b559c0966b61729c7ee26cb6b6b1b0e2ae..c47fdf68098536ef317c04809d5d23efacda4712 100644 (file)
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits;
  /* Modes valid for Neon D registers.  */
  #define VALID_NEON_DREG_MODE(MODE) \
    ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
-   || (MODE) == V2SFmode || (MODE) == DImode)
+   || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
  
  /* Modes valid for Neon Q registers.  */
  #define VALID_NEON_QREG_MODE(MODE) \
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h

index 4d945cec558ba03f865b1b24fe6257a8f943277f..e23d03b9d10d8b4008133374bb7d5b70f84f171d 100644 (file)
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t   __attribute__ ((__vector_size__ (8)));
  typedef __builtin_neon_si int32x2_t    __attribute__ ((__vector_size__ (8)));
  typedef __builtin_neon_di int64x1_t;
  typedef __builtin_neon_sf float32x2_t  __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_hf float16x4_t  __attribute__ ((__vector_size__ (8)));
  typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
  typedef __builtin_neon_poly16 poly16x4_t       __attribute__ ((__vector_size__ (8)));
  typedef __builtin_neon_uqi uint8x8_t   __attribute__ ((__vector_size__ (8)));
@@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a)
    return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
  }
  
+#if ((__ARM_FP & 0x2) != 0)
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_f32 (float32x4_t __a)
+{
+  return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
+}
+
+#endif
+#if ((__ARM_FP & 0x2) != 0)
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvt_f32_f16 (float16x4_t __a)
+{
+  return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
+}
+
+#endif
  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  vcvt_n_s32_f32 (float32x2_t __a, const int __b)
  {
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def

index 5bf6d31cb8844291db070eb1f419a9b0465cd043..92f1d7ad1c437b7c043c6f93c4f3bade8609f8a9 100644 (file)
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
  VAR2 (UNOP, vrev16, v8qi, v16qi),
  VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
  VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
+VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
+VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
  VAR10 (SELECT, vbsl,
          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR2 (RINT, vrintn, v2sf, v4sf),
diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml

index a811e156133f654aae5fa4180b0e0da6e8dfe6e5..948b162ccfa7f09574ae2f41a88a5378b323383f 100644 (file)
--- a/gcc/config/arm/neon-gen.ml
+++ b/gcc/config/arm/neon-gen.ml
@@ -121,6 +121,7 @@ let rec signed_ctype = function
    | T_uint16 | T_int16 -> T_intHI
    | T_uint32 | T_int32 -> T_intSI
    | T_uint64 | T_int64 -> T_intDI
+  | T_float16 -> T_floatHF
    | T_float32 -> T_floatSF
    | T_poly8 -> T_intQI
    | T_poly16 -> T_intHI
@@ -275,8 +276,8 @@ let rec mode_suffix elttype shape =
      let mode = mode_of_elt elttype shape in
      string_of_mode mode
    with MixedMode (dst, src) ->
-    let dstmode = mode_of_elt dst shape
-    and srcmode = mode_of_elt src shape in
+    let dstmode = mode_of_elt ~argpos:0 dst shape
+    and srcmode = mode_of_elt ~argpos:1 src shape in
      string_of_mode dstmode ^ string_of_mode srcmode
  
  let get_shuffle features =
@@ -291,19 +292,24 @@ let print_feature_test_start features =
      match List.find (fun feature ->
                         match feature with Requires_feature _ -> true
                                          | Requires_arch _ -> true
+                                        | Requires_FP_bit _ -> true
                                          | _ -> false)
                       features with
-      Requires_feature feature -> 
+      Requires_feature feature ->
          Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
      | Requires_arch arch ->
          Format.printf "#if __ARM_ARCH >= %d@\n" arch
+    | Requires_FP_bit bit ->
+        Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
+                      (1 lsl bit)
      | _ -> assert false
    with Not_found -> assert true
  
  let print_feature_test_end features =
    let feature =
-    List.exists (function Requires_feature x -> true
-                          | Requires_arch x -> true
+    List.exists (function Requires_feature _ -> true
+                          | Requires_arch _ -> true
+                          | Requires_FP_bit _ -> true
                            |  _ -> false) features in
    if feature then Format.printf "#endif@\n"
  
@@ -365,6 +371,7 @@ let deftypes () =
      "__builtin_neon_hi", "int", 16, 4;
      "__builtin_neon_si", "int", 32, 2;
      "__builtin_neon_di", "int", 64, 1;
+    "__builtin_neon_hf", "float", 16, 4;
      "__builtin_neon_sf", "float", 32, 2;
      "__builtin_neon_poly8", "poly", 8, 8;
      "__builtin_neon_poly16", "poly", 16, 4;
diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml

index d855c297981e1cd1da8e8e77219ca2d44166abff..543318bfcc6161b814000be507f9a79a7cd9cdbd 100644 (file)
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -163,10 +163,12 @@ let effective_target features =
      match List.find (fun feature ->
                         match feature with Requires_feature _ -> true
                                          | Requires_arch _ -> true
+                                        | Requires_FP_bit 1 -> true
                                          | _ -> false)
                       features with
        Requires_feature "FMA" -> "arm_neonv2"
      | Requires_arch 8 -> "arm_v8_neon"
+    | Requires_FP_bit 1 -> "arm_neon_fp16"
      | _ -> assert false
    with Not_found -> "arm_neon"
  
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md

index 67f89b2fb03cd9e37baa88622699eb893838b137..f91a6f7d08bfd2499512a08baa23cf8e949aaffb 100644 (file)
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3258,6 +3258,24 @@
                     (const_string "neon_fp_vadd_qqq_vabs_qq")))]
  )
  
+(define_insn "neon_vcvtv4sfv4hf"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
+                         UNSPEC_VCVT))]
+  "TARGET_NEON && TARGET_FP16"
+  "vcvt.f32.f16\t%q0, %P1"
+  [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
+)
+
+(define_insn "neon_vcvtv4hfv4sf"
+  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
+       (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
+                         UNSPEC_VCVT))]
+  "TARGET_NEON && TARGET_FP16"
+  "vcvt.f16.f32\t%P0, %q1"
+  [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
+)
+
  (define_insn "neon_vcvt_n<mode>"
    [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml

index 34090c9655cf778bff0a83933de043f407b3a97e..ca9a4c06aa645591445c84446ea7f688234ddf13 100644 (file)
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -21,7 +21,7 @@
     <http://www.gnu.org/licenses/>.  *)
  
  (* Shorthand types for vector elements.  *)
-type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
+type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
            | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
            | Cast of elts * elts | NoElts
  
@@ -37,6 +37,7 @@ type vectype = T_int8x8    | T_int8x16
              | T_uint16x4  | T_uint16x8
              | T_uint32x2  | T_uint32x4
              | T_uint64x1  | T_uint64x2
+            | T_float16x4
              | T_float32x2 | T_float32x4
              | T_poly8x8   | T_poly8x16
              | T_poly16x4  | T_poly16x8
@@ -46,11 +47,13 @@ type vectype = T_int8x8    | T_int8x16
               | T_uint8     | T_uint16
               | T_uint32    | T_uint64
               | T_poly8     | T_poly16
-             | T_float32   | T_arrayof of int * vectype
+             | T_float16   | T_float32
+             | T_arrayof of int * vectype
               | T_ptrto of vectype | T_const of vectype
               | T_void      | T_intQI
               | T_intHI     | T_intSI
-             | T_intDI     | T_floatSF
+             | T_intDI     | T_floatHF
+             | T_floatSF
  
  (* The meanings of the following are:
       TImode : "Tetra", two registers (four words).
@@ -92,7 +95,7 @@ type arity = Arity0 of vectype
            | Arity3 of vectype * vectype * vectype * vectype
             | Arity4 of vectype * vectype * vectype * vectype * vectype
  
-type vecmode = V8QI | V4HI | V2SI | V2SF | DI
+type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
               | V16QI | V8HI | V4SI | V4SF | V2DI
               | QI | HI | SI | SF
  
@@ -284,18 +287,22 @@ type features =
    | Fixed_core_reg
      (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined.  *)
    | Requires_feature of string
+    (* Mark that the intrinsic requires a particular architecture version.  *)
    | Requires_arch of int
+    (* Mark that the intrinsic requires a particular bit in __ARM_FP to
+    be set.   *)
+  | Requires_FP_bit of int
  
  exception MixedMode of elts * elts
  
  let rec elt_width = function
      S8 | U8 | P8 | I8 | B8 -> 8
-  | S16 | U16 | P16 | I16 | B16 -> 16
+  | S16 | U16 | P16 | I16 | B16 | F16 -> 16
    | S32 | F32 | U32 | I32 | B32 -> 32
    | S64 | U64 | I64 | B64 -> 64
    | Conv (a, b) ->
        let wa = elt_width a and wb = elt_width b in
-      if wa = wb then wa else failwith "element width?"
+      if wa = wb then wa else raise (MixedMode (a, b))
    | Cast (a, b) -> raise (MixedMode (a, b))
    | NoElts -> failwith "No elts"
  
@@ -303,7 +310,7 @@ let rec elt_class = function
      S8 | S16 | S32 | S64 -> Signed
    | U8 | U16 | U32 | U64 -> Unsigned
    | P8 | P16 -> Poly
-  | F32 -> Float
+  | F16 | F32 -> Float
    | I8 | I16 | I32 | I64 -> Int
    | B8 | B16 | B32 | B64 -> Bits
    | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
@@ -315,6 +322,7 @@ let elt_of_class_width c w =
    | Signed, 16 -> S16
    | Signed, 32 -> S32
    | Signed, 64 -> S64
+  | Float, 16 -> F16
    | Float, 32 -> F32
    | Unsigned, 8 -> U8
    | Unsigned, 16 -> U16
@@ -384,7 +392,12 @@ let find_key_operand operands =
    in
      scan ((Array.length operands) - 1)
  
-let rec mode_of_elt elt shape =
+(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
+   SHAPE.  For a Use_operands shape, if ARGPOS is passed then return the mode
+   for the given argument position, else determine which argument to return a
+   mode for automatically.  *)
+
+let rec mode_of_elt ?argpos elt shape =
    let flt = match elt_class elt with
      Float | ConvClass(_, Float) -> true | _ -> false in
    let idx =
@@ -394,7 +407,10 @@ let rec mode_of_elt elt shape =
    in match shape with
      All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
    | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
-      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
+      if flt then
+        [| V8QI; V4HF; V2SF; DI |].(idx)
+      else
+        [| V8QI; V4HI; V2SI; DI |].(idx)
    | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
    | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
        [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
@@ -404,7 +420,11 @@ let rec mode_of_elt elt shape =
    | Long_imm ->
        [| V8QI; V4HI; V2SI; DI |].(idx)
    | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
-  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
+  | Use_operands ops ->
+      begin match argpos with
+        None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
+      | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
+      end
    | _ -> failwith "invalid shape"
  
  (* Modify an element type dependent on the shape of the instruction and the
@@ -454,10 +474,11 @@ let type_for_elt shape elt no =
          | U16 -> T_uint16x4
          | U32 -> T_uint32x2
          | U64 -> T_uint64x1
+        | F16 -> T_float16x4
          | F32 -> T_float32x2
          | P8 -> T_poly8x8
          | P16 -> T_poly16x4
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Dreg"
          end
      | Qreg ->
          begin match elt with
@@ -472,7 +493,7 @@ let type_for_elt shape elt no =
          | F32 -> T_float32x4
          | P8 -> T_poly8x16
          | P16 -> T_poly16x8
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Qreg"
          end
      | Corereg ->
          begin match elt with
@@ -487,7 +508,7 @@ let type_for_elt shape elt no =
          | P8 -> T_poly8
          | P16 -> T_poly16
          | F32 -> T_float32
-        | _ -> failwith "Bad elt type"
+        | _ -> failwith "Bad elt type for Corereg"
          end
      | Immed ->
          T_immediate (0, 0)
@@ -506,7 +527,7 @@ let type_for_elt shape elt no =
  let vectype_size = function
      T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
    | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
-  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
+  | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
    | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
    | T_uint8x16 | T_uint16x8  | T_uint32x4  | T_uint64x2
    | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
@@ -1217,6 +1238,10 @@ let ops =
        [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
      Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
        [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+    Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+          Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
+    Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
+          Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
      Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
        [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
      Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
@@ -1782,7 +1807,7 @@ let rec string_of_elt = function
    | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
    | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
    | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
-  | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
+  | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
    | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
    | NoElts -> failwith "No elts"
  
@@ -1809,6 +1834,7 @@ let string_of_vectype vt =
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
+  | T_float16x4 -> affix "float16x4"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
@@ -1825,6 +1851,7 @@ let string_of_vectype vt =
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
+  | T_float16 -> affix "float16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
@@ -1832,6 +1859,7 @@ let string_of_vectype vt =
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
+  | T_floatHF -> "__builtin_neon_hf"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        let basename = name (fun x -> x) base in
@@ -1853,10 +1881,10 @@ let string_of_inttype = function
    | B_XImode -> "__builtin_neon_xi"
  
  let string_of_mode = function
-    V8QI -> "v8qi" | V4HI  -> "v4hi"  | V2SI -> "v2si" | V2SF -> "v2sf"
-  | DI   -> "di"   | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
-  | V4SF -> "v4sf" | V2DI  -> "v2di"  | QI -> "qi" | HI -> "hi" | SI -> "si"
-  | SF -> "sf"
+    V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF  -> "v4hf"  | V2SI -> "v2si"
+  | V2SF -> "v2sf" | DI   -> "di"   | V16QI -> "v16qi" | V8HI -> "v8hi"
+  | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI  -> "v2di"  | QI   -> "qi"
+  | HI -> "hi" | SI -> "si" | SF -> "sf"
  
  (* Use uppercase chars for letters which form part of the intrinsic name, but
     should be omitted from the builtin name (the info is passed in an extra
diff --git a/gcc/doc/arm-neon-intrinsics.texi b/gcc/doc/arm-neon-intrinsics.texi

index af8eff012f78c91993755ef61751814b1a70f0a1..fcd6c0f5305c027b5921f8a7bfa72e36a31032e3 100644 (file)
--- a/gcc/doc/arm-neon-intrinsics.texi
+++ b/gcc/doc/arm-neon-intrinsics.texi
@@ -5747,6 +5747,18 @@
  @end itemize
  
  
+@itemize @bullet
+@item float16x4_t vcvt_f16_f32 (float32x4_t)
+@*@emph{Form of expected instruction(s):} @code{vcvt.f16.f32 @var{d0}, @var{q0}}
+@end itemize
+
+
+@itemize @bullet
+@item float32x4_t vcvt_f32_f16 (float16x4_t)
+@*@emph{Form of expected instruction(s):} @code{vcvt.f32.f16 @var{q0}, @var{d0}}
+@end itemize
+
+
  @itemize @bullet
  @item float32x2_t vcvt_n_f32_u32 (uint32x2_t, const int)
  @*@emph{Form of expected instruction(s):} @code{vcvt.f32.u32 @var{d0}, @var{d0}, #@var{0}}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 2eaa45366e01869c7b73eae37f1fefb5084bd8ed..0bc2c8350f28e8bd00661d59a6347656c9ccd048 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * lib/target-supports.exp
+       (check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
+       (check_effective_target_arm_neon_fp16_ok): Likewise.
+       (add_options_for_arm_neon_fp16): Likewise.
+       * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
+       * gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
+
  2013-04-24  Vladimir Makarov  <vmakarov@redhat.com>
  
         PR rtl-optimizations/57046
diff --git a/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c b/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c

new file mode 100644 (file)

index 0000000..6675596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c
@@ -0,0 +1,20 @@
+/* Test the `vcvtf16_f32' ARM Neon intrinsic.  */
+/* This file was autogenerated by neon-testgen.  */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neon_fp16_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neon_fp16 } */
+
+#include "arm_neon.h"
+
+void test_vcvtf16_f32 (void)
+{
+  float16x4_t out_float16x4_t;
+  float32x4_t arg0_float32x4_t;
+
+  out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
+}
+
+/* { dg-final { scan-assembler "vcvt\.f16.f32\[        \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[         \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c b/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c

new file mode 100644 (file)

index 0000000..dd0ce17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c
@@ -0,0 +1,20 @@
+/* Test the `vcvtf32_f16' ARM Neon intrinsic.  */
+/* This file was autogenerated by neon-testgen.  */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neon_fp16_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_neon_fp16 } */
+
+#include "arm_neon.h"
+
+void test_vcvtf32_f16 (void)
+{
+  float32x4_t out_float32x4_t;
+  float16x4_t arg0_float16x4_t;
+
+  out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
+}
+
+/* { dg-final { scan-assembler "vcvt\.f32.f16\[        \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[         \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 4604af63b8990ec42de7a1b1cd8071418a500739..593ed06ad361e11900740a314c54af64f001d11b 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2255,6 +2255,47 @@ proc check_effective_target_arm_neon_ok { } {
                 check_effective_target_arm_neon_ok_nocache]
  }
  
+# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
+# -mfloat-abi=softfp or equivalent options.  Some multilibs may be
+# incompatible with these options.  Also set et_arm_neon_flags to the
+# best options to add.
+
+proc check_effective_target_arm_neon_fp16_ok_nocache { } {
+    global et_arm_neon_fp16_flags
+    set et_arm_neon_fp16_flags ""
+    if { [check_effective_target_arm32] } {
+       foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
+                      "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
+           if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
+               #include "arm_neon.h"
+               float16x4_t
+               foo (float32x4_t arg)
+               {
+                  return vcvt_f16_f32 (arg);
+               }
+           } "$flags"] } {
+               set et_arm_neon_fp16_flags $flags
+               return 1
+           }
+       }
+    }
+
+    return 0
+}
+
+proc check_effective_target_arm_neon_fp16_ok { } {
+    return [check_cached_effective_target arm_neon_fp16_ok \
+               check_effective_target_arm_neon_fp16_ok_nocache]
+}
+
+proc add_options_for_arm_neon_fp16 { flags } {
+    if { ! [check_effective_target_arm_neon_fp16_ok] } {
+       return "$flags"
+    }
+    global et_arm_neon_fp16_flags
+    return "$flags $et_arm_neon_fp16_flags"
+}
+
  # Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
  # -mfloat-abi=softfp or equivalent options.  Some multilibs may be
  # incompatible with these options.  Also set et_arm_v8_neon_flags to the
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Thu, 25 Apr 2013 10:54:39 +0000 (10:54 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/arm/arm.c		patch \| blob \| history
gcc/config/arm/arm.h		patch \| blob \| history
gcc/config/arm/arm_neon.h		patch \| blob \| history
gcc/config/arm/arm_neon_builtins.def		patch \| blob \| history
gcc/config/arm/neon-gen.ml		patch \| blob \| history
gcc/config/arm/neon-testgen.ml		patch \| blob \| history
gcc/config/arm/neon.md		patch \| blob \| history
gcc/config/arm/neon.ml		patch \| blob \| history
gcc/doc/arm-neon-intrinsics.texi		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/lib/target-supports.exp		patch \| blob \| history