gcc/
author Andrey Turetskiy <andrey.turetskiy@intel.com>
Fri, 4 Sep 2015 08:54:14 +0000 (08:54 +0000)
committer Kirill Yukhin <kyukhin@gcc.gnu.org>
Fri, 4 Sep 2015 08:54:14 +0000 (08:54 +0000)
* config/i386/i386-builtin-types.def
(VOID_PFLOAT_HI_V8DI_V16SF_INT): New.
(VOID_PDOUBLE_QI_V16SI_V8DF_INT): Ditto.
(VOID_PINT_HI_V8DI_V16SI_INT): Ditto.
(VOID_PLONGLONG_QI_V16SI_V8DI_INT): Ditto.
* config/i386/i386.c
(ix86_builtins): Add IX86_BUILTIN_SCATTERALTSIV8DF,
IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
IX86_BUILTIN_SCATTERALTDIV16SI.
(ix86_init_mmx_sse_builtins): Define __builtin_ia32_scatteraltsiv8df,
__builtin_ia32_scatteraltdiv8sf, __builtin_ia32_scatteraltsiv8di,
__builtin_ia32_scatteraltdiv8si.
(ix86_expand_builtin): Handle IX86_BUILTIN_SCATTERALTSIV8DF,
IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
IX86_BUILTIN_SCATTERALTDIV16SI.
(ix86_vectorize_builtin_scatter): New.
(TARGET_VECTORIZE_BUILTIN_SCATTER): Define as
ix86_vectorize_builtin_scatter.

Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
Co-Authored-By: Petr Murzin <petr.murzin@intel.com>
From-SVN: r227482

gcc/ChangeLog
gcc/config/i386/i386-builtin-types.def
gcc/config/i386/i386.c

index 3b783cccbc1b92448b3d50e02d80987f1af05c6c..7ef424fad923d866a94bb41136f208818d87a41f 100644 (file)
@@ -1,3 +1,26 @@
+2015-09-04  Andrey Turetskiy  <andrey.turetskiy@intel.com>
+           Petr Murzin  <petr.murzin@intel.com>
+           Kirill Yukhin <kirill.yukhin@intel.com>
+
+       * config/i386/i386-builtin-types.def
+       (VOID_PFLOAT_HI_V8DI_V16SF_INT): New.
+       (VOID_PDOUBLE_QI_V16SI_V8DF_INT): Ditto.
+       (VOID_PINT_HI_V8DI_V16SI_INT): Ditto.
+       (VOID_PLONGLONG_QI_V16SI_V8DI_INT): Ditto.
+       * config/i386/i386.c
+       (ix86_builtins): Add IX86_BUILTIN_SCATTERALTSIV8DF,
+       IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
+       IX86_BUILTIN_SCATTERALTDIV16SI.
+       (ix86_init_mmx_sse_builtins): Define __builtin_ia32_scatteraltsiv8df,
+       __builtin_ia32_scatteraltdiv8sf, __builtin_ia32_scatteraltsiv8di,
+       __builtin_ia32_scatteraltdiv8si.
+       (ix86_expand_builtin): Handle IX86_BUILTIN_SCATTERALTSIV8DF,
+       IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
+       IX86_BUILTIN_SCATTERALTDIV16SI.
+       (ix86_vectorize_builtin_scatter): New.
+       (TARGET_VECTORIZE_BUILTIN_SCATTER): Define as
+       ix86_vectorize_builtin_scatter.
+
 2015-09-04  Andrey Turetskiy  <andrey.turetskiy@intel.com>
            Petr Murzin  <petr.murzin@intel.com>
            Kirill Yukhin <kirill.yukhin@intel.com>
index ee31ee34c484ec9a1269217d4798bd90b0ae7693..b892f086798c8ebd86fdf320a10dca4711c880f9 100644 (file)
@@ -1021,6 +1021,10 @@ DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V8DI, V16SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V16SI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, HI, V8DI, V16SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V16SI, V8DI, INT)
 
 DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT)
index c69c738caa0d57eb168f3f7cc0a9ce8544e31cfb..d78f4e7f1759d83bccbabdd1cf7725d5ee7affbd 100644 (file)
@@ -30388,6 +30388,10 @@ enum ix86_builtins
   IX86_BUILTIN_GATHER3SIV16SI,
   IX86_BUILTIN_GATHER3SIV8DF,
   IX86_BUILTIN_GATHER3SIV8DI,
+  IX86_BUILTIN_SCATTERALTSIV8DF,
+  IX86_BUILTIN_SCATTERALTDIV16SF,
+  IX86_BUILTIN_SCATTERALTSIV8DI,
+  IX86_BUILTIN_SCATTERALTDIV16SI,
   IX86_BUILTIN_SCATTERDIV16SF,
   IX86_BUILTIN_SCATTERDIV16SI,
   IX86_BUILTIN_SCATTERDIV8DF,
@@ -34204,6 +34208,21 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
               VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
               IX86_BUILTIN_SCATTERDIV2DI);
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
+              VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
+              IX86_BUILTIN_SCATTERALTSIV8DF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
+              VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
+              IX86_BUILTIN_SCATTERALTDIV16SF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
+              VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
+              IX86_BUILTIN_SCATTERALTSIV8DI);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
+              VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
+              IX86_BUILTIN_SCATTERALTDIV16SI);
 
   /* AVX512PF */
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
@@ -39860,6 +39879,18 @@ rdseed_step:
     case IX86_BUILTIN_GATHERPFDPD:
       icode = CODE_FOR_avx512pf_gatherpfv8sidf;
       goto vec_prefetch_gen;
+    case IX86_BUILTIN_SCATTERALTSIV8DF:
+      icode = CODE_FOR_avx512f_scattersiv8df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV16SF:
+      icode = CODE_FOR_avx512f_scatterdiv16sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV8DI:
+      icode = CODE_FOR_avx512f_scattersiv8di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV16SI:
+      icode = CODE_FOR_avx512f_scatterdiv16si;
+      goto scatter_gen;
     case IX86_BUILTIN_GATHERPFDPS:
       icode = CODE_FOR_avx512pf_gatherpfv16sisf;
       goto vec_prefetch_gen;
@@ -40123,6 +40154,36 @@ rdseed_step:
       mode3 = insn_data[icode].operand[3].mode;
       mode4 = insn_data[icode].operand[4].mode;
 
+      /* Scatter instruction stores operand op3 to memory with
+        indices from op2 and scale from op4 under writemask op1.
+        If index operand op2 has more elements than source operand
+        op3, only its low half needs to be used.  And vice versa.  */
+      switch (fcode)
+       {
+       case IX86_BUILTIN_SCATTERALTSIV8DF:
+       case IX86_BUILTIN_SCATTERALTSIV8DI:
+         half = gen_reg_rtx (V8SImode);
+         if (!nonimmediate_operand (op2, V16SImode))
+           op2 = copy_to_mode_reg (V16SImode, op2);
+         emit_insn (gen_vec_extract_lo_v16si (half, op2));
+         op2 = half;
+         break;
+       case IX86_BUILTIN_SCATTERALTDIV16SF:
+       case IX86_BUILTIN_SCATTERALTDIV16SI:
+         half = gen_reg_rtx (mode3);
+         if (mode3 == V8SFmode)
+           gen = gen_vec_extract_lo_v16sf;
+         else
+           gen = gen_vec_extract_lo_v16si;
+         if (!nonimmediate_operand (op3, GET_MODE (op3)))
+           op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+         emit_insn (gen (half, op3));
+         op3 = half;
+         break;
+       default:
+         break;
+       }
+
       /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
@@ -41202,6 +41263,62 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
   return ix86_get_builtin (code);
 }
 
+/* Implement TARGET_VECTORIZE_BUILTIN_SCATTER.  Return the decl of the
+   builtin that scatter-stores a vector of type VECTYPE using indices of
+   type INDEX_TYPE and SCALE, or NULL_TREE if it is not available.  */
+
+static tree
+ix86_vectorize_builtin_scatter (const_tree vectype,
+                               const_tree index_type, int scale)
+{
+  bool si;
+  enum ix86_builtins code;
+
+  if (!TARGET_AVX512F)
+    return NULL_TREE;
+
+  if ((TREE_CODE (index_type) != INTEGER_TYPE
+       && !POINTER_TYPE_P (index_type))
+      || (TYPE_MODE (index_type) != SImode
+         && TYPE_MODE (index_type) != DImode))
+    return NULL_TREE;
+
+  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
+    return NULL_TREE;
+
+  /* v*scatter* insn sign extends index to pointer mode.  */
+  if (TYPE_PRECISION (index_type) < POINTER_SIZE
+      && TYPE_UNSIGNED (index_type))
+    return NULL_TREE;
+
+  /* Scale can be 1, 2, 4 or 8.  */
+  if (scale <= 0 || scale > 8 || (scale & (scale - 1)) != 0)
+    return NULL_TREE;
+
+  si = TYPE_MODE (index_type) == SImode;
+  switch (TYPE_MODE (vectype))
+    {
+    case V8DFmode:
+      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
+      break;
+    case V8DImode:
+      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
+      break;
+    case V16SFmode:
+      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
+      break;
+    case V16SImode:
+      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
+      break;
+    default:
+      return NULL_TREE;
+    }
+
+  /* Look the decl up via ix86_get_builtin, as ix86_vectorize_builtin_gather
+     does, instead of indexing the builtin table directly.  */
+  return ix86_get_builtin (code);
+}
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available.  */
 
@@ -52332,6 +52449,9 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 #undef TARGET_VECTORIZE_BUILTIN_GATHER
 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
 
+#undef TARGET_VECTORIZE_BUILTIN_SCATTER
+#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
+
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal