i386.c (override_options): Initialize ix86_veclib_handler to ix86_veclibabi_svml...
authorUros Bizjak <uros@gcc.gnu.org>
Fri, 28 Mar 2008 18:17:00 +0000 (19:17 +0100)
committerUros Bizjak <uros@gcc.gnu.org>
Fri, 28 Mar 2008 18:17:00 +0000 (19:17 +0100)
        * config/i386/i386.c (override_options): Initialize
        ix86_veclib_handler to ix86_veclibabi_svml when
        -mveclibabi=svml is used.
        (ix86_veclibabi_svml): New function for SVML ABI style
        vectorization support.
        * doc/invoke.texi (-mveclibabi) [svml]: Document new target option.

testsuite/ChangeLog:

        * gcc.target/i386/vectorize6.c: New test.

From-SVN: r133692

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/doc/invoke.texi
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/vectorize6.c [new file with mode: 0644]

index 16590e4a9327f4c43cc545694044c99bbe0fd2eb..7f0fe265f4e39759cf7593d022a6ac1adc7f6c40 100644 (file)
@@ -1,3 +1,12 @@
+2008-03-28  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.c (override_options): Initialize
+       ix86_veclib_handler to ix86_veclibabi_svml when
+       -mveclibabi=svml is used.
+       (ix86_veclibabi_svml): New function for SVML ABI style
+       vectorization support.
+       * doc/invoke.texi (-mveclibabi) [svml]: Document new target option.
+
 2008-03-28  Rafael Espindola  <espindola@google.com>
 
        * fold-const.c (tree_unary_nonnegative_warnv_p): Make it public.
@@ -34,8 +43,7 @@
        ASSERT_EXPR <name, expr OP limit>.
        (register_edge_assert_for_1): Adjust callers.
        (find_assert_locations): Likewise.
-       (process_assert_insertions_for): Build condition from
-       expression.
+       (process_assert_insertions_for): Build condition from expression.
        (extract_range_from_assert): Handle ASSERT_EXPRs
        of the form ASSERT_EXPR <name, expr OP limit>.
        (register_edge_assert_for_2): New helper registering
@@ -54,7 +62,7 @@
 2008-03-28  Nick Clifton  <nickc@redhat.com>
 
        * config/mn10300/mn10300.c (mn10300_secondary_reload_class):
-        Return GENERAL_REGS for stack adjustment reloads.
+       Return GENERAL_REGS for stack adjustment reloads.
 
 2008-03-28  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
        tree_to_aff_combination_expand.
        (get_inner_reference_aff): New function.
        * tree-parloops.c (loop_parallel_p): Free vectorizer info.
-        * tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h.
-        (struct lim_aux_data): sm_done field removed.
-        (mem_ref_loc_p, mem_ref_locs_p): New types.
-        (struct mem_ref): Added id, stored, accesses_in_loop,
-        indep_loop, dep_loop, indep_ref, dep_ref fields.
-        Removed is_stored, locs and next fields.
-        (memory_accesses): New variable.
-        (movement_possibility): Do not allow moving statements
-        that store to memory.
-        (outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt):
-        New functions.
-        (determine_max_movement): For statements with memory references,
-        find the outermost loop in that the reference is independent.
-        (move_computations_stmt): Mark the virtual operands for
-        renaming.
-        (memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored,
-        gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq,
-        vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores,
-        add_vop_ref_mapping, create_vop_ref_mapping_loop,
-        create_vop_ref_mapping, analyze_memory_references,
-        cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc,
-        get_all_locs_in_loop, ref_always_accessed_p,
-        refs_independent_p, record_indep_loop, ref_indep_loop_p_1,
-        ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm,
-        store_motion_loop, store_motion): New functions.
-        (struct vop_to_refs_elt): New type.
-        (record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs,
-        memref_hash, memref_eq, hoist_memory_references): Rewritten.
-        (schedule_sm): Replaced by...
-        (execute_sm): ... this.
-        (determine_lsm_ref, hoist_memory_references,
-        loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs,
-        find_more_ref_vops, free_mem_ref, free_mem_refs,
-        determine_lsm_loop, determine_lsm): Removed.
-        (tree_ssa_lim_finalize): Free data structures used by store
-        motion.
-        (tree_ssa_lim): Call analyze_memory_references.  Use
-        store_motion instead of determine_lsm.
+       * tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h.
+       (struct lim_aux_data): sm_done field removed.
+       (mem_ref_loc_p, mem_ref_locs_p): New types.
+       (struct mem_ref): Added id, stored, accesses_in_loop,
+       indep_loop, dep_loop, indep_ref, dep_ref fields.
+       Removed is_stored, locs and next fields.
+       (memory_accesses): New variable.
+       (movement_possibility): Do not allow moving statements
+       that store to memory.
+       (outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt):
+       New functions.
+       (determine_max_movement): For statements with memory references,
+       find the outermost loop in that the reference is independent.
+       (move_computations_stmt): Mark the virtual operands for renaming.
+       (memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored,
+       gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq,
+       vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores,
+       add_vop_ref_mapping, create_vop_ref_mapping_loop,
+       create_vop_ref_mapping, analyze_memory_references,
+       cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc,
+       get_all_locs_in_loop, ref_always_accessed_p,
+       refs_independent_p, record_indep_loop, ref_indep_loop_p_1,
+       ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm,
+       store_motion_loop, store_motion): New functions.
+       (struct vop_to_refs_elt): New type.
+       (record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs,
+       memref_hash, memref_eq, hoist_memory_references): Rewritten.
+       (schedule_sm): Replaced by...
+       (execute_sm): ... this.
+       (determine_lsm_ref, hoist_memory_references,
+       loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs,
+       find_more_ref_vops, free_mem_ref, free_mem_refs,
+       determine_lsm_loop, determine_lsm): Removed.
+       (tree_ssa_lim_finalize): Free data structures used by store motion.
+       (tree_ssa_lim): Call analyze_memory_references.  Use
+       store_motion instead of determine_lsm.
 
 2008-03-27  Paolo Bonzini  <bonzini@gnu.org>
 
 
 2008-03-23  H.J. Lu  <hongjiu.lu@intel.com>
 
-       * config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and
-       CX_REG.
+       * config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and CX_REG.
 
 2008-03-23  Zuxy Meng <zuxy.meng@gmail.com>
 
index 84a54d78bb039b000c13f76496493ecd00dae85f..4fc8fcd336ee1f030796e3e458d8eb503af9ea26 100644 (file)
@@ -1830,6 +1830,7 @@ static int ix86_isa_flags_explicit;
 
 /* Vectorization library interface and handlers.  */
 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
 
 /* Implement TARGET_HANDLE_OPTION.  */
@@ -2673,7 +2674,9 @@ override_options (void)
   /* Use external vectorized library in vectorizing intrinsics.  */
   if (ix86_veclibabi_string)
     {
-      if (strcmp (ix86_veclibabi_string, "acml") == 0)
+      if (strcmp (ix86_veclibabi_string, "svml") == 0)
+       ix86_veclib_handler = ix86_veclibabi_svml;
+      else if (strcmp (ix86_veclibabi_string, "acml") == 0)
        ix86_veclib_handler = ix86_veclibabi_acml;
       else
        error ("unknown vectorization library ABI type (%s) for "
@@ -21415,8 +21418,120 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
   return NULL_TREE;
 }
 
-/* Handler for an ACML-style interface to a library with vectorized
-   intrinsics.  */
+/* Handler for an SVML-style interface to a library with vectorized
+   intrinsics.  Return a decl for FN's SVML routine, or NULL_TREE.  */
+
+static tree
+ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
+{
+  char name[20];
+  tree fntype, new_fndecl, args;
+  unsigned arity;
+  const char *bname;
+  enum machine_mode el_mode, in_mode;
+  int n, in_n;
+
+  /* The SVML is suitable for unsafe math only.  */
+  if (!flag_unsafe_math_optimizations)
+    return NULL_TREE;
+
+  el_mode = TYPE_MODE (TREE_TYPE (type_out));
+  n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  if (el_mode != in_mode
+      || n != in_n)
+    return NULL_TREE;
+
+  switch (fn)
+    {
+    case BUILT_IN_EXP:
+    case BUILT_IN_LOG:
+    case BUILT_IN_LOG10:
+    case BUILT_IN_POW:
+    case BUILT_IN_TANH:
+    case BUILT_IN_TAN:
+    case BUILT_IN_ATAN:
+    case BUILT_IN_ATAN2:
+    case BUILT_IN_ATANH:
+    case BUILT_IN_CBRT:
+    case BUILT_IN_SINH:
+    case BUILT_IN_SIN:
+    case BUILT_IN_ASINH:
+    case BUILT_IN_ASIN:
+    case BUILT_IN_COSH:
+    case BUILT_IN_COS:
+    case BUILT_IN_ACOSH:
+    case BUILT_IN_ACOS:
+      if (el_mode != DFmode || n != 2)
+       return NULL_TREE;
+      break;
+
+    case BUILT_IN_EXPF:
+    case BUILT_IN_LOGF:
+    case BUILT_IN_LOG10F:
+    case BUILT_IN_POWF:
+    case BUILT_IN_TANHF:
+    case BUILT_IN_TANF:
+    case BUILT_IN_ATANF:
+    case BUILT_IN_ATAN2F:
+    case BUILT_IN_ATANHF:
+    case BUILT_IN_CBRTF:
+    case BUILT_IN_SINHF:
+    case BUILT_IN_SINF:
+    case BUILT_IN_ASINHF:
+    case BUILT_IN_ASINF:
+    case BUILT_IN_COSHF:
+    case BUILT_IN_COSF:
+    case BUILT_IN_ACOSHF:
+    case BUILT_IN_ACOSF:
+      if (el_mode != SFmode || n != 4)
+       return NULL_TREE;
+      break;
+
+    default:
+      return NULL_TREE;
+    }
+
+  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+
+  if (fn == BUILT_IN_LOGF)
+    strcpy (name, "vmlsLn4");
+  else if (fn == BUILT_IN_LOG)
+    strcpy (name, "vmldLn2");
+  else if (n == 4)
+    {
+      sprintf (name, "vmls%s", bname+10);  /* NOTE(review): +10 presumably skips "__builtin_"; confirm.  */
+      name[strlen (name)-1] = '4';  /* Overwrite the last char (presumably 'f') with '4'.  */
+    }
+  else
+    sprintf (name, "vmld%s2", bname+10);
+
+  /* Uppercase the first letter after the "vmls"/"vmld" prefix.  */
+  name[4] &= ~0x20;
+
+  arity = 0;
+  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+       args = TREE_CHAIN (args))
+    arity++;
+
+  if (arity == 1)
+    fntype = build_function_type_list (type_out, type_in, NULL);
+  else
+    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+  /* Build a function declaration for the vectorized function.  */
+  new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+  TREE_PUBLIC (new_fndecl) = 1;
+  DECL_EXTERNAL (new_fndecl) = 1;
+  DECL_IS_NOVOPS (new_fndecl) = 1;
+  TREE_READONLY (new_fndecl) = 1;
+
+  return new_fndecl;
+}
+
+/* Handler for an ACML-style interface to
+   a library with vectorized intrinsics.  */
 
 static tree
 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
index 649a64b057b16b9d6c8974df41d1710499a6203b..871881038135195abb59544b64d1eac54b645224 100644 (file)
@@ -10765,15 +10765,26 @@ decreased by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994).
 @item -mveclibabi=@var{type}
 @opindex mveclibabi
 Specifies the ABI type to use for vectorizing intrinsics using an
-external library.  Supported types are @code{acml} for the AMD
-math core library style of interfacing.  GCC will currently emit
-calls to @code{__vrd2_sin}, @code{__vrd2_cos}, @code{__vrd2_exp},
-@code{__vrd2_log}, @code{__vrd2_log2}, @code{__vrd2_log10},
-@code{__vrs4_sinf}, @code{__vrs4_cosf}, @code{__vrs4_expf},
-@code{__vrs4_logf}, @code{__vrs4_log2f}, @code{__vrs4_log10f}
-and @code{__vrs4_powf} when using this type and @option{-ftree-vectorize}
-is enabled.  A ACML ABI compatible library will have to be specified
-at link time.
+external library.  Supported types are @code{svml} for the Intel short
+vector math library and @code{acml} for the AMD math core library style
+of interfacing.  GCC will currently emit calls to @code{vmldExp2},
+@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
+@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
+@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
+@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
+@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
+@code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
+@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
+@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},
+@code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding
+function type when @option{-mveclibabi=svml} is used and @code{__vrd2_sin},
+@code{__vrd2_cos}, @code{__vrd2_exp}, @code{__vrd2_log}, @code{__vrd2_log2},
+@code{__vrd2_log10}, @code{__vrs4_sinf}, @code{__vrs4_cosf},
+@code{__vrs4_expf}, @code{__vrs4_logf}, @code{__vrs4_log2f},
+@code{__vrs4_log10f} and @code{__vrs4_powf} for corresponding function type
+when @option{-mveclibabi=acml} is used.  Both @option{-ftree-vectorize} and
+@option{-funsafe-math-optimizations} have to be enabled.  An SVML or ACML ABI
+compatible library will have to be specified at link time.
 
 @item -mpush-args
 @itemx -mno-push-args
index 2ac6eb02a205caf2a5ed6ea03373006dd92d7716..3bd9eab567a6945ce5c107333669ccf0bc929c6d 100644 (file)
@@ -1,3 +1,7 @@
+2008-03-28  Uros Bizjak  <ubizjak@gmail.com>
+
+       * gcc.target/i386/vectorize6.c: New test.
+
 2008-03-28  Tobias Burnus  <burnus@net-b.de>
 
        PR fortran/35721
@@ -20,7 +24,7 @@
 2008-03-28  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
        PR target/31334
-       * gcc.target/powerpc/altivec-25.c: Nnew testcase.
+       * gcc.target/powerpc/altivec-25.c: New testcase.
 
 2008-03-27  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
 
diff --git a/gcc/testsuite/gcc.target/i386/vectorize6.c b/gcc/testsuite/gcc.target/i386/vectorize6.c
new file mode 100644 (file)
index 0000000..78ec53d
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -ftree-vectorize -mveclibabi=svml -ffast-math" } */
+
+double x[256];
+
+extern double sin(double);
+
+void foo(void)
+{
+  int i;
+
+  for (i=0; i<256; ++i)
+    x[i] = sin(x[i]);  /* The dg-final scan expects a "vmldSin2" call here.  */
+}
+
+/* { dg-final { scan-assembler "vmldSin2" } } */