rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector loads...
author Will Schmidt <will_schmidt@vnet.ibm.com>
Tue, 19 Sep 2017 13:42:48 +0000 (13:42 +0000)
committer Will Schmidt <willschm@gcc.gnu.org>
Tue, 19 Sep 2017 13:42:48 +0000 (13:42 +0000)
[gcc]

2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>

    * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
    for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
    * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
    Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.

From-SVN: r252975

gcc/ChangeLog
gcc/config/rs6000/rs6000-c.c
gcc/config/rs6000/rs6000.c
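
A note for readers: vec_ld maps to the AltiVec lvx instruction, which
ignores the low four bits of the effective address, so the built-in
loads 16 bytes from the 16-byte-aligned address (p + off) & -16.  A
minimal stand-alone sketch of that semantic (illustrative only, not
code from this commit; the function name is hypothetical; assumes a
PowerPC compiler with -maltivec):

    #include <altivec.h>
    #include <stdint.h>

    /* What vec_ld (off, p) computes: round the effective address down
       to a 16-byte boundary, then load a full vector from it.  */
    vector signed int
    vec_ld_expansion (long off, const vector signed int *p)
    {
      uintptr_t ea = (uintptr_t) p + (uintptr_t) off;  /* POINTER_PLUS_EXPR */
      ea &= (uintptr_t) -16;                           /* BIT_AND_EXPR      */
      return *(const vector signed int *) ea;          /* MEM_REF           */
    }

Expanding the built-in to this form early, rather than carrying it to
RTL, is what lets later passes see an ordinary memory reference and
honor __restrict__ and other aliasing information.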

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 98806a35f5993c892d08c3a6baf08b7f7830e8bb..d2a7c14a8731fbb620080827c62ca8af94937bae 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
+       for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
+       * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+       Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
+
 2017-09-19  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/82244
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d27f5635c82cd7c113fc5040a6c3a189cdd89773..a49db972e912935735f78e40734d2f8f751caa07 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -6472,85 +6472,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       return stmt;
     }
 
-  /* Expand vec_ld into an expression that masks the address and
-     performs the load.  We need to expand this early to allow
+  /* Expand vec_st into an expression that masks the address and
+     performs the store.  We need to expand this early to allow
      the best aliasing, as by the time we get into RTL we no longer
      are able to honor __restrict__, for example.  We may want to
      consider this for all memory access built-ins.
 
      When -maltivec=be is specified, or the wrong number of arguments
      is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_LD
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 2)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-
-      /* Strip qualifiers like "const" from the pointer arg.  */
-      tree arg1_type = TREE_TYPE (arg1);
-      if (TREE_CODE (arg1_type) == ARRAY_TYPE && c_dialect_cxx ())
-       {
-         /* Force array-to-pointer decay for C++.  */
-         arg1 = default_conversion (arg1);
-         arg1_type = TREE_TYPE (arg1);
-       }
-      if (!POINTER_TYPE_P (arg1_type))
-       goto bad;
-
-      tree inner_type = TREE_TYPE (arg1_type);
-      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
-       {
-         arg1_type = build_pointer_type (build_qualified_type (inner_type,
-                                                               0));
-         arg1 = fold_convert (arg1_type, arg1);
-       }
-
-      /* Construct the masked address.  Let existing error handling take
-        over if we don't have a constant offset.  */
-      arg0 = fold (arg0);
-
-      if (TREE_CODE (arg0) == INTEGER_CST)
-       {
-         if (!ptrofftype_p (TREE_TYPE (arg0)))
-           arg0 = build1 (NOP_EXPR, sizetype, arg0);
-
-         tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
-                                      arg1, arg0);
-         tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
-                                         build_int_cst (arg1_type, -16));
-
-         /* Find the built-in to get the return type so we can convert
-            the result properly (or fall back to default handling if the
-            arguments aren't compatible).  */
-         for (desc = altivec_overloaded_builtins;
-              desc->code && desc->code != fcode; desc++)
-           continue;
-
-         for (; desc->code == fcode; desc++)
-           if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-               && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
-                                                   desc->op2)))
-             {
-               tree ret_type = rs6000_builtin_type (desc->ret_type);
-               if (TYPE_MODE (ret_type) == V2DImode)
-                 /* Type-based aliasing analysis thinks vector long
-                    and vector long long are different and will put them
-                    in distinct alias classes.  Force our return type
-                    to be a may-alias type to avoid this.  */
-                 ret_type
-                   = build_pointer_type_for_mode (ret_type, Pmode,
-                                                  true/*can_alias_all*/);
-               else
-                 ret_type = build_pointer_type (ret_type);
-               aligned = build1 (NOP_EXPR, ret_type, aligned);
-               tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
-               return ret_val;
-             }
-       }
-    }
 
-  /* Similarly for stvx.  */
   if (fcode == ALTIVEC_BUILTIN_VEC_ST
       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
       && nargs == 3)
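
A side note on the block removed above: its V2DImode special case
built the return pointer type with can_alias_all so that type-based
aliasing would not put "vector long" and "vector long long" accesses
in distinct alias classes.  At the source level, GCC's may_alias
attribute provides the same escape from TBAA; a hypothetical sketch
(type and function names are mine, not from the patch; assumes a
VSX-enabled target, -mvsx):

    #include <altivec.h>

    /* A may_alias variant of the V2DImode vector type: accesses through
       it are exempt from type-based alias analysis, matching the effect
       of the removed can_alias_all pointer type.  */
    typedef __vector long long v2di_ma __attribute__ ((may_alias));

    long long
    first_element (const void *p)
    {
      return (*(const v2di_ma *) p)[0];
    }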
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index bc1c4db5db47a312817d910a5539ab6888305a3e..1978634f02e55668bff9d4672e7d45ced3272284 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16546,6 +16546,48 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        update_call_from_tree (gsi, res);
        return true;
       }
+    /* Vector loads.  */
+    case ALTIVEC_BUILTIN_LVX_V16QI:
+    case ALTIVEC_BUILTIN_LVX_V8HI:
+    case ALTIVEC_BUILTIN_LVX_V4SI:
+    case ALTIVEC_BUILTIN_LVX_V4SF:
+    case ALTIVEC_BUILTIN_LVX_V2DI:
+    case ALTIVEC_BUILTIN_LVX_V2DF:
+      {
+        arg0 = gimple_call_arg (stmt, 0);  // offset
+        arg1 = gimple_call_arg (stmt, 1);  // address
+        /* Do not fold for -maltivec=be on LE targets.  */
+        if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+           return false;
+        lhs = gimple_call_lhs (stmt);
+        location_t loc = gimple_location (stmt);
+        /* Since arg1 may be cast to a different type, just use ptr_type_node
+           here instead of trying to enforce TBAA on pointer types.  */
+        tree arg1_type = ptr_type_node;
+        tree lhs_type = TREE_TYPE (lhs);
+        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+           the tree using the value from arg0.  The resulting type will match
+           the type of arg1.  */
+        gimple_seq stmts = NULL;
+        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+                                      arg1_type, arg1, temp_offset);
+        /* Mask off any lower bits from the address.  */
+        tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+                                         arg1_type, temp_addr,
+                                         build_int_cst (arg1_type, -16));
+        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+           take an offset, but since we've already incorporated the offset
+           above, here we just pass in a zero.  */
+        gimple *g;
+        g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
+                                               build_int_cst (arg1_type, 0)));
+        gimple_set_location (g, loc);
+        gsi_replace (gsi, g, true);
+        return true;
+      }
+
     default:
        if (TARGET_DEBUG_BUILTIN)
           fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
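
Net effect of the new case above: a call such as
x = __builtin_altivec_lvx_v4si (off, p) (what vec_ld resolves to for
vector int) is replaced by a conversion of the offset to sizetype, a
POINTER_PLUS_EXPR, a BIT_AND_EXPR with -16, and an assignment from a
MEM_REF.  Schematically, the resulting GIMPLE looks like this (SSA
names and annotations are illustrative, not verbatim dump output):

    _1 = (sizetype) off_2(D);        /* gimple_convert                 */
    _3 = p_4(D) + _1;                /* POINTER_PLUS_EXPR              */
    _5 = _3 & -16B;                  /* BIT_AND_EXPR: clear low 4 bits */
    x_6 = MEM[(vector int *) _5];    /* MEM_REF replaces the call      */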