tree-vect-stmts.c (vectorizable_store): Perform vector extracts via vectors if supported, integer extracts via punning if supported or otherwise vector extracts.
author: Richard Biener <rguenther@suse.de>
Wed, 2 Aug 2017 06:57:12 +0000 (06:57 +0000)
committer: Richard Biener <rguenth@gcc.gnu.org>
Wed, 2 Aug 2017 06:57:12 +0000 (06:57 +0000)
2017-08-02  Richard Biener  <rguenther@suse.de>

* tree-vect-stmts.c (vectorizable_store): Perform vector extracts
via vectors if supported, integer extracts via punning if supported
or otherwise vector extracts.

From-SVN: r250813

gcc/ChangeLog
gcc/tree-vect-stmts.c

index 46d33971a41983e94f6cc890712d6bd7390263a1..faacba284224c0aa058801117599250dda69ff2f 100644 (file)
@@ -1,3 +1,9 @@
+2017-08-02  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-stmts.c (vectorizable_store): Perform vector extracts
+       via vectors if supported, integer extracts via punning if supported
+       or otherwise vector extracts.
+
 2017-08-02  Richard Biener  <rguenther@suse.de>
 
        * tree-ssa-pre.c (bitmap_insert_into_set_1): Remove and inline
index eecc1a43bb35f0fa6895a1c07c952c2f6dd53e27..ee32c5671e292627eb845500acbae2dd66ac119d 100644 (file)
@@ -6002,6 +6002,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       unsigned nstores = nunits;
       unsigned lnel = 1;
       tree ltype = elem_type;
+      tree lvectype = vectype;
       if (slp)
        {
          if (group_size < nunits
@@ -6010,6 +6011,45 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              nstores = nunits / group_size;
              lnel = group_size;
              ltype = build_vector_type (elem_type, group_size);
+             lvectype = vectype;
+
+             /* First check if vec_extract optab doesn't support extraction
+                of vector elts directly.  */
+             machine_mode elmode = TYPE_MODE (elem_type);
+             machine_mode vmode = mode_for_vector (elmode, group_size);
+             if (! VECTOR_MODE_P (vmode)
+                 || (convert_optab_handler (vec_extract_optab,
+                                            TYPE_MODE (vectype), vmode)
+                     == CODE_FOR_nothing))
+               {
+                 /* Try to avoid emitting an extract of vector elements
+                    by performing the extracts using an integer type of the
+                    same size, extracting from a vector of those and then
+                    re-interpreting it as the original vector type if
+                    supported.  */
+                 unsigned lsize
+                   = group_size * GET_MODE_BITSIZE (elmode);
+                 elmode = mode_for_size (lsize, MODE_INT, 0);
+                 vmode = mode_for_vector (elmode, nunits / group_size);
+                 /* If we can't construct such a vector fall back to
+                    element extracts from the original vector type and
+                    element size stores.  */
+                 if (VECTOR_MODE_P (vmode)
+                     && (convert_optab_handler (vec_extract_optab,
+                                                vmode, elmode)
+                         != CODE_FOR_nothing))
+                   {
+                     nstores = nunits / group_size;
+                     lnel = group_size;
+                     ltype = build_nonstandard_integer_type (lsize, 1);
+                     lvectype = build_vector_type (ltype, nstores);
+                   }
+                 /* Else fall back to vector extraction anyway.
+                    Fewer stores are more important than avoiding spilling
+                    of the vector we extract from.  Compared to the
+                    construction case in vectorizable_load no store-forwarding
+                    issue exists here for reasonable archs.  */
+               }
            }
          else if (group_size >= nunits
                   && group_size % nunits == 0)
@@ -6017,6 +6057,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              nstores = 1;
              lnel = nunits;
              ltype = vectype;
+             lvectype = vectype;
            }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
@@ -6087,7 +6128,16 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
                    }
                }
-
+             /* Pun the vector to extract from if necessary.  */
+             if (lvectype != vectype)
+               {
+                 tree tem = make_ssa_name (lvectype);
+                 gimple *pun
+                   = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
+                                                       lvectype, vec_oprnd));
+                 vect_finish_stmt_generation (stmt, pun, gsi);
+                 vec_oprnd = tem;
+               }
              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;