+2019-07-15 Kewen Lin <linkw@gcc.gnu.org>
+
+ PR tree-optimization/88497
+ * tree-ssa-reassoc.c (reassociate_bb): Swap the positions of
+ GIMPLE_BINARY_RHS check and gimple_visited_p check, call new
+ function undistribute_bitref_for_vector.
+ (undistribute_bitref_for_vector): New function.
+ (cleanup_vinfo_map): Likewise.
+ (sort_by_mach_mode): Likewise.
+
2019-07-14 Uroš Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (nonmemory_szext_operand): New mode attribute.
+2019-07-15 Kewen Lin <linkw@gcc.gnu.org>
+
+ PR tree-optimization/88497
+ * gcc.dg/tree-ssa/pr88497-1.c: New test.
+ * gcc.dg/tree-ssa/pr88497-2.c: Likewise.
+ * gcc.dg/tree-ssa/pr88497-3.c: Likewise.
+ * gcc.dg/tree-ssa/pr88497-4.c: Likewise.
+ * gcc.dg/tree-ssa/pr88497-5.c: Likewise.
+ * gcc.dg/tree-ssa/pr88497-6.c: Likewise.
+ * gcc.dg/tree-ssa/pr88497-7.c: Likewise.
+
2019-07-14 Jerry DeLisle <jvdelisle@gcc.gnu.org>
PR fortran/87233
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vsx_hw { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target sse2_runtime { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+/* { dg-additional-options "-mvsx" { target { powerpc*-*-* } } } */
+/* { dg-additional-options "-msse2" { target { i?86-*-* x86_64-*-* } } } */
+
+/* To test reassoc can undistribute vector bit_field_ref summation.
+
+ arg1 and arg2 are two arrays whose elements of type vector double.
+ Assuming:
+ A0 = arg1[0], A1 = arg1[1], A2 = arg1[2], A3 = arg1[3],
+ B0 = arg2[0], B1 = arg2[1], B2 = arg2[2], B3 = arg2[3],
+
+ Then:
+ V0 = A0 * B0, V1 = A1 * B1, V2 = A2 * B2, V3 = A3 * B3,
+
+ reassoc transforms
+
+ accumulator += V0[0] + V0[1] + V1[0] + V1[1] + V2[0] + V2[1]
+ + V3[0] + V3[1];
+
+ into:
+
+ T = V0 + V1 + V2 + V3
+ accumulator += T[0] + T[1];
+
+ Fewer bit_field_refs, only two for 128 or more bits vector. */
+
+typedef double v2df __attribute__ ((vector_size (16)));
+__attribute__ ((noinline)) double
+test (double accumulator, v2df arg1[], v2df arg2[])
+{
+ v2df temp;
+ temp = arg1[0] * arg2[0];
+ accumulator += temp[0] + temp[1];
+ temp = arg1[1] * arg2[1];
+ accumulator += temp[0] + temp[1];
+ temp = arg1[2] * arg2[2];
+ accumulator += temp[0] + temp[1];
+ temp = arg1[3] * arg2[3];
+ accumulator += temp[0] + temp[1];
+ return accumulator;
+}
+
+extern void abort (void);
+
+int
+main ()
+{
+ v2df v2[4] = {{1.0, 2.0}, {4.0, 8.0}, {1.0, 3.0}, {9.0, 27.0}};
+ v2df v3[4] = {{1.0, 4.0}, {16.0, 64.0}, {1.0, 2.0}, {3.0, 4.0}};
+ double acc = 100.0;
+ double res = test (acc, v2, v3);
+ if (res != 827.0)
+ abort ();
+ return 0;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "reassoc1" { target { powerpc*-*-* i?86-*-* x86_64-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target powerpc_altivec_ok { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target sse2 { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+/* { dg-additional-options "-maltivec" { target { powerpc*-*-* } } } */
+/* { dg-additional-options "-msse2" { target { i?86-*-* x86_64-*-* } } } */
+
+/* To test reassoc can undistribute vector bit_field_ref on multiplication.
+
+ v1, v2, v3, v4 of type vector float.
+
+ reassoc transforms
+
+ accumulator *= v1[0] * v1[1] * v1[2] * v1[3] *
+ v2[0] * v2[1] * v2[2] * v2[3] *
+ v3[0] * v3[1] * v3[2] * v3[3] *
+ v4[0] * v4[1] * v4[2] * v4[3] ;
+
+ into:
+
+ T = v1 * v2 * v3 * v4;
+ accumulator *= T[0] * T[1] * T[2] * T[3];
+
+ Fewer bit_field_refs, only four for 128 or more bits vector. */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+float
+test (float accumulator, v4sf v1, v4sf v2, v4sf v3, v4sf v4)
+{
+ accumulator *= v1[0] * v1[1] * v1[2] * v1[3];
+ accumulator *= v2[0] * v2[1] * v2[2] * v2[3];
+ accumulator *= v3[0] * v3[1] * v3[2] * v3[3];
+ accumulator *= v4[0] * v4[1] * v4[2] * v4[3];
+ return accumulator;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target { powerpc*-*-* i?86-*-* x86_64-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_altivec_ok { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target sse2 { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+/* { dg-additional-options "-maltivec" { target { powerpc*-*-* } } } */
+/* { dg-additional-options "-msse2" { target { i?86-*-* x86_64-*-* } } } */
+
+/* To test reassoc can undistribute vector bit_field_ref on bitwise AND.
+
+ v1, v2, v3, v4 of type vector int.
+
+ reassoc transforms
+
+ accumulator &= v1[0] & v1[1] & v1[2] & v1[3] &
+ v2[0] & v2[1] & v2[2] & v2[3] &
+ v3[0] & v3[1] & v3[2] & v3[3] &
+ v4[0] & v4[1] & v4[2] & v4[3] ;
+
+ into:
+
+ T = v1 & v2 & v3 & v4;
+ accumulator &= T[0] & T[1] & T[2] & T[3];
+
+ Fewer bit_field_refs, only four for 128 or more bits vector. */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+int
+test (int accumulator, v4si v1, v4si v2, v4si v3, v4si v4)
+{
+ accumulator &= v1[0] & v1[1] & v1[2] & v1[3];
+ accumulator &= v2[0] & v2[1] & v2[2] & v2[3];
+ accumulator &= v3[0] & v3[1] & v3[2] & v3[3];
+ accumulator &= v4[0] & v4[1] & v4[2] & v4[3];
+ return accumulator;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target { powerpc*-*-* i?86-*-* x86_64-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_altivec_ok { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target sse2 { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+/* { dg-additional-options "-maltivec" { target { powerpc*-*-* } } } */
+/* { dg-additional-options "-msse2" { target { i?86-*-* x86_64-*-* } } } */
+
+/* To test reassoc can undistribute vector bit_field_ref on bitwise IOR.
+
+ v1, v2, v3, v4 of type vector int.
+
+ reassoc transforms
+
+ accumulator |= v1[0] | v1[1] | v1[2] | v1[3] |
+ v2[0] | v2[1] | v2[2] | v2[3] |
+ v3[0] | v3[1] | v3[2] | v3[3] |
+ v4[0] | v4[1] | v4[2] | v4[3] ;
+
+ into:
+
+ T = v1 | v2 | v3 | v4;
+ accumulator |= T[0] | T[1] | T[2] | T[3];
+
+ Fewer bit_field_refs, only four for 128 or more bits vector. */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+int
+test (int accumulator, v4si v1, v4si v2, v4si v3, v4si v4)
+{
+ accumulator |= v1[0] | v1[1] | v1[2] | v1[3];
+ accumulator |= v2[0] | v2[1] | v2[2] | v2[3];
+ accumulator |= v3[0] | v3[1] | v3[2] | v3[3];
+ accumulator |= v4[0] | v4[1] | v4[2] | v4[3];
+ return accumulator;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target { powerpc*-*-* i?86-*-* x86_64-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_altivec_ok { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target sse2 { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+/* { dg-additional-options "-maltivec" { target { powerpc*-*-* } } } */
+/* { dg-additional-options "-msse2" { target { i?86-*-* x86_64-*-* } } } */
+
+/* To test reassoc can undistribute vector bit_field_ref on bitwise XOR.
+
+ v1, v2, v3, v4 of type vector int.
+
+ reassoc transforms
+
+ accumulator ^= v1[0] ^ v1[1] ^ v1[2] ^ v1[3] ^
+ v2[0] ^ v2[1] ^ v2[2] ^ v2[3] ^
+ v3[0] ^ v3[1] ^ v3[2] ^ v3[3] ^
+ v4[0] ^ v4[1] ^ v4[2] ^ v4[3] ;
+
+ into:
+
+ T = v1 ^ v2 ^ v3 ^ v4;
+ accumulator ^= T[0] ^ T[1] ^ T[2] ^ T[3];
+
+ Fewer bit_field_refs, only four for 128 or more bits vector. */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+int
+test (int accumulator, v4si v1, v4si v2, v4si v3, v4si v4)
+{
+ accumulator ^= v1[0] ^ v1[1] ^ v1[2] ^ v1[3];
+ accumulator ^= v2[0] ^ v2[1] ^ v2[2] ^ v2[3];
+ accumulator ^= v3[0] ^ v3[1] ^ v3[2] ^ v3[3];
+ accumulator ^= v4[0] ^ v4[1] ^ v4[2] ^ v4[3];
+ return accumulator;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "reassoc1" { target { powerpc*-*-* i?86-*-* x86_64-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f -ffast-math -fdump-tree-reassoc1" } */
+
+/* To test reassoc can undistribute vector bit_field_ref on multiple
+ vector machine modes.
+
+ v1, v2 of type vector 4 x float
+ v3, v4 of type vector 8 x float
+ v5, v6 of type vector 16 x float
+
+ reassoc transforms
+
+ accumulator += v1[0] + v1[1] + v1[2] + v1[3] +
+ v2[0] + v2[1] + v2[2] + v2[3] +
+ v3[0] + v3[1] + v3[2] + v3[3] +
+ v3[4] + v3[5] + v3[6] + v3[7] +
+ v4[0] + v4[1] + v4[2] + v4[3] +
+ v4[4] + v4[5] + v4[6] + v4[7] +
+ v5[0] + v5[1] + v5[2] + v5[3] +
+ v5[4] + v5[5] + v5[6] + v5[7] +
+ v5[8] + v5[9] + v5[10] + v5[11] +
+ v5[12] + v5[13] + v5[14] + v5[15] +
+ v6[0] + v6[1] + v6[2] + v6[3] +
+ v6[4] + v6[5] + v6[6] + v6[7] +
+ v6[8] + v6[9] + v6[10] + v6[11] +
+ v6[12] + v6[13] + v6[14] + v6[15] ;
+
+ into:
+
+ T12 = v1 + v2;
+ T34 = v3 + v4;
+ T56 = v5 + v6;
+ accumulator += T12[0] + T12[1] + T12[2] + T12[3] +
+ accumulator += T34[0] + T34[1] + T34[2] + T34[3] +
+ accumulator += T34[4] + T34[5] + T34[6] + T34[7] +
+ accumulator += T56[0] + T56[1] + T56[2] + T56[3] +
+ accumulator += T56[4] + T56[5] + T56[6] + T56[7] +
+ accumulator += T56[8] + T56[9] + T56[10] + T56[11] +
+ accumulator += T56[12] + T56[13] + T56[14] + T56[15] ; */
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef float v8sf __attribute__((vector_size(32)));
+typedef float v16sf __attribute__((vector_size(64)));
+
+float
+test (float accumulator, v4sf v1, v4sf v2, v8sf v3, v8sf v4, v16sf v5, v16sf v6)
+{
+ accumulator += v1[0] + v1[1] + v1[2] + v1[3];
+ accumulator += v2[0] + v2[1] + v2[2] + v2[3];
+ accumulator += v3[0] + v3[1] + v3[2] + v3[3];
+ accumulator += v3[4] + v3[5] + v3[6] + v3[7];
+ accumulator += v4[0] + v4[1] + v4[2] + v4[3];
+ accumulator += v4[4] + v4[5] + v4[6] + v4[7];
+ accumulator += v5[0] + v5[1] + v5[2] + v5[3];
+ accumulator += v5[4] + v5[5] + v5[6] + v5[7];
+ accumulator += v5[8] + v5[9] + v5[10] + v5[11];
+ accumulator += v5[12] + v5[13] + v5[14] + v5[15];
+ accumulator += v6[0] + v6[1] + v6[2] + v6[3];
+ accumulator += v6[4] + v6[5] + v6[6] + v6[7];
+ accumulator += v6[8] + v6[9] + v6[10] + v6[11];
+ accumulator += v6[12] + v6[13] + v6[14] + v6[15];
+ return accumulator;
+}
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 28 "reassoc1" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f_runtime } */
+/* { dg-options "-O2 -mavx512f -ffast-math -fdump-tree-reassoc1" } */
+
+/* To test reassoc can undistribute vector bit_field_ref on multiple
+ vector machine modes, bypass those modes with only one candidate.
+
+ v1, v2 of type vector 4 x float
+ v3 of type vector 8 x float
+ v5, v6 of type vector 16 x float
+
+ reassoc transforms
+
+ accumulator += v1[0] + v1[1] + v1[2] + v1[3] +
+ v2[0] + v2[1] + v2[2] + v2[3] +
+ v3[0] + v3[1] + v3[2] + v3[3] +
+ v3[4] + v3[5] + v3[6] + v3[7] +
+ v5[0] + v5[1] + v5[2] + v5[3] +
+ v5[4] + v5[5] + v5[6] + v5[7] +
+ v5[8] + v5[9] + v5[10] + v5[11] +
+ v5[12] + v5[13] + v5[14] + v5[15] +
+ v6[0] + v6[1] + v6[2] + v6[3] +
+ v6[4] + v6[5] + v6[6] + v6[7] +
+ v6[8] + v6[9] + v6[10] + v6[11] +
+ v6[12] + v6[13] + v6[14] + v6[15] ;
+
+ into:
+
+ T12 = v1 + v2;
+ T56 = v5 + v6;
+ accumulator += T12[0] + T12[1] + T12[2] + T12[3] +
+ accumulator += v3[0] + v3[1] + v3[2] + v3[3] +
+ accumulator += v3[4] + v3[5] + v3[6] + v3[7] +
+ accumulator += T56[0] + T56[1] + T56[2] + T56[3] +
+ accumulator += T56[4] + T56[5] + T56[6] + T56[7] +
+ accumulator += T56[8] + T56[9] + T56[10] + T56[11] +
+ accumulator += T56[12] + T56[13] + T56[14] + T56[15] ; */
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef float v8sf __attribute__((vector_size(32)));
+typedef float v16sf __attribute__((vector_size(64)));
+
+__attribute__ ((noinline))
+float test(float accumulator, v4sf v1, v4sf v2, v8sf v3, v16sf v5, v16sf v6) {
+ accumulator += v1[0] + v1[1] + v1[2] + v1[3];
+ accumulator += v2[0] + v2[1] + v2[2] + v2[3];
+ accumulator += v3[0] + v3[1] + v3[2] + v3[3];
+ accumulator += v3[4] + v3[5] + v3[6] + v3[7];
+ accumulator += v5[0] + v5[1] + v5[2] + v5[3];
+ accumulator += v5[4] + v5[5] + v5[6] + v5[7];
+ accumulator += v5[8] + v5[9] + v5[10] + v5[11];
+ accumulator += v5[12] + v5[13] + v5[14] + v5[15];
+ accumulator += v6[0] + v6[1] + v6[2] + v6[3];
+ accumulator += v6[4] + v6[5] + v6[6] + v6[7];
+ accumulator += v6[8] + v6[9] + v6[10] + v6[11];
+ accumulator += v6[12] + v6[13] + v6[14] + v6[15];
+ return accumulator;
+}
+
+extern void abort (void);
+
+int
+main ()
+{
+ v4sf v1 = {1.0, 2.0, 3.0, 4.0 };
+ v4sf v2 = {5.0, 6.0, 7.0, 8.0 };
+ v8sf v3 = {9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
+ v16sf v5 = {17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0};
+ v16sf v6 = {33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0};
+ float acc = 24.0;
+ double res = test (acc, v1, v2, v3, v5, v6);
+ if (res != 1200.0)
+ abort();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 28 "reassoc1" } } */
return changed;
}
+/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
+ first: element index for each relevant BIT_FIELD_REF.
+ second: the index of vec ops* for each relevant BIT_FIELD_REF. */
+typedef std::pair<unsigned, unsigned> v_info_elem;
+typedef auto_vec<v_info_elem, 32> v_info;
+typedef v_info *v_info_ptr;
+
+/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
+static int
+sort_by_mach_mode (const void *p_i, const void *p_j)
+{
+ const tree tr1 = *((const tree *) p_i);
+ const tree tr2 = *((const tree *) p_j);
+ unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
+ unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
+ if (mode1 > mode2)
+ return 1;
+ else if (mode1 < mode2)
+ return -1;
+ else
+ return 0;
+}
+
+/* Cleanup hash map for VECTOR information. */
+static void
+cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
+{
+ for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
+ it != info_map.end (); ++it)
+ {
+ v_info_ptr info = (*it).second;
+ delete info;
+ (*it).second = NULL;
+ }
+}
+
+/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
+ V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
+ is transformed to
+ Vs = (V1 + V2 + ... + Vn)
+ Vs[0] + Vs[1] + ... + Vs[k]
+
+ The basic steps are listed below:
+
+ 1) Check the addition chain *OPS by looking those summands coming from
+ VECTOR bit_field_ref on VECTOR type. Put the information into
+ v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
+
+ 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
+ continuous, they can cover the whole VECTOR perfectly without any holes.
+ Obtain one VECTOR list which contain candidates to be transformed.
+
+ 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
+ candidates with same mode, build the addition statements for them and
+ generate BIT_FIELD_REFs accordingly.
+
+ TODO:
+ The current implementation requires the whole VECTORs should be fully
+ covered, but it can be extended to support partial, checking adjacent
+ but not fill the whole, it may need some cost model to define the
+ boundary to do or not.
+*/
+static bool
+undistribute_bitref_for_vector (enum tree_code opcode,
+ vec<operand_entry *> *ops, struct loop *loop)
+{
+ if (ops->length () <= 1)
+ return false;
+
+ if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
+ && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
+ return false;
+
+ hash_map<tree, v_info_ptr> v_info_map;
+ operand_entry *oe1;
+ unsigned i;
+
+ /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
+ information into map. */
+ FOR_EACH_VEC_ELT (*ops, i, oe1)
+ {
+ enum tree_code dcode;
+ gimple *oe1def;
+
+ if (TREE_CODE (oe1->op) != SSA_NAME)
+ continue;
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ if (!is_gimple_assign (oe1def))
+ continue;
+ dcode = gimple_assign_rhs_code (oe1def);
+ if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
+ continue;
+
+ tree rhs = gimple_assign_rhs1 (oe1def);
+ tree vec = TREE_OPERAND (rhs, 0);
+ tree vec_type = TREE_TYPE (vec);
+
+ if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
+ continue;
+
+ /* Ignore it if target machine can't support this VECTOR type. */
+ if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
+ continue;
+
+ /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
+ if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
+ continue;
+
+ tree elem_type = TREE_TYPE (vec_type);
+ unsigned HOST_WIDE_INT elem_size
+ = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+ if (maybe_ne (bit_field_size (rhs), elem_size))
+ continue;
+
+ unsigned idx;
+ if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
+ continue;
+
+ /* Ignore it if target machine can't support this type of VECTOR
+ operation. */
+ optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
+ if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
+ continue;
+
+ bool existed;
+ v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
+ if (!existed)
+ info = new v_info;
+ info->safe_push (std::make_pair (idx, i));
+ }
+
+ /* At least two VECTOR to combine. */
+ if (v_info_map.elements () <= 1)
+ {
+ cleanup_vinfo_map (v_info_map);
+ return false;
+ }
+
+ /* Verify all VECTOR candidates by checking two conditions:
+ 1) sorted offsets are adjacent, no holes.
+ 2) can fill the whole VECTOR perfectly.
+ And add the valid candidates to a vector for further handling. */
+ auto_vec<tree> valid_vecs (v_info_map.elements ());
+ for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
+ it != v_info_map.end (); ++it)
+ {
+ tree cand_vec = (*it).first;
+ v_info_ptr cand_info = (*it).second;
+ unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
+ if (cand_info->length () != num_elems)
+ continue;
+ sbitmap holes = sbitmap_alloc (num_elems);
+ bitmap_ones (holes);
+ bool valid = true;
+ v_info_elem *curr;
+ FOR_EACH_VEC_ELT (*cand_info, i, curr)
+ {
+ if (!bitmap_bit_p (holes, curr->first))
+ {
+ valid = false;
+ break;
+ }
+ else
+ bitmap_clear_bit (holes, curr->first);
+ }
+ if (valid && bitmap_empty_p (holes))
+ valid_vecs.quick_push (cand_vec);
+ sbitmap_free (holes);
+ }
+
+ /* At least two VECTOR to combine. */
+ if (valid_vecs.length () <= 1)
+ {
+ cleanup_vinfo_map (v_info_map);
+ return false;
+ }
+
+ valid_vecs.qsort (sort_by_mach_mode);
+ /* Go through all candidates by machine mode order, query the mode_to_total
+ to get the total number for each mode and skip the single one. */
+ for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
+ {
+ tree tvec = valid_vecs[i];
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
+
+ /* Skip modes with only a single candidate. */
+ if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
+ continue;
+
+ unsigned int idx, j;
+ gimple *sum = NULL;
+ v_info_ptr info_ptr;
+ tree sum_vec = tvec;
+ v_info_elem *elem;
+
+ /* Build the sum for all candidates with same mode. */
+ do
+ {
+ sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
+ valid_vecs[i + 1], opcode);
+ sum_vec = gimple_get_lhs (sum);
+ info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
+ /* Update those related ops of current candidate VECTOR. */
+ FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+ {
+ idx = elem->second;
+ gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
+ /* Set this then op definition will get DCEd later. */
+ gimple_set_visited (def, true);
+ if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
+ || opcode == BIT_IOR_EXPR)
+ (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
+ else if (opcode == MULT_EXPR)
+ (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
+ else
+ {
+ gcc_assert (opcode == BIT_AND_EXPR);
+ (*ops)[idx]->op
+ = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
+ }
+ (*ops)[idx]->rank = 0;
+ }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Generating addition -> ");
+ print_gimple_stmt (dump_file, sum, 0);
+ }
+ i++;
+ }
+ while ((i < valid_vecs.length () - 1)
+ && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
+
+ /* Referring to first valid VECTOR with this mode, generate the
+ BIT_FIELD_REF statements accordingly. */
+ info_ptr = *(v_info_map.get (tvec));
+ gcc_assert (sum);
+ tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
+ FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+ {
+ idx = elem->second;
+ tree dst = make_ssa_name (elem_type);
+ gimple *gs = gimple_build_assign (
+ dst, BIT_FIELD_REF,
+ build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
+ bitsize_int (elem->first
+ * tree_to_uhwi (TYPE_SIZE (elem_type)))));
+ insert_stmt_after (gs, sum);
+ gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
+ /* Set this then op definition will get DCEd later. */
+ gimple_set_visited (def, true);
+ (*ops)[idx]->op = gimple_assign_lhs (gs);
+ (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Generating bit_field_ref -> ");
+ print_gimple_stmt (dump_file, gs, 0);
+ }
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
+
+ cleanup_vinfo_map (v_info_map);
+
+ return true;
+}
+
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
expression, examine the other OPS to see if any of them are comparisons
of the same values, which we may be able to combine or eliminate.
tree lhs, rhs1, rhs2;
enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
- /* If this is not a gimple binary expression, there is
- nothing for us to do with it. */
- if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
- continue;
-
/* If this was part of an already processed statement,
we don't need to touch it again. */
if (gimple_visited_p (stmt))
continue;
}
+ /* If this is not a gimple binary expression, there is
+ nothing for us to do with it. */
+ if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
+ continue;
+
lhs = gimple_assign_lhs (stmt);
rhs1 = gimple_assign_rhs1 (stmt);
rhs2 = gimple_assign_rhs2 (stmt);
ops.qsort (sort_by_operand_rank);
optimize_ops_list (rhs_code, &ops);
}
-
+ if (undistribute_bitref_for_vector (rhs_code, &ops,
+ loop_containing_stmt (stmt)))
+ {
+ ops.qsort (sort_by_operand_rank);
+ optimize_ops_list (rhs_code, &ops);
+ }
if (rhs_code == PLUS_EXPR
&& transform_add_to_multiply (&ops))
ops.qsort (sort_by_operand_rank);