--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target vect_double } */
+
+void
+gemm (const double* __restrict__ A, const double* __restrict__ B,
+ double* __restrict__ C)
+{
+ unsigned int l_m = 0;
+ unsigned int l_n = 0;
+ unsigned int l_k = 0;
+
+ for ( l_n = 0; l_n < 9; l_n++ ) {
+ /* Clear the ten entries C[l_n*10 .. l_n*10+9] before they are
+    accumulated into below.
+    Use -O3 so this loop is unrolled completely early. */
+ for ( l_m = 0; l_m < 10; l_m++ ) { C[(l_n*10)+l_m] = 0.0; }
+ for ( l_k = 0; l_k < 17; l_k++ ) {
+ /* For each of the 17 l_k steps, add A[l_k*20+l_m] * B[l_n*20+l_k]
+    into the same ten entries of C; note that A and B are indexed
+    with a stride of 20 while C uses a stride of 10.
+    Use -O3 so this loop is unrolled completely early. */
+ for ( l_m = 0; l_m < 10; l_m++ ) {
+ C[(l_n*10)+l_m] += A[(l_k*20)+l_m] * B[(l_n*20)+l_k];
+ }
+ }
+ }
+}
+
+/* Exact scanning is difficult but we expect all loads and stores
+ and computations to be vectorized. */
+/* { dg-final { scan-tree-dump "optimized: basic block" "slp1" } } */
tree tmp_var, tmp_off;
split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
- /* See whether we have an SSA_NAME whose range is known
- to be [A, B]. */
- if (TREE_CODE (tmp_var) != SSA_NAME)
- return false;
+ /* See whether we have a known range [A, B] for tmp_var. */
wide_int var_min, var_max;
- value_range_kind vr_type = get_range_info (tmp_var, &var_min,
- &var_max);
- wide_int var_nonzero = get_nonzero_bits (tmp_var);
signop sgn = TYPE_SIGN (itype);
- if (intersect_range_with_nonzero_bits (vr_type, &var_min,
- &var_max, var_nonzero,
- sgn) != VR_RANGE)
+ if (TREE_CODE (tmp_var) == SSA_NAME)
+ {
+ value_range_kind vr_type
+ = get_range_info (tmp_var, &var_min, &var_max);
+ wide_int var_nonzero = get_nonzero_bits (tmp_var);
+ if (intersect_range_with_nonzero_bits (vr_type, &var_min,
+ &var_max,
+ var_nonzero,
+ sgn) != VR_RANGE)
+ return false;
+ }
+ else if (determine_value_range (tmp_var, &var_min, &var_max)
+ != VR_RANGE)
return false;
/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)