+2009-09-03 Razya Ladelsky <razya@il.ibm.com>
+
+ PR tree-optimization/38275
+
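+ * tree-parloops.c (parallelize_loops): Replace profitability condition
+ for expected number of iterations: use estimated_loop_iterations_int
+ and skip loops with at most n_threads * MIN_PER_THREAD iterations.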
+
+2009-09-03 Alon Dayan <alond@il.ibm.com>
+
+ PR tree-optimization/38275
+
+ * testsuite/gcc.dg/autopar/reduc-1char.c: Increase number
+ of iterations. Adjust the logic accordingly.
+ * testsuite/gcc.dg/autopar/reduc-2char.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-1.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-2.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-3.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-6.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-7.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-8.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-9.c: Ditto.
+ * testsuite/gcc.dg/autopar/pr39500-1.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-1short.c: Ditto.
+ * testsuite/gcc.dg/autopar/reduc-2short.c: Ditto.
+ * testsuite/gcc.dg/autopar/parallelization-1.c: Ditto.
+
+
2009-09-03 Alexandre Oliva <aoliva@redhat.com>
* doc/invoke.texi (BUILD_CONFIG): Document --with-build-config.
void parloop (int N)
{
int i;
- int x[10000000];
+ int x[10000990];
for (i = 0; i < N; i++)
x[i] = i + 3;
int main(void)
{
- parloop(10000000);
+ parloop(10000);
return 0;
}
int main (void)
{
int i;
- int x[1000];
+ int x[100000];
- for (i = 0; i < 100; i++)
- x[i] = x[i+100];
+ for (i = 0; i < 10000; i++)
+ x[i] = x[i+10000];
- for (i = 0; i < 100; i++)
+ for (i = 0; i < 10000; i++)
{
- if (x[i] != x[i+100])
+ if (x[i] != x[i+10000])
abort ();
}
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
-#define DIFF 242
-
-unsigned int ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-unsigned int uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-
-/* Reduction of unsigned-int. */
-
-void main1 (unsigned int x, unsigned int max_result, unsigned int min_result)
-{
- int i;
- unsigned int udiff = 2;
- unsigned int umax = x;
- unsigned int umin = x;
-
- /* Summation. */
- for (i = 0; i < N; i++) {
- udiff += (ub[i] - uc[i]);
- }
-
- /* Maximum. */
- for (i = 0; i < N; i++) {
- umax = umax < uc[i] ? uc[i] : umax;
- }
-
- /* Minimum. */
- for (i = 0; i < N; i++) {
- umin = umin > uc[i] ? uc[i] : umin;
- }
-
- /* check results: */
- if (udiff != DIFF)
- abort ();
- if (umax != max_result)
- abort ();
- if (umin != min_result)
- abort ();
+ #define N 1600
+ #define DIFF 2558402
+
+unsigned int ub[N];
+unsigned int uc[N];
+
+ /* Reduction of unsigned-int. */
+
+ void main1 (unsigned int x, unsigned int max_result, unsigned int min_result)
+ {
+ int i;
+ unsigned int udiff = 2;
+ unsigned int umax = x;
+ unsigned int umin = x;
+
+ /* Summation. */
+ for (i = 0; i < N; i++) {
+ udiff += (ub[i] - uc[i]);
+ }
+
+ /* Maximum. */
+ for (i = 0; i < N; i++) {
+ umax = umax < uc[i] ? uc[i] : umax;
+ }
+
+ /* Minimum. */
+ for (i = 0; i < N; i++) {
+ umin = umin > uc[i] ? uc[i] : umin;
+ }
+
+ /* check results: */
+ if (udiff != DIFF)
+ abort ();
+ if (umax != max_result)
+ abort ();
+ if (umin != min_result)
+ abort ();
+ }
+
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ ub[0] = 1;
+ uc[0] = 1;
+ for (i=1; i<N; i++)
+ {
+ ub[i] = i * 3;
+ uc[i] = i;
+ }
}
int main (void)
{
-
- main1 (100, 100, 1);
- main1 (0, 15, 0);
+ init_arrays ();
+ main1 (2000, 2000, 1);
+ main1 (0, 1599, 0);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
+#define N 1600
#define DIFF 242
unsigned char ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
abort ();
}
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ for (i=16; i<N; i++)
+ {
+ ub[i] = 1;
+ uc[i] = 1;
+ }
+}
+
int main (void)
-{
+{
+ init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
+#define N 1600
#define DIFF 242
unsigned short ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
abort ();
}
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ for (i=16; i<N; i++)
+ {
+ ub[i] = 1;
+ uc[i] = 1;
+ }
+}
+
int main (void)
{
+ init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
-#define DIFF 240
+#define N 1600
+#define DIFF 2558400
-int b[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-int c[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+int b[N];
+int c[N];
/* Reduction of signed-int. */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
-
+
/* check results: */
if (diff != DIFF)
abort ();
abort ();
}
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ b[0] = 1;
+ c[0] = 1;
+ for (i=1; i<N; i++)
+ {
+ b[i] = i * 3;
+ c[i] = i;
+ }
+}
+
int main (void)
{
- main1 (100, 100, 1);
- main1 (0, 15, 0);
+ init_arrays ();
+ main1 (2000, 2000, 1);
+ main1 (0, 1599, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
+#define N 1600
#define DIFF 121
signed char b[N] = {1,2,3,6,8,10,12,14,16,18,20,22,24,26,28,30};
abort ();
}
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ for (i=16; i<N; i++)
+ {
+ b[i] = 1;
+ c[i] = 1;
+ }
+}
+
int main (void)
{
+ init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
+#define N 1600
#define DIFF 242
short b[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
abort ();
}
+ __attribute__((noinline))
+ void init_arrays ()
+ {
+ int i;
+
+ for (i=16; i<N; i++)
+ {
+ b[i] = 1;
+ c[i] = 1;
+ }
+}
+
int main (void)
{
+ init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
+#define N 1600
-unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-unsigned int uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+unsigned int ub[N];
+unsigned int uc[N];
/* Reduction of unsigned-int. */
return 0;
}
+__attribute__((noinline))
+void init_arrays ()
+{
+ int i;
+
+ for (i=0; i<N; i++)
+ {
+ ub[i] = i * 3;
+ uc[i] = i;
+ }
+}
+
int main (void)
{
- main1 (N, 240);
- main1 (N-1, 210);
+ init_arrays ();
+ main1 (N, 2558400);
+ main1 (N-1, 2555202);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 1 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdarg.h>
#include <stdlib.h>
-#define N 16
-#define DIFF 242
+#define N 1600
+#define DIFF 2558402
+
__attribute__ ((noinline))
int main1 (float x, float max_result)
{
int i;
- float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
- float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ float b[N];
+ float c[N];
float diff = 2;
float max = x;
float min = 10;
+ for (i=0; i<N; i++)
+ {
+ b[i] = i * 3;
+ c[i] = i;
+ }
+
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
int main (void)
{
- main1 (100 ,100);
- main1 (0, 15);
+ main1 (2000, 2000);
+ main1 (0, 1599);
return 0;
}
+
/* need -ffast-math to parallelize these loops. */
/* { dg-final { scan-tree-dump-times "Detected reduction" 0 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "FAILED: it is not a part of reduction" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdlib.h>
-#define N 32
+#define N 3200
extern void abort (void);
typedef unsigned char T;
int main (void)
{
- static unsigned char const A[N] = {
+ static unsigned char A[N] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
};
- static unsigned char const B[N] = {
+ static unsigned char B[N] = {
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
};
- static unsigned char const C[N] = {
+ static unsigned char C[N] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
};
-
+ int i;
+
+ for (i=32; i<N; i++)
+ {
+ A[i]= 0x01;
+ B[i]= 0x70;
+ C[i]= 0xff;
+ }
+
testmin (A, 10, 1);
testmin (B, 0x7f, 0x70);
testmin (C, 0x7f, 0x09);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdlib.h>
-#define N 32
+#define N 3200
extern void abort (void);
typedef signed char T;
int main (void)
{
- static signed char const A[N] = {
+ static signed char A[N] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
};
- static signed char const B[N] = {
+ static signed char B[N] = {
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
};
- static signed char const C[N] = {
+ static signed char C[N] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
};
+ int i;
+
+ for (i=32; i<N; i++)
+ {
+ A[i]= 0x01;
+ B[i]= 0x70;
+ C[i]= 0xff;
+ }
+
testmin (A, 0, 0);
testmin (B, 0, 0x80);
testmin (C, 0, 0x80);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
#include <stdlib.h>
-#define N 32
+#define N 3200
extern void abort (void);
typedef unsigned short T;
int main (void)
{
- static unsigned short const A[N] = {
+ static unsigned short A[N] = {
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010,
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff
};
- static unsigned short const B[N] = {
+ static unsigned short B[N] = {
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff,
0x8000, 0x8001, 0x8002, 0x8003, 0x8004, 0x8005, 0x8006, 0x8007,
0x8008, 0x8009, 0x800a, 0x800b, 0x800c, 0x800d, 0x800e, 0x800f
};
- static unsigned short const C[N] = {
+ static unsigned short C[N] = {
0xffff, 0xfffe, 0xfffd, 0xfffc, 0xfffb, 0xfffa, 0xfff9, 0xfff8,
0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010,
0x8000, 0x8001, 0x8002, 0x8003, 0x8004, 0x8005, 0x8006, 0x8007,
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
};
+ int i;
+
+ for (i=32; i<N; i++)
+ {
+ A[i]= 0x0001;
+ B[i]= 0x7000;
+ C[i]= 0xffff;
+ }
+
testmin (A, 10, 1);
testmin (B, 0x7fff, 0x7000);
testmin (C, 0x7fff, 0x0009);
return 0;
}
+
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
/* FIXME: Bypass this check as graphite doesn't update the
count and frequency correctly now. */
if (!flag_loop_parallelize_all
- && (expected_loop_iterations (loop) <= n_threads
+ && (estimated_loop_iterations_int (loop, false)<= n_threads * MIN_PER_THREAD
/* Do not bother with loops in cold areas. */
|| optimize_loop_nest_for_size_p (loop)))
continue;