From: Vladimir Makarov Date: Fri, 12 Jan 2018 17:00:36 +0000 (+0000) Subject: re PR rtl-optimization/80481 (Unoptimal additional copy instructions) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0550a77b6e45a2a1c7da160ea32c518e25d5ca97;p=gcc.git re PR rtl-optimization/80481 (Unoptimal additional copy instructions) 2018-01-12 Vladimir Makarov PR rtl-optimization/80481 * ira-color.c (get_cap_member): New function. (allocnos_conflict_by_live_ranges_p): Use it. (slot_coalesced_allocno_live_ranges_intersect_p): Add assert. (setup_slot_coalesced_allocno_live_ranges): Ditto. 2018-01-12 Vladimir Makarov PR rtl-optimization/80481 * g++.dg/pr80481.C: New. From-SVN: r256590 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 05cc049abc4..7483549f80c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2018-01-12 Vladimir Makarov + + PR rtl-optimization/80481 + * ira-color.c (get_cap_member): New function. + (allocnos_conflict_by_live_ranges_p): Use it. + (slot_coalesced_allocno_live_ranges_intersect_p): Add assert. + (setup_slot_coalesced_allocno_live_ranges): Ditto. + 2018-01-12 Uros Bizjak PR target/83628 diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 43f5d57cf3e..c8b6ab4bcdf 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -1905,6 +1905,18 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) /* An array used to sort copies. */ static ira_copy_t *sorted_copies; +/* If allocno A is a cap, return non-cap allocno from which A is + created. Otherwise, return A. */ +static ira_allocno_t +get_cap_member (ira_allocno_t a) +{ + ira_allocno_t member; + + while ((member = ALLOCNO_CAP_MEMBER (a)) != NULL) + a = member; + return a; +} + /* Return TRUE if live ranges of allocnos A1 and A2 intersect. It is used to find a conflict for new allocnos or allocnos with the different allocno classes. 
*/ @@ -1924,6 +1936,10 @@ allocnos_conflict_by_live_ranges_p (ira_allocno_t a1, ira_allocno_t a2) && ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2)) return false; + /* We don't keep live ranges for caps because they can be quite big. + Use ranges of non-cap allocno from which caps are created. */ + a1 = get_cap_member (a1); + a2 = get_cap_member (a2); for (i = 0; i < n1; i++) { ira_object_t c1 = ALLOCNO_OBJECT (a1, i); @@ -4027,7 +4043,7 @@ slot_coalesced_allocno_live_ranges_intersect_p (ira_allocno_t allocno, int n) { int i; int nr = ALLOCNO_NUM_OBJECTS (a); - + gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL); for (i = 0; i < nr; i++) { ira_object_t obj = ALLOCNO_OBJECT (a, i); @@ -4057,6 +4073,7 @@ setup_slot_coalesced_allocno_live_ranges (ira_allocno_t allocno) a = ALLOCNO_COALESCE_DATA (a)->next) { int nr = ALLOCNO_NUM_OBJECTS (a); + gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL); for (i = 0; i < nr; i++) { ira_object_t obj = ALLOCNO_OBJECT (a, i); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 64e14f283d5..b843cf6356f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-01-12 Vladimir Makarov + + PR rtl-optimization/80481 + * g++.dg/pr80481.C: New. + 2018-01-12 Uros Bizjak PR target/83628 diff --git a/gcc/testsuite/g++.dg/pr80481.C b/gcc/testsuite/g++.dg/pr80481.C new file mode 100644 index 00000000000..316da0fdbfb --- /dev/null +++ b/gcc/testsuite/g++.dg/pr80481.C @@ -0,0 +1,70 @@ +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" } +// { dg-final { scan-assembler-not "vmovaps" } } + +#include <math.h> + +#include <xmmintrin.h> + +#define max(a, b) ( (a) > (b) ? 
(a) : (b) ) + +struct Sdata { + float w; + float s; + float r; + float t; + float v; +}; + extern int N1, N2, N3; + +#define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down)) + +void foo (Sdata *in, int idx, float *out) +{ + float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16); + float* y2 = (float*)_mm_malloc(sizeof(float) * N1,16); + float* y3 = (float*)_mm_malloc(sizeof(float) * N1,16); + float* y4 = (float*)_mm_malloc(sizeof(float) * N1,16); + + for (int k = idx; k < idx + N3; k++) { + float x1 = in[k].r; + float x2 = in[k].s; + float x3 = in[k].w; + float x4 = in[k].v; + float x5 = in[k].t; + x5 /= N2; + float u = exp(x4 * sqrt(x5)); + float d = exp(-x4 * sqrt(x5)); + float a = exp(x1 * x5); + float m = exp(-x1 * x5); + float p = (a - d) / (u - d); + y2[0] = x2; + y3[0] = float(1.f); + for (int i = 1; i <= N2; i++) { + y2[i] = u * y2[i - 1]; + y3[i] = d * y3[i - 1]; + } +#pragma omp simd + for (int i = 0; i <= N2; i++) { + y1[i] = + max((x3 - y2[N2 - i] * y3[i]), float(0.f)); + } + for (int i = N2 - 1; i >= 0; i--) { +#pragma omp simd + for (int j = 0; j <= i; j++) { + y4[j] = func(p,y1[j],y1[j+1]) * m; + } +#pragma omp simd + for (int j = 0; j <= i; j++) { + float t1 = y2[i - j] * y3[j]; + float t2 = max(x3 - t1, float(0.f)); + y1[j] = max(t2, y4[j]); + } + } + out[k] = y1[0]; + } + _mm_free(y1); + _mm_free(y2); + _mm_free(y3); + _mm_free(y4); +}