From 1542d97a4ed360e4874afc04a6d5e8b31c0ce3e3 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 8 Sep 2011 07:58:54 +0200 Subject: [PATCH] re PR target/50310 (ICE: in gen_vcondv2div2df, at config/i386/sse.md:1435 with -O -ftree-vectorize and __builtin_isunordered()) PR target/50310 * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return code early if TARGET_AVX. (ix86_expand_fp_vcond): Handle LTGT and UNEQ. * gcc.c-torture/execute/ieee/pr50310.c: New test. * gcc.dg/pr50310-2.c: New test. From-SVN: r178673 --- gcc/ChangeLog | 7 ++ gcc/config/i386/i386.c | 32 +++++++- gcc/testsuite/ChangeLog | 6 ++ .../gcc.c-torture/execute/ieee/pr50310.c | 73 +++++++++++++++++++ gcc/testsuite/gcc.dg/pr50310-2.c | 47 ++++++++++++ 5 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c create mode 100644 gcc/testsuite/gcc.dg/pr50310-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3f79363feaf..6a96c334a2b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2011-09-08 Jakub Jelinek + + PR target/50310 + * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return + code early if TARGET_AVX. + (ix86_expand_fp_vcond): Handle LTGT and UNEQ. + 2011-09-07 Jakub Jelinek * config/i386/sse.md (sseinsnmode): Remove 32-byte integer vector diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ff8c49f0bf4..7bdab73011c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18318,6 +18318,11 @@ ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, { rtx tmp; + /* AVX supports all the needed comparisons, no need to swap arguments + nor help reload. */ + if (TARGET_AVX) + return code; + switch (code) { case LTGT: @@ -18573,7 +18578,32 @@ ix86_expand_fp_vcond (rtx operands[]) code = ix86_prepare_sse_fp_compare_args (operands[0], code, &operands[4], &operands[5]); if (code == UNKNOWN) - return false; + { + rtx temp; + switch (GET_CODE (operands[3])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], + operands[5], operands[1], operands[2]); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], + operands[5], operands[1], operands[2]); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; + } if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], operands[5], operands[1], operands[2])) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5189d62c7bf..364d0dcf996 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2011-09-08 Jakub Jelinek + + PR target/50310 + * gcc.c-torture/execute/ieee/pr50310.c: New test. + * gcc.dg/pr50310-2.c: New test. + 2011-09-07 Janus Weil PR fortran/48095 diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c b/gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c new file mode 100644 index 00000000000..8d323ca78f6 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c @@ -0,0 +1,73 @@ +/* PR target/50310 */ + +extern void abort (void); +double s1[4], s2[4], s3[64]; + +void +foo (void) +{ + int i; + for (i = 0; i < 4; i++) + s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0; +} + +int +main () +{ + int i; + s1[0] = 5.0; + s1[1] = 6.0; + s1[2] = 5.0; + s1[3] = __builtin_nan (""); + s2[0] = 6.0; + s2[1] = 5.0; + s2[2] = 5.0; + s2[3] = 5.0; + asm volatile ("" : : : "memory"); + foo (); + asm volatile ("" : : : "memory"); + for (i = 0; i < 16 * 4; i++) + if (i >= 12 * 4 && (i & 3) == 3) + { + if (s3[i] != 0.0) abort (); + } + else + { + static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 }; + if (s3[i] + != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8])) + ? -1.0 : 0.0)) + abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr50310-2.c b/gcc/testsuite/gcc.dg/pr50310-2.c new file mode 100644 index 00000000000..29621729e59 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr50310-2.c @@ -0,0 +1,47 @@ +/* PR target/50310 */ +/* { dg-do run } */ +/* { dg-options "-O3" } */ +/* { dg-options "-O3 -mavx" { target avx_runtime } } */ + +double s1[4], s2[4], s3[64]; + +int +main (void) +{ + int i; + asm volatile ("" : : : "memory"); + for (i = 0; i < 4; i++) + s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0; + for (i = 0; i < 4; i++) + s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0; + asm volatile ("" : : : "memory"); + return 0; +} -- 2.30.2