From: liuhongt Date: Thu, 7 Jan 2021 02:15:33 +0000 (+0800) Subject: Fix ICE: Don't generate integer mask comparison for 128/256-bits vector when op_true... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8d0737d8f4b10bffe0411507ad2dc21ba7679883;p=gcc.git Fix ICE: Don't generate integer mask comparison for 128/256-bits vector when op_true/op_false are NULL or constm1_rtx/const0_rtx [PR98537] avx512vl-pr92686-vpcmp-{1,2,intelasm-1}.c are used to guard code generation of integer mask comparison, but for vector comparison to vector dest, integer mask comparison is disliked, so delete these useless tests. gcc/ChangeLog: PR target/98537 * config/i386/i386-expand.c (ix86_expand_sse_cmp): Don't generate integer mask comparison for 128/256-bits vector when op_true/op_false is NULL_RTX or CONSTM1_RTX/CONST0_RTX. Also delete redundant !maskcmp condition. (ix86_expand_int_sse_cmp): Ditto but no redundant deletion here. (ix86_expand_sse_movcc): Delete definition of maskcmp, add the condition directly to if (maskcmp), add extra check for cmpmode, it should be MODE_INT. (ix86_expand_fp_vec_cmp): Pass NULL to ix86_expand_sse_cmp's parameters op_true/op_false. (ix86_use_mask_cmp_p): New. gcc/testsuite/ChangeLog: PR target/98537 * g++.target/i386/avx512bw-pr98537-1.C: New test. * g++.target/i386/avx512vl-pr98537-1.C: New test. * g++.target/i386/avx512vl-pr98537-2.C: New test. * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase, integer mask comparison should not be generated. * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Remove. * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c: Ditto. 
--- diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index d64b4acc7dc..02d314226d1 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3469,6 +3469,33 @@ ix86_valid_mask_cmp_mode (machine_mode mode) return vector_size == 64 || TARGET_AVX512VL; } +/* Return true if integer mask comparison should be used. */ +static bool +ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode, + rtx op_true, rtx op_false) +{ + if (GET_MODE_SIZE (mode) == 64) + return true; + + /* When op_true is NULL, op_false must be NULL, or vice versa. */ + gcc_assert (!op_true == !op_false); + + /* When op_true/op_false is NULL or cmp_mode is not valid mask cmp mode, + vector dest is required. */ + if (!op_true || !ix86_valid_mask_cmp_mode (cmp_mode)) + return false; + + /* Exclude those that could be optimized in ix86_expand_sse_movcc. */ + if (op_false == CONST0_RTX (mode) + || op_true == CONST0_RTX (mode) + || (INTEGRAL_MODE_P (mode) + && (op_true == CONSTM1_RTX (mode) + || op_false == CONSTM1_RTX (mode)))) + return false; + + return true; +} + /* Expand an SSE comparison. Return the register with the result. */ static rtx @@ -3485,7 +3512,7 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, bool maskcmp = false; rtx x; - if (ix86_valid_mask_cmp_mode (cmp_ops_mode)) + if (ix86_use_mask_cmp_p (mode, cmp_ops_mode, op_true, op_false)) { unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); maskcmp = true; @@ -3517,7 +3544,7 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); - if (cmp_mode != mode && !maskcmp) + if (cmp_mode != mode) { x = force_reg (cmp_ops_mode, x); convert_move (dest, x, false); @@ -3544,9 +3571,6 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) return; } - /* In AVX512F the result of comparison is an integer mask. 
*/ - bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode); - rtx t2, t3, x; /* If we have an integer mask and FP value then we need @@ -3557,8 +3581,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) cmp = gen_rtx_SUBREG (mode, cmp, 0); } - if (maskcmp) + /* In AVX512F the result of comparison is an integer mask. */ + if (mode != cmpmode + && GET_MODE_CLASS (cmpmode) == MODE_INT) { + gcc_assert (ix86_valid_mask_cmp_mode (mode)); /* Using vector move with mask register. */ cmp = force_reg (cmpmode, cmp); /* Optimize for mask zero. */ @@ -4016,7 +4043,7 @@ ix86_expand_fp_vec_cmp (rtx operands[]) } else cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], - operands[1], operands[2]); + NULL, NULL); if (operands[0] != cmp) emit_move_insn (operands[0], cmp); @@ -4041,7 +4068,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, ; /* AVX512F supports all of the comparsions on all 128/256/512-bit vector int types. 
*/ - else if (ix86_valid_mask_cmp_mode (mode)) + else if (ix86_use_mask_cmp_p (data_mode, mode, op_true, op_false)) ; else { diff --git a/gcc/testsuite/g++.target/i386/avx512bw-pr98537-1.C b/gcc/testsuite/g++.target/i386/avx512bw-pr98537-1.C new file mode 100644 index 00000000000..969684a222b --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512bw-pr98537-1.C @@ -0,0 +1,11 @@ +/* PR target/98537 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -std=c++11" } */ + +#define TYPEV char +#define TYPEW short + +#define T_ARR \ + __attribute__ ((target ("avx512vl,avx512bw"))) + +#include "avx512vl-pr98537-1.C" diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr98537-1.C b/gcc/testsuite/g++.target/i386/avx512vl-pr98537-1.C new file mode 100644 index 00000000000..b2ba91111da --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr98537-1.C @@ -0,0 +1,40 @@ +/* PR target/98537 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -std=c++11" } */ + +#ifndef TYPEV +#define TYPEV int +#endif + +#ifndef TYPEW +#define TYPEW long long +#endif + +#ifndef T_ARR +#define T_ARR \ + __attribute__ ((target ("avx512vl"))) +#endif + +typedef TYPEV V __attribute__((__vector_size__(32))); +typedef TYPEW W __attribute__((__vector_size__(32))); + +W c, d; +struct B {}; +B e; +struct C { W i; }; +void foo (C); + +C +operator== (B, B) +{ + W r = (V)c == (V)d; + return {r}; +} + +void +T_ARR +bar () +{ + B a; + foo (a == e); +} diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr98537-2.C b/gcc/testsuite/g++.target/i386/avx512vl-pr98537-2.C new file mode 100644 index 00000000000..42c9682746d --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512vl-pr98537-2.C @@ -0,0 +1,8 @@ +/* PR target/98537 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -std=c++11" } */ + +#define TYPEV float +#define TYPEW double + +#include "avx512vl-pr98537-1.C" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c 
b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c index a3ffeca4354..af15a6364a4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c @@ -1,12 +1,14 @@ /* PR target/88547 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-xop -mavx512vl -mno-avx512bw -mno-avx512dq" } */ +/* { dg-options "-O2 -mno-xop -mavx512vl -mavx512bw -mavx512dq" } */ /* { dg-final { scan-assembler-not "vpmingt\[bwdq]\[\t ]" } } */ +/* { dg-final { scan-assembler-not "%k\[0-9\]" } } */ /* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ +/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */ #include "avx2-pr88547-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c deleted file mode 100644 index 5b79d4d36f9..00000000000 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c +++ /dev/null @@ -1,112 +0,0 @@ -/* PR target/88547 */ -/* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */ -/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ -/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ -/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */ -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */ - -typedef signed char v32qi 
__attribute__((vector_size(32))); -typedef unsigned char v32uqi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef unsigned short v16uhi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef unsigned v8usi __attribute__((vector_size(32))); -typedef long long v4di __attribute__((vector_size(32))); -typedef unsigned long long v4udi __attribute__((vector_size(32))); - -__attribute__((noipa)) v32qi -f1 (v32qi x, v32qi y) -{ - return x >= y; -} - -__attribute__((noipa)) v32uqi -f2 (v32uqi x, v32uqi y) -{ - return x >= y; -} - -__attribute__((noipa)) v32qi -f3 (v32qi x, v32qi y) -{ - return x <= y; -} - -__attribute__((noipa)) v32uqi -f4 (v32uqi x, v32uqi y) -{ - return x <= y; -} - -__attribute__((noipa)) v16hi -f5 (v16hi x, v16hi y) -{ - return x >= y; -} - -__attribute__((noipa)) v16uhi -f6 (v16uhi x, v16uhi y) -{ - return x >= y; -} - -__attribute__((noipa)) v16hi -f7 (v16hi x, v16hi y) -{ - return x <= y; -} - -__attribute__((noipa)) v16uhi -f8 (v16uhi x, v16uhi y) -{ - return x <= y; -} - -__attribute__((noipa)) v8si -f9 (v8si x, v8si y) -{ - return x >= y; -} - -__attribute__((noipa)) v8usi -f10 (v8usi x, v8usi y) -{ - return x >= y; -} - -__attribute__((noipa)) v8si -f11 (v8si x, v8si y) -{ - return x <= y; -} - -__attribute__((noipa)) v8usi -f12 (v8usi x, v8usi y) -{ - return x <= y; -} - -__attribute__((noipa)) v4di -f13 (v4di x, v4di y) -{ - return x >= y; -} - -__attribute__((noipa)) v4udi -f14 (v4udi x, v4udi y) -{ - return x >= y; -} - -__attribute__((noipa)) v4di -f15 (v4di x, v4di y) -{ - return x <= y; -} - -__attribute__((noipa)) v4udi -f16 (v4udi x, v4udi y) -{ - return x <= y; -} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c deleted file mode 100644 index 6be24ff30f4..00000000000 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c +++ /dev/null @@ -1,91 +0,0 @@ -/* { dg-do 
run } */ -/* { dg-require-effective-target avx512bw } */ -/* { dg-require-effective-target avx512vl } */ -/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ - -#ifndef CHECK -#define CHECK "avx512f-helper.h" -#endif - -#include CHECK - -#ifndef TEST -#define TEST avx512vl_test -#endif - -#include "avx512vl-pr92686-vpcmp-1.c" - -#define NUM 256 - -#define TEST_SIGNED(vtype, type, N, fn, op) \ -do \ - { \ - union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ - int i, sign = 1; \ - type res; \ - for (i = 0; i < NUM; i++) \ - { \ - src1.i[i] = i * i * sign; \ - src2.i[i] = (i + 20) * sign; \ - sign = -sign; \ - } \ - for (i = 0; i < NUM; i += N) \ - dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ - \ - for (i = 0; i < NUM; i++) \ - { \ - res = src1.i[i] op src2.i[i] ? -1 : 0; \ - if (res != dst.i[i]) \ - abort (); \ - } \ - } \ -while (0) - -#define TEST_UNSIGNED(vtype, type, N, fn, op) \ -do \ - { \ - union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ - int i; \ - type res; \ - \ - for (i = 0; i < NUM; i++) \ - { \ - src1.i[i] = i * i; \ - src2.i[i] = i + 20; \ - if ((i % 4)) \ - src2.i[i] |= (1ULL << (sizeof (type) \ - * __CHAR_BIT__ - 1)); \ - } \ - \ - for (i = 0; i < NUM; i += N) \ - dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ - \ - for (i = 0; i < NUM; i++) \ - { \ - res = src1.i[i] op src2.i[i] ? 
-1 : 0; \ - if (res != dst.i[i]) \ - abort (); \ - } \ - } \ -while (0) - -static void -TEST (void) -{ - TEST_SIGNED (v32qi, signed char, 32, f1, >=); - TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, >=); - TEST_SIGNED (v32qi, signed char, 32, f3, <=); - TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, <=); - TEST_SIGNED (v16hi, short int, 16, f5, >=); - TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, >=); - TEST_SIGNED (v16hi, short int, 16, f7, <=); - TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, <=); - TEST_SIGNED (v8si, int, 8, f9, >=); - TEST_UNSIGNED (v8usi, unsigned int, 8, f10, >=); - TEST_SIGNED (v8si, int, 8, f11, <=); - TEST_UNSIGNED (v8usi, unsigned int, 8, f12, <=); - TEST_SIGNED (v4di, long long int, 4, f13, >=); - TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, >=); - TEST_SIGNED (v4di, long long int, 4, f15, <=); - TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, <=); -} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c deleted file mode 100644 index 907386db08b..00000000000 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c +++ /dev/null @@ -1,111 +0,0 @@ -/* PR target/88547 */ -/* { dg-do assemble } */ -/* { dg-require-effective-target masm_intel } */ -/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop -masm=intel" } */ -/* { dg-require-effective-target avx512bw } */ -/* { dg-require-effective-target avx512vl } */ - -typedef signed char v32qi __attribute__((vector_size(32))); -typedef unsigned char v32uqi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef unsigned short v16uhi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef unsigned v8usi __attribute__((vector_size(32))); -typedef long long v4di __attribute__((vector_size(32))); -typedef unsigned long long v4udi __attribute__((vector_size(32))); - 
-__attribute__((noipa)) v32qi -f1 (v32qi x, v32qi y) -{ - return x >= y; -} - -__attribute__((noipa)) v32uqi -f2 (v32uqi x, v32uqi y) -{ - return x >= y; -} - -__attribute__((noipa)) v32qi -f3 (v32qi x, v32qi y) -{ - return x <= y; -} - -__attribute__((noipa)) v32uqi -f4 (v32uqi x, v32uqi y) -{ - return x <= y; -} - -__attribute__((noipa)) v16hi -f5 (v16hi x, v16hi y) -{ - return x >= y; -} - -__attribute__((noipa)) v16uhi -f6 (v16uhi x, v16uhi y) -{ - return x >= y; -} - -__attribute__((noipa)) v16hi -f7 (v16hi x, v16hi y) -{ - return x <= y; -} - -__attribute__((noipa)) v16uhi -f8 (v16uhi x, v16uhi y) -{ - return x <= y; -} - -__attribute__((noipa)) v8si -f9 (v8si x, v8si y) -{ - return x >= y; -} - -__attribute__((noipa)) v8usi -f10 (v8usi x, v8usi y) -{ - return x >= y; -} - -__attribute__((noipa)) v8si -f11 (v8si x, v8si y) -{ - return x <= y; -} - -__attribute__((noipa)) v8usi -f12 (v8usi x, v8usi y) -{ - return x <= y; -} - -__attribute__((noipa)) v4di -f13 (v4di x, v4di y) -{ - return x >= y; -} - -__attribute__((noipa)) v4udi -f14 (v4udi x, v4udi y) -{ - return x >= y; -} - -__attribute__((noipa)) v4di -f15 (v4di x, v4di y) -{ - return x <= y; -} - -__attribute__((noipa)) v4udi -f16 (v4udi x, v4udi y) -{ - return x <= y; -}