return vector_size == 64 || TARGET_AVX512VL;
}
+/* Return true if integer mask comparison should be used.
+
+   MODE is the mode whose size is tested and whose constant zero /
+   constant minus-one values OP_TRUE and OP_FALSE are compared against.
+   CMP_MODE is the mode handed to ix86_valid_mask_cmp_mode. OP_TRUE
+   and OP_FALSE must be either both NULL or both non-NULL.
+
+   The result is true unconditionally when GET_MODE_SIZE (MODE) is 64;
+   that check comes first, so the NULL-ness assertion is not reached in
+   that case. Otherwise the result is false when the operands are
+   NULL, when CMP_MODE is not a valid mask comparison mode, or when one
+   of the operands is a constant that ix86_expand_sse_movcc could
+   optimize. In all remaining cases the result is true. */
+static bool
+ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
+ rtx op_true, rtx op_false)
+{
+ if (GET_MODE_SIZE (mode) == 64)
+ return true;
+
+ /* op_true and op_false must be both NULL or both non-NULL. */
+ gcc_assert (!op_true == !op_false);
+
+ /* When op_true/op_false is NULL or cmp_mode is not a valid mask
+ comparison mode, a vector dest is required. */
+ if (!op_true || !ix86_valid_mask_cmp_mode (cmp_mode))
+ return false;
+
+ /* Exclude those that could be optimized in ix86_expand_sse_movcc:
+ a constant-zero operand, or a constant minus-one operand when mode
+ is an integral mode. */
+ if (op_false == CONST0_RTX (mode)
+ || op_true == CONST0_RTX (mode)
+ || (INTEGRAL_MODE_P (mode)
+ && (op_true == CONSTM1_RTX (mode)
+ || op_false == CONSTM1_RTX (mode))))
+ return false;
+
+ return true;
+}
+
/* Expand an SSE comparison. Return the register with the result. */
static rtx
bool maskcmp = false;
rtx x;
- if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
+ if (ix86_use_mask_cmp_p (mode, cmp_ops_mode, op_true, op_false))
{
unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
maskcmp = true;
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
- if (cmp_mode != mode && !maskcmp)
+ if (cmp_mode != mode)
{
x = force_reg (cmp_ops_mode, x);
convert_move (dest, x, false);
return;
}
- /* In AVX512F the result of comparison is an integer mask. */
- bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode);
-
rtx t2, t3, x;
/* If we have an integer mask and FP value then we need
cmp = gen_rtx_SUBREG (mode, cmp, 0);
}
- if (maskcmp)
+ /* In AVX512F the result of comparison is an integer mask. */
+ if (mode != cmpmode
+ && GET_MODE_CLASS (cmpmode) == MODE_INT)
{
+ gcc_assert (ix86_valid_mask_cmp_mode (mode));
/* Using vector move with mask register. */
cmp = force_reg (cmpmode, cmp);
/* Optimize for mask zero. */
}
else
cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
- operands[1], operands[2]);
+ NULL, NULL);
if (operands[0] != cmp)
emit_move_insn (operands[0], cmp);
;
/* AVX512F supports all of the comparisons
on all 128/256/512-bit vector int types. */
- else if (ix86_valid_mask_cmp_mode (mode))
+ else if (ix86_use_mask_cmp_p (data_mode, mode, op_true, op_false))
;
else
{
--- /dev/null
+/* PR target/98537
+   Sets TYPEV to char, TYPEW to short and T_ARR to the
+   "avx512vl,avx512bw" target attribute, then includes
+   avx512vl-pr98537-1.C. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -std=c++11" } */
+
+#define TYPEV char
+#define TYPEW short
+
+#define T_ARR \
+ __attribute__ ((target ("avx512vl,avx512bw")))
+
+#include "avx512vl-pr98537-1.C"
--- /dev/null
+/* PR target/98537
+   Compile-only test. TYPEV, TYPEW and T_ARR may be predefined by a
+   file that includes this one; otherwise they default to int,
+   long long and the "avx512vl" target attribute. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -std=c++11" } */
+
+#ifndef TYPEV
+#define TYPEV int
+#endif
+
+#ifndef TYPEW
+#define TYPEW long long
+#endif
+
+#ifndef T_ARR
+#define T_ARR \
+ __attribute__ ((target ("avx512vl")))
+#endif
+/* V and W are __vector_size__ (32) vectors of TYPEV and TYPEW. */
+typedef TYPEV V __attribute__((__vector_size__(32)));
+typedef TYPEW W __attribute__((__vector_size__(32)));
+
+W c, d;
+struct B {};
+B e;
+struct C { W i; };
+void foo (C);
+/* Compare the globals c and d as V vectors, store the result in a W
+   and return it wrapped in a C. The B arguments are unused. This
+   function does not carry T_ARR. */
+C
+operator== (B, B)
+{
+ W r = (V)c == (V)d;
+ return {r};
+}
+/* Carries the T_ARR target attribute. Applies operator== to a local B
+   and the global e, and passes the resulting C to foo. */
+void
+T_ARR
+bar ()
+{
+ B a;
+ foo (a == e);
+}
--- /dev/null
+/* PR target/98537
+   Sets TYPEV to float and TYPEW to double, leaves T_ARR undefined,
+   then includes avx512vl-pr98537-1.C. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -std=c++11" } */
+
+#define TYPEV float
+#define TYPEW double
+
+#include "avx512vl-pr98537-1.C"
/* PR target/88547 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-xop -mavx512vl -mno-avx512bw -mno-avx512dq" } */
+/* { dg-options "-O2 -mno-xop -mavx512vl -mavx512bw -mavx512dq" } */
/* { dg-final { scan-assembler-not "vpmingt\[bwdq]\[\t ]" } } */
+/* { dg-final { scan-assembler-not "%k\[0-9\]" } } */
/* { dg-final { scan-assembler-times "vpminub\[\t ]" 2 } } */
/* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
/* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
/* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */
#include "avx2-pr88547-1.c"
+++ /dev/null
-/* PR target/88547 */
-/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */
-/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
-/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
-/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */
-
-typedef signed char v32qi __attribute__((vector_size(32)));
-typedef unsigned char v32uqi __attribute__((vector_size(32)));
-typedef short v16hi __attribute__((vector_size(32)));
-typedef unsigned short v16uhi __attribute__((vector_size(32)));
-typedef int v8si __attribute__((vector_size(32)));
-typedef unsigned v8usi __attribute__((vector_size(32)));
-typedef long long v4di __attribute__((vector_size(32)));
-typedef unsigned long long v4udi __attribute__((vector_size(32)));
-
-__attribute__((noipa)) v32qi
-f1 (v32qi x, v32qi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v32uqi
-f2 (v32uqi x, v32uqi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v32qi
-f3 (v32qi x, v32qi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v32uqi
-f4 (v32uqi x, v32uqi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v16hi
-f5 (v16hi x, v16hi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v16uhi
-f6 (v16uhi x, v16uhi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v16hi
-f7 (v16hi x, v16hi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v16uhi
-f8 (v16uhi x, v16uhi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v8si
-f9 (v8si x, v8si y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v8usi
-f10 (v8usi x, v8usi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v8si
-f11 (v8si x, v8si y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v8usi
-f12 (v8usi x, v8usi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v4di
-f13 (v4di x, v4di y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v4udi
-f14 (v4udi x, v4udi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v4di
-f15 (v4di x, v4di y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v4udi
-f16 (v4udi x, v4udi y)
-{
- return x <= y;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target avx512bw } */
-/* { dg-require-effective-target avx512vl } */
-/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
-
-#ifndef CHECK
-#define CHECK "avx512f-helper.h"
-#endif
-
-#include CHECK
-
-#ifndef TEST
-#define TEST avx512vl_test
-#endif
-
-#include "avx512vl-pr92686-vpcmp-1.c"
-
-#define NUM 256
-
-#define TEST_SIGNED(vtype, type, N, fn, op) \
-do \
- { \
- union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \
- int i, sign = 1; \
- type res; \
- for (i = 0; i < NUM; i++) \
- { \
- src1.i[i] = i * i * sign; \
- src2.i[i] = (i + 20) * sign; \
- sign = -sign; \
- } \
- for (i = 0; i < NUM; i += N) \
- dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
- \
- for (i = 0; i < NUM; i++) \
- { \
- res = src1.i[i] op src2.i[i] ? -1 : 0; \
- if (res != dst.i[i]) \
- abort (); \
- } \
- } \
-while (0)
-
-#define TEST_UNSIGNED(vtype, type, N, fn, op) \
-do \
- { \
- union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \
- int i; \
- type res; \
- \
- for (i = 0; i < NUM; i++) \
- { \
- src1.i[i] = i * i; \
- src2.i[i] = i + 20; \
- if ((i % 4)) \
- src2.i[i] |= (1ULL << (sizeof (type) \
- * __CHAR_BIT__ - 1)); \
- } \
- \
- for (i = 0; i < NUM; i += N) \
- dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \
- \
- for (i = 0; i < NUM; i++) \
- { \
- res = src1.i[i] op src2.i[i] ? -1 : 0; \
- if (res != dst.i[i]) \
- abort (); \
- } \
- } \
-while (0)
-
-static void
-TEST (void)
-{
- TEST_SIGNED (v32qi, signed char, 32, f1, >=);
- TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, >=);
- TEST_SIGNED (v32qi, signed char, 32, f3, <=);
- TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, <=);
- TEST_SIGNED (v16hi, short int, 16, f5, >=);
- TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, >=);
- TEST_SIGNED (v16hi, short int, 16, f7, <=);
- TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, <=);
- TEST_SIGNED (v8si, int, 8, f9, >=);
- TEST_UNSIGNED (v8usi, unsigned int, 8, f10, >=);
- TEST_SIGNED (v8si, int, 8, f11, <=);
- TEST_UNSIGNED (v8usi, unsigned int, 8, f12, <=);
- TEST_SIGNED (v4di, long long int, 4, f13, >=);
- TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, >=);
- TEST_SIGNED (v4di, long long int, 4, f15, <=);
- TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, <=);
-}
+++ /dev/null
-/* PR target/88547 */
-/* { dg-do assemble } */
-/* { dg-require-effective-target masm_intel } */
-/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop -masm=intel" } */
-/* { dg-require-effective-target avx512bw } */
-/* { dg-require-effective-target avx512vl } */
-
-typedef signed char v32qi __attribute__((vector_size(32)));
-typedef unsigned char v32uqi __attribute__((vector_size(32)));
-typedef short v16hi __attribute__((vector_size(32)));
-typedef unsigned short v16uhi __attribute__((vector_size(32)));
-typedef int v8si __attribute__((vector_size(32)));
-typedef unsigned v8usi __attribute__((vector_size(32)));
-typedef long long v4di __attribute__((vector_size(32)));
-typedef unsigned long long v4udi __attribute__((vector_size(32)));
-
-__attribute__((noipa)) v32qi
-f1 (v32qi x, v32qi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v32uqi
-f2 (v32uqi x, v32uqi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v32qi
-f3 (v32qi x, v32qi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v32uqi
-f4 (v32uqi x, v32uqi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v16hi
-f5 (v16hi x, v16hi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v16uhi
-f6 (v16uhi x, v16uhi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v16hi
-f7 (v16hi x, v16hi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v16uhi
-f8 (v16uhi x, v16uhi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v8si
-f9 (v8si x, v8si y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v8usi
-f10 (v8usi x, v8usi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v8si
-f11 (v8si x, v8si y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v8usi
-f12 (v8usi x, v8usi y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v4di
-f13 (v4di x, v4di y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v4udi
-f14 (v4udi x, v4udi y)
-{
- return x >= y;
-}
-
-__attribute__((noipa)) v4di
-f15 (v4di x, v4di y)
-{
- return x <= y;
-}
-
-__attribute__((noipa)) v4udi
-f16 (v4udi x, v4udi y)
-{
- return x <= y;
-}