From 1c4153dd029bb2c325eb2232b123abf940537178 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 14 Oct 2011 21:25:07 +0200 Subject: [PATCH] sse.md (vec_widen_smult_hi_v8hi, [...]): Macroize using VI2_AVX2 mode iterator and any_extend code iterator. * config/i386/sse.md (vec_widen_smult_hi_v8hi, vec_widen_smult_lo_v8hi, vec_widen_umult_hi_v8hi, vec_widen_umult_lo_v8hi): Macroize using VI2_AVX2 mode iterator and any_extend code iterator. (vec_widen_<s>mult_hi_v8si, vec_widen_<s>mult_lo_v8si): New expanders. (vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si): Enable also for TARGET_SSE4_1 using pmuldq insn. (sdot_prodv8hi): Macroize using VI2_AVX2 iterator. (sse2_sse4_1): New code attr. (udot_prodv4si): Macroize using any_extend code iterator. (<u>dot_prodv8si): New expander. * gcc.target/i386/sse2-mul-1.c: New test. * gcc.target/i386/sse4_1-mul-1.c: New test. * gcc.target/i386/avx-mul-1.c: New test. * gcc.target/i386/xop-mul-1.c: New test. * gcc.target/i386/avx2-mul-1.c: New test. From-SVN: r180005 --- gcc/ChangeLog | 15 ++ gcc/config/i386/sse.md | 234 ++++++++++++------- gcc/testsuite/ChangeLog | 8 + gcc/testsuite/gcc.target/i386/avx-mul-1.c | 13 ++ gcc/testsuite/gcc.target/i386/avx2-mul-1.c | 13 ++ gcc/testsuite/gcc.target/i386/sse2-mul-1.c | 209 +++++++++++++++++ gcc/testsuite/gcc.target/i386/sse4_1-mul-1.c | 13 ++ gcc/testsuite/gcc.target/i386/xop-mul-1.c | 13 ++ 8 files changed, 429 insertions(+), 89 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-mul-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-mul-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mul-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-mul-1.c create mode 100644 gcc/testsuite/gcc.target/i386/xop-mul-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a7237566d2a..da4707105bd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2011-10-14 Jakub Jelinek + + * config/i386/sse.md (vec_widen_smult_hi_v8hi, + vec_widen_smult_lo_v8hi, 
vec_widen_umult_hi_v8hi, + vec_widen_umult_lo_v8hi): Macroize using VI2_AVX2 + mode iterator and any_extend code iterator. + (vec_widen_<s>mult_hi_v8si, vec_widen_<s>mult_lo_v8si): New + expanders. + (vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si): Enable + also for TARGET_SSE4_1 using pmuldq insn. + (sdot_prodv8hi): Macroize using VI2_AVX2 iterator. + (sse2_sse4_1): New code attr. + (udot_prodv4si): Macroize using any_extend code iterator. + (<u>dot_prodv8si): New expander. + 2011-10-14 Yakovlev Vladimir * config/i386/i386.c (atom_cost): Changed cost for loading diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 016eae2d371..ff77003fa61 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5507,83 +5507,97 @@ DONE; }) -(define_expand "vec_widen_smult_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] +(define_expand "vec_widen_<s>mult_hi_<mode>" + [(match_operand:<sseunpackmode> 0 "register_operand" "") + (any_extend:<sseunpackmode> + (match_operand:VI2_AVX2 1 "register_operand" "")) + (match_operand:VI2_AVX2 2 "register_operand" "")] "TARGET_SSE2" { rtx op1, op2, t1, t2, dest; op1 = operands[1]; op2 = operands[2]; - t1 = gen_reg_rtx (V8HImode); - t2 = gen_reg_rtx (V8HImode); - dest = gen_lowpart (V8HImode, operands[0]); + t1 = gen_reg_rtx (<MODE>mode); + t2 = gen_reg_rtx (<MODE>mode); + dest = gen_lowpart (<MODE>mode, operands[0]); - emit_insn (gen_mulv8hi3 (t1, op1, op2)); - emit_insn (gen_smulv8hi3_highpart (t2, op1, op2)); - emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2)); + emit_insn (gen_mul<mode>3 (t1, op1, op2)); + emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2)); DONE; }) -(define_expand "vec_widen_smult_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] +(define_expand "vec_widen_<s>mult_lo_<mode>" + [(match_operand:<sseunpackmode> 0 "register_operand" "") + 
(any_extend:<sseunpackmode> + (match_operand:VI2_AVX2 1 "register_operand" "")) + (match_operand:VI2_AVX2 2 "register_operand" "")] "TARGET_SSE2" { rtx op1, op2, t1, t2, dest; op1 = operands[1]; op2 = operands[2]; - t1 = gen_reg_rtx (V8HImode); - t2 = gen_reg_rtx (V8HImode); - dest = gen_lowpart (V8HImode, operands[0]); + t1 = gen_reg_rtx (<MODE>mode); + t2 = gen_reg_rtx (<MODE>mode); + dest = gen_lowpart (<MODE>mode, operands[0]); - emit_insn (gen_mulv8hi3 (t1, op1, op2)); - emit_insn (gen_smulv8hi3_highpart (t2, op1, op2)); - emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2)); + emit_insn (gen_mul<mode>3 (t1, op1, op2)); + emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2)); + emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2)); DONE; }) -(define_expand "vec_widen_umult_hi_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - "TARGET_SSE2" +(define_expand "vec_widen_<s>mult_hi_v8si" + [(match_operand:V4DI 0 "register_operand" "") + (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" "")) + (match_operand:V8SI 2 "nonimmediate_operand" "")] + "TARGET_AVX2" { - rtx op1, op2, t1, t2, dest; - - op1 = operands[1]; - op2 = operands[2]; - t1 = gen_reg_rtx (V8HImode); - t2 = gen_reg_rtx (V8HImode); - dest = gen_lowpart (V8HImode, operands[0]); + rtx t1, t2, t3, t4; - emit_insn (gen_mulv8hi3 (t1, op1, op2)); - emit_insn (gen_umulv8hi3_highpart (t2, op1, op2)); - emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2)); + t1 = gen_reg_rtx (V4DImode); + t2 = gen_reg_rtx (V4DImode); + t3 = gen_reg_rtx (V8SImode); + t4 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), + GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6)))); + 
emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), + GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6)))); + emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4)); DONE; }) -(define_expand "vec_widen_umult_lo_v8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - "TARGET_SSE2" +(define_expand "vec_widen_<s>mult_lo_v8si" + [(match_operand:V4DI 0 "register_operand" "") + (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" "")) + (match_operand:V8SI 2 "nonimmediate_operand" "")] + "TARGET_AVX2" { - rtx op1, op2, t1, t2, dest; - - op1 = operands[1]; - op2 = operands[2]; - t1 = gen_reg_rtx (V8HImode); - t2 = gen_reg_rtx (V8HImode); - dest = gen_lowpart (V8HImode, operands[0]); + rtx t1, t2, t3, t4; - emit_insn (gen_mulv8hi3 (t1, op1, op2)); - emit_insn (gen_umulv8hi3_highpart (t2, op1, op2)); - emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2)); + t1 = gen_reg_rtx (V4DImode); + t2 = gen_reg_rtx (V4DImode); + t3 = gen_reg_rtx (V8SImode); + t4 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), + GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6)))); + emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), + GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6)))); + emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4)); DONE; }) @@ -5591,24 +5605,28 @@ [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] - "TARGET_XOP" + "TARGET_SSE4_1" { - rtx t1, t2; + rtx op1, op2, t1, t2; + op1 = operands[1]; + op2 = operands[2]; t1 = gen_reg_rtx (V4SImode); t2 = gen_reg_rtx (V4SImode); - emit_insn 
(gen_sse2_pshufd_1 (t1, operands[1], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse2_pshufd_1 (t2, operands[2], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + if (TARGET_XOP) + { + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), + GEN_INT (1), GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), + GEN_INT (1), GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + DONE; + } + + emit_insn (gen_vec_interleave_highv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_highv4si (t2, op2, op2)); + emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2)); DONE; }) @@ -5616,24 +5634,28 @@ [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] - "TARGET_XOP" + "TARGET_SSE4_1" { - rtx t1, t2; + rtx op1, op2, t1, t2; + op1 = operands[1]; + op2 = operands[2]; t1 = gen_reg_rtx (V4SImode); t2 = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_pshufd_1 (t1, operands[1], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse2_pshufd_1 (t2, operands[2], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + if (TARGET_XOP) + { + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), + GEN_INT (1), GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), + GEN_INT (1), GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + DONE; + } + + emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1)); + emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2)); + emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2)); DONE; }) @@ -5675,30 +5697,35 @@ DONE; }) -(define_expand "sdot_prodv8hi" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:V8HI 1 
"register_operand" "") - (match_operand:V8HI 2 "register_operand" "") - (match_operand:V4SI 3 "register_operand" "")] +(define_expand "sdot_prod" + [(match_operand: 0 "register_operand" "") + (match_operand:VI2_AVX2 1 "register_operand" "") + (match_operand:VI2_AVX2 2 "register_operand" "") + (match_operand: 3 "register_operand" "")] "TARGET_SSE2" { - rtx t = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2])); - emit_insn (gen_addv4si3 (operands[0], operands[3], t)); + rtx t = gen_reg_rtx (mode); + emit_insn (gen__pmaddwd (t, operands[1], operands[2])); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_PLUS (mode, + operands[3], t))); DONE; }) -(define_expand "udot_prodv4si" +(define_code_attr sse2_sse4_1 + [(zero_extend "sse2") (sign_extend "sse4_1")]) + +(define_expand "dot_prodv4si" [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") + (any_extend:V2DI (match_operand:V4SI 1 "register_operand" "")) (match_operand:V4SI 2 "register_operand" "") (match_operand:V2DI 3 "register_operand" "")] - "TARGET_SSE2" + " == ZERO_EXTEND ? 
TARGET_SSE2 : TARGET_SSE4_1" { rtx t1, t2, t3, t4; t1 = gen_reg_rtx (V2DImode); - emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); + emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2])); emit_insn (gen_addv2di3 (t1, t1, operands[3])); t2 = gen_reg_rtx (V4SImode); @@ -5711,12 +5738,41 @@ GEN_INT (32))); t4 = gen_reg_rtx (V2DImode); - emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); + emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3)); emit_insn (gen_addv2di3 (operands[0], t1, t4)); DONE; }) +(define_expand "<u>dot_prodv8si" + [(match_operand:V4DI 0 "register_operand" "") + (any_extend:V4DI (match_operand:V8SI 1 "register_operand" "")) + (match_operand:V8SI 2 "register_operand" "") + (match_operand:V4DI 3 "register_operand" "")] + "TARGET_AVX2" +{ + rtx t1, t2, t3, t4; + + t1 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2])); + emit_insn (gen_addv4di3 (t1, t1, operands[3])); + + t2 = gen_reg_rtx (V8SImode); + t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2), + gen_lowpart (V2TImode, operands[1]), + GEN_INT (32))); + emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3), + gen_lowpart (V2TImode, operands[2]), + GEN_INT (32))); + + t4 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3)); + + emit_insn (gen_addv4di3 (operands[0], t1, t4)); + DONE; +}) + (define_insn "ashr<mode>3" [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") (ashiftrt:VI24_AVX2 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 26f866d6130..ca69717cae6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2011-10-14 Jakub Jelinek + + * gcc.target/i386/sse2-mul-1.c: New test. + * gcc.target/i386/sse4_1-mul-1.c: New test. + * gcc.target/i386/avx-mul-1.c: New test. + * gcc.target/i386/xop-mul-1.c: New test. + * gcc.target/i386/avx2-mul-1.c: New test. 
+ 2011-10-14 Jason Merrill PR c++/50563 diff --git a/gcc/testsuite/gcc.target/i386/avx-mul-1.c b/gcc/testsuite/gcc.target/i386/avx-mul-1.c new file mode 100644 index 00000000000..0d511c95cb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-mul-1.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx" } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include "sse2-mul-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx2-mul-1.c b/gcc/testsuite/gcc.target/i386/avx2-mul-1.c new file mode 100644 index 00000000000..0351fbb7c3d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-mul-1.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2" } */ + +#ifndef CHECK_H +#define CHECK_H "avx2-check.h" +#endif + +#ifndef TEST +#define TEST avx2_test +#endif + +#include "sse2-mul-1.c" diff --git a/gcc/testsuite/gcc.target/i386/sse2-mul-1.c b/gcc/testsuite/gcc.target/i386/sse2-mul-1.c new file mode 100644 index 00000000000..5c792e8a4aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-mul-1.c @@ -0,0 +1,209 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-options "-O3 -msse2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +#include <stdlib.h> + +#define N 512 +static short a1[N], a2[N], a3[N]; +static unsigned short b1[N], b2[N], b3[N]; +static int c1[N], c2[N], c3[N]; +static unsigned int d1[N], d2[N], d3[N]; +static long long e1[N], e2[N], e3[N]; +static unsigned long long g1[N], g2[N], g3[N]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; ++i) + a1[i] = a2[i] * a3[i]; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; ++i) + b1[i] = b2[i] * b3[i]; +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + 
int i; + for (i = 0; i < N; ++i) + c1[i] = c2[i] * c3[i]; +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; ++i) + d1[i] = d2[i] * d3[i]; +} + +__attribute__((noinline, noclone)) void +f5 (void) +{ + int i; + for (i = 0; i < N; ++i) + e1[i] = e2[i] * e3[i]; +} + +__attribute__((noinline, noclone)) void +f6 (void) +{ + int i; + for (i = 0; i < N; ++i) + g1[i] = g2[i] * g3[i]; +} + +__attribute__((noinline, noclone)) void +f7 (void) +{ + int i; + for (i = 0; i < N; ++i) + c1[i] = a2[i] * a3[i]; +} + +__attribute__((noinline, noclone)) void +f8 (void) +{ + int i; + for (i = 0; i < N; ++i) + d1[i] = (unsigned int) b2[i] * b3[i]; +} + +__attribute__((noinline, noclone)) void +f9 (void) +{ + int i; + for (i = 0; i < N; ++i) + e1[i] = (long long) c2[i] * (long long) c3[i]; +} + +__attribute__((noinline, noclone)) void +f10 (void) +{ + int i; + for (i = 0; i < N; ++i) + g1[i] = (unsigned long long) d2[i] * (unsigned long long) d3[i]; +} + +__attribute__((noinline, noclone)) int +f11 (void) +{ + int i, r = 0; + for (i = 0; i < N; ++i) + r += a2[i] * a3[i]; + return r; +} + +__attribute__((noinline, noclone)) unsigned int +f12 (void) +{ + int i; + unsigned r = 0; + for (i = 0; i < N; ++i) + r += (unsigned int) b2[i] * b3[i]; + return r; +} + +__attribute__((noinline, noclone)) long long +f13 (void) +{ + int i; + long long r = 0; + for (i = 0; i < N; ++i) + r += (long long) c2[i] * (long long) c3[i]; + return r; +} + +__attribute__((noinline, noclone)) unsigned long long +f14 (void) +{ + int i; + unsigned long long r = 0; + for (i = 0; i < N; ++i) + r += (unsigned long long) d2[i] * (unsigned long long) d3[i]; + return r; +} + +static void +TEST (void) +{ + int i; + int s1 = 0; + unsigned int s2 = 0; + long long s3 = 0; + unsigned long long s4 = 0; + for (i = 0; i < N; ++i) + { + asm volatile ("" : : "r" (&s1) : "memory"); + asm volatile ("" : : "r" (&s2) : "memory"); + asm volatile ("" : : "r" (&s3) : "memory"); + asm volatile ("" : : 
"r" (&s4) : "memory"); + b2[i] = (int) random (); + b3[i] = (int) random (); + a2[i] = b2[i]; + a3[i] = b3[i]; + d2[i] = (((int) random ()) << 16) | b2[i]; + d3[i] = (((int) random ()) << 16) | b3[i]; + c2[i] = d2[i]; + c3[i] = d3[i]; + s1 += a2[i] * a3[i]; + s2 += (unsigned int) b2[i] * b3[i]; + s3 += (long long) c2[i] * (long long) c3[i]; + s4 += (unsigned long long) d2[i] * (unsigned long long) d3[i]; + } + f1 (); + f2 (); + f3 (); + f4 (); + f5 (); + f6 (); + for (i = 0; i < N; ++i) + { + if (a1[i] != (short) (a2[i] * a3[i])) + abort (); + if (b1[i] != (unsigned short) (b2[i] * b3[i])) + abort (); + if (c1[i] != c2[i] * c3[i]) + abort (); + if (d1[i] != d2[i] * d3[i]) + abort (); + if (e1[i] != e2[i] * e3[i]) + abort (); + if (g1[i] != g2[i] * g3[i]) + abort (); + } + f7 (); + f8 (); + f9 (); + f10 (); + for (i = 0; i < N; ++i) + { + if (c1[i] != a2[i] * a3[i]) + abort (); + if (d1[i] != b2[i] * b3[i]) + abort (); + if (e1[i] != (long long) c2[i] * (long long) c3[i]) + abort (); + if (g1[i] != (unsigned long long) d2[i] * (unsigned long long) d3[i]) + abort (); + } + if (f11 () != s1 || f12 () != s2 || f13 () != s3 || f14 () != s4) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-mul-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-mul-1.c new file mode 100644 index 00000000000..20d03a515d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-mul-1.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O3 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include "sse2-mul-1.c" diff --git a/gcc/testsuite/gcc.target/i386/xop-mul-1.c b/gcc/testsuite/gcc.target/i386/xop-mul-1.c new file mode 100644 index 00000000000..47ef1bc02bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xop-mul-1.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-require-effective-target xop } */ +/* { dg-options "-O3 -mxop" } */ + +#ifndef CHECK_H 
+#define CHECK_H "xop-check.h" +#endif + +#ifndef TEST +#define TEST xop_test +#endif + +#include "sse2-mul-1.c" -- 2.30.2