From 287cc750b0887e86cb309d976b17c7ee95f7ad48 Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Mon, 19 Oct 2020 16:04:39 +0800
Subject: [PATCH] Support variable index vec_set.

gcc/ChangeLog:

	PR target/97194
	* config/i386/i386-expand.c (ix86_expand_vector_set_var): New function.
	* config/i386/i386-protos.h (ix86_expand_vector_set_var): New Decl.
	* config/i386/predicates.md (vec_setm_operand): New predicate,
	true for const_int_operand or register_operand under TARGET_AVX2.
	* config/i386/sse.md (vec_set<mode>): Support both constant
	and variable index vec_set.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx2-vec-set-1.c: New test.
	* gcc.target/i386/avx2-vec-set-2.c: New test.
	* gcc.target/i386/avx512bw-vec-set-1.c: New test.
	* gcc.target/i386/avx512bw-vec-set-2.c: New test.
	* gcc.target/i386/avx512f-vec-set-2.c: New test.
	* gcc.target/i386/avx512vl-vec-set-2.c: New test.
---
 gcc/config/i386/i386-expand.c                 | 106 ++++++++++++++++++
 gcc/config/i386/i386-protos.h                 |   1 +
 gcc/config/i386/predicates.md                 |   6 +
 gcc/config/i386/sse.md                        |   9 +-
 .../gcc.target/i386/avx2-vec-set-1.c          |  49 ++++++++
 .../gcc.target/i386/avx2-vec-set-2.c          |  50 +++++++++
 .../gcc.target/i386/avx512bw-vec-set-1.c      |  20 ++++
 .../gcc.target/i386/avx512bw-vec-set-2.c      |  44 ++++++++
 .../gcc.target/i386/avx512f-vec-set-2.c       |  42 +++++++
 .../gcc.target/i386/avx512vl-vec-set-2.c      |  55 +++++++++
 10 files changed, 379 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 044faf3423f..73e3358b290 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14564,6 +14564,112 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
 }
 
+/* Set element IDX (a variable) of vector TARGET to VAL.  Implemented as
+   V setg (V v, int idx, T val)
+   {
+     V idxv = (V){idx, idx, idx, idx, idx, idx, idx, idx};
+     V valv = (V){val, val, val, val, val, val, val, val};
+     V mask = ((V){0, 1, 2, 3, 4, 5, 6, 7} == idxv);
+     v = (v & ~mask) | (valv & mask);
+     return v;
+   }.  */
+void
+ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
+{
+  rtx vec[64];
+  machine_mode mode = GET_MODE (target);
+  machine_mode cmp_mode = mode;
+  int n_elts = GET_MODE_NUNITS (mode);
+  rtx valv,idxv,constv,idx_tmp;
+  bool ok = false;
+
+  /* 512-bit vector byte/word broadcast and comparison are only available
+     under TARGET_AVX512BW; without it, split TARGET into two 256-bit halves,
+     set each half (index minus n_elts/2 for the high one) and rejoin them.  */
+  if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW)
+    {
+      gcc_assert (TARGET_AVX512F);
+      rtx vhi, vlo, idx_hi;
+      machine_mode half_mode;
+      rtx (*extract_hi)(rtx, rtx);
+      rtx (*extract_lo)(rtx, rtx);
+
+      if (mode == V32HImode)
+        {
+          half_mode = V16HImode;
+          extract_hi = gen_vec_extract_hi_v32hi;
+          extract_lo = gen_vec_extract_lo_v32hi;
+        }
+      else
+        {
+          half_mode = V32QImode;
+          extract_hi = gen_vec_extract_hi_v64qi;
+          extract_lo = gen_vec_extract_lo_v64qi;
+        }
+
+      vhi = gen_reg_rtx (half_mode);
+      vlo = gen_reg_rtx (half_mode);
+      idx_hi = gen_reg_rtx (GET_MODE (idx));
+      emit_insn (extract_hi (vhi, target));
+      emit_insn (extract_lo (vlo, target));
+      vec[0] = idx_hi;
+      vec[1] = idx;
+      vec[2] = GEN_INT (n_elts/2);
+      ix86_expand_binary_operator (MINUS, GET_MODE (idx), vec);
+      ix86_expand_vector_set_var (vhi, val, idx_hi);
+      ix86_expand_vector_set_var (vlo, val, idx);
+      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, vlo, vhi)));
+      return;
+    }
+
+  if (FLOAT_MODE_P (GET_MODE_INNER (mode)))
+    {  /* Indices are compared in the same-sized integer vector mode.  */
+      switch (mode)
+        {
+        case E_V2DFmode:
+          cmp_mode = V2DImode;
+          break;
+        case E_V4DFmode:
+          cmp_mode = V4DImode;
+          break;
+        case E_V8DFmode:
+          cmp_mode = V8DImode;
+          break;
+        case E_V4SFmode:
+          cmp_mode = V4SImode;
+          break;
+        case E_V8SFmode:
+          cmp_mode = V8SImode;
+          break;
+        case E_V16SFmode:
+          cmp_mode = V16SImode;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+
+  for (int i = 0; i != n_elts; i++)
+    vec[i] = GEN_INT (i);  /* constv = {0, 1, ..., n_elts - 1}.  */
+  constv = gen_rtx_CONST_VECTOR (cmp_mode, gen_rtvec_v (n_elts, vec));
+  valv = gen_reg_rtx (mode);
+  idxv = gen_reg_rtx (cmp_mode);
+  idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
+
+  ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+  gcc_assert (ok);
+  ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+  gcc_assert (ok);
+  vec[0] = target;  /* Presumably vec[0] = vec[3] ? vec[1] : vec[2].  */
+  vec[1] = valv;
+  vec[2] = target;
+  vec[3] = gen_rtx_EQ (mode, idxv, constv);
+  vec[4] = idxv;
+  vec[5] = constv;
+  ok = ix86_expand_int_vcond (vec);
+  gcc_assert (ok);
+}
+
 void
 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
 {
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a3d9f9eaf14..65347a59b79 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -245,6 +245,7 @@ extern rtx ix86_rewrite_tls_address (rtx);
 
 extern void ix86_expand_vector_init (bool, rtx, rtx);
 extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_set_var (rtx, rtx, rtx);
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 36f9dfcc586..be5aaa4d76f 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1023,6 +1023,12 @@
   return op == const1_rtx || op == constm1_rtx;
 })
 
+;; True for registers, or const_int_operand, used to vec_setm expander.
+(define_predicate "vec_setm_operand"
+  (ior (and (match_operand 0 "register_operand")
+            (match_test "TARGET_AVX2"))
+       (match_code "const_int")))
+
 ;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
 (define_predicate "reg_or_pm1_operand"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8437ad27087..11936809561 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8098,11 +8098,14 @@
 (define_expand "vec_set<mode>"
   [(match_operand:V 0 "register_operand")
    (match_operand:<ssescalarmode> 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_operand")]
   "TARGET_SSE"
 {
-  ix86_expand_vector_set (false, operands[0], operands[1],
-                          INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (false, operands[0], operands[1],
+                            INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c b/gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c
new file mode 100644
index 00000000000..4c16ec5dfc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vec-set-1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpeq[bwdq]} 12 } } */
+/* { dg-final { scan-assembler-times {(?n)vp?blendv} 12 } } */
+
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__ ((vector_size (16)));
+
+typedef long long v4di __attribute__ ((vector_size (32)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+typedef double v4df __attribute__ ((vector_size (32))); +typedef double v2df __attribute__ ((vector_size (16))); + +#define FOO(VTYPE, TYPE) \ + VTYPE \ + __attribute__ ((noipa)) \ + foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \ + { \ + a[c] = b; \ + return a; \ + } \ + +FOO (v16qi, char); +FOO (v32qi, char); + +FOO (v8hi, short); +FOO (v16hi, short); + +FOO (v4si, int); +FOO (v8si, int); + +FOO (v2di, long long); +FOO (v4di, long long); + +FOO (v4sf, float); +FOO (v8sf, float); + +FOO (v2df, double); +FOO (v4df, double); diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c new file mode 100644 index 00000000000..9086ef406f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-vec-set-2.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O2 -mavx2" } */ + + +#ifndef CHECK +#define CHECK "avx2-check.h" +#endif + +#ifndef TEST +#define TEST avx2_test +#endif + +#include CHECK + +#include "avx2-vec-set-1.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + CALC_TEST (v32qi, char, 32, 17); + CALC_TEST (v16qi, char, 16, 5); + CALC_TEST (v16hi, short, 16, 9); + CALC_TEST (v8hi, short, 8, 6); + CALC_TEST (v8si, int, 8, 3); + CALC_TEST (v4si, int, 4, 2); + CALC_TEST (v4di, long long, 4, 1); + CALC_TEST (v2di, long long, 2, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c new file mode 100644 index 00000000000..5cfbc85732e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-1.c @@ -0,0 +1,20 @@ 
+/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times {(?n)(?:vp?broadcast|vmovddup)} 36 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]+\$0} 18 } } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__ ((vector_size (64))); +typedef float v16sf __attribute__ ((vector_size (64))); +typedef double v8df __attribute__ ((vector_size (64))); + +#include "avx2-vec-set-1.c" + +FOO (v64qi, char); +FOO (v32hi, short); +FOO (v16si, int); +FOO (v8di, long long); +FOO (v16sf, float); +FOO (v8df, double); diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c new file mode 100644 index 00000000000..22e64183ebd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vec-set-2.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-options "-O2 -mavx512bw" } */ + + +#ifndef CHECK +#define CHECK "avx512f-check.h" +#endif + +#define AVX512BW + +#include CHECK + +#include "avx512bw-vec-set-1.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +test_512 (void) +{ + CALC_TEST (v64qi, char, 64, 50); + CALC_TEST (v32hi, short, 32, 30); + CALC_TEST (v16si, int, 16, 15); + CALC_TEST (v8di, long long, 8, 7); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c new file mode 100644 index 00000000000..8f2aa03ec11 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512f-vec-set-2.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512f -mno-avx512bw" } */ + + +#ifndef CHECK +#define CHECK "avx512f-check.h" +#endif + +#define AVX512F + +#include CHECK + +#include "avx512bw-vec-set-1.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +test_512 (void) +{ + CALC_TEST (v64qi, char, 64, 50); + CALC_TEST (v32hi, short, 32, 30); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c new file mode 100644 index 00000000000..4f327427a64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vec-set-2.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ + + +#ifndef CHECK +#define CHECK "avx512f-check.h" +#endif + +#define AVX512VL +#define AVX512BW + +#include CHECK + +#include "avx512bw-vec-set-1.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +test_256 (void) +{ + CALC_TEST (v32qi, char, 32, 17); + CALC_TEST (v16hi, short, 16, 9); + CALC_TEST (v8si, int, 8, 3); + CALC_TEST (v4di, long long, 4, 1); +} + +static 
void +test_128 (void) +{ + CALC_TEST (v16qi, char, 16, 5); + CALC_TEST (v8hi, short, 8, 6); + CALC_TEST (v4si, int, 4, 2); + CALC_TEST (v2di, long long, 2, 0); +} -- 2.30.2