ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
+/* Expand a vector element store whose lane index is not a compile-time
+ constant: store VAL into the element of TARGET selected by IDX and
+ leave the other elements unchanged. TARGET, VAL and IDX are rtxes;
+ the vector mode is taken from TARGET.
+
+ VAL and IDX are each broadcast to a full vector, the IDX vector is
+ compared for equality against the constant vector
+ {0, 1, ..., n_elts - 1}, and the result of that comparison selects,
+ element by element, between the broadcast VAL and the old TARGET.
+ For float vectors the comparison is done in the integer vector mode
+ with the same element size and count.
+
+ Implemented as
+ V setg (V v, int idx, T val)
+ {
+ V idxv = (V){idx, idx, idx, idx, idx, idx, idx, idx};
+ V valv = (V){val, val, val, val, val, val, val, val};
+ V mask = ((V){0, 1, 2, 3, 4, 5, 6, 7} == idxv);
+ v = (v & ~mask) | (valv & mask);
+ return v;
+ }. */
+void
+ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
+{
+ rtx vec[64]; /* Scratch array reused for the MINUS operands, the lane-number
+ constants (up to 64 for V64QImode) and the vcond operands. */
+ machine_mode mode = GET_MODE (target);
+ machine_mode cmp_mode = mode;
+ int n_elts = GET_MODE_NUNITS (mode);
+ rtx valv,idxv,constv,idx_tmp;
+ bool ok = false;
+
+ /* 512-bit byte/word broadcast and comparison are only available
+ under TARGET_AVX512BW. Without it, split the 512-bit vector into
+ two 256-bit halves, update each half with a recursive call, and
+ concatenate the halves back into TARGET. */
+ if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW)
+ {
+ gcc_assert (TARGET_AVX512F);
+ rtx vhi, vlo, idx_hi;
+ machine_mode half_mode;
+ rtx (*extract_hi)(rtx, rtx);
+ rtx (*extract_lo)(rtx, rtx);
+
+ if (mode == V32HImode)
+ {
+ half_mode = V16HImode;
+ extract_hi = gen_vec_extract_hi_v32hi;
+ extract_lo = gen_vec_extract_lo_v32hi;
+ }
+ else
+ {
+ half_mode = V32QImode;
+ extract_hi = gen_vec_extract_hi_v64qi;
+ extract_lo = gen_vec_extract_lo_v64qi;
+ }
+ /* Extract both halves of TARGET into new pseudo registers and compute
+ idx_hi = idx - n_elts / 2, the index relative to the high half. For
+ an in-range IDX only one half has a lane number equal to its index,
+ so the other half is passed through unchanged. */
+ vhi = gen_reg_rtx (half_mode);
+ vlo = gen_reg_rtx (half_mode);
+ idx_hi = gen_reg_rtx (GET_MODE (idx));
+ emit_insn (extract_hi (vhi, target));
+ emit_insn (extract_lo (vlo, target));
+ vec[0] = idx_hi;
+ vec[1] = idx;
+ vec[2] = GEN_INT (n_elts/2);
+ ix86_expand_binary_operator (MINUS, GET_MODE (idx), vec);
+ ix86_expand_vector_set_var (vhi, val, idx_hi);
+ ix86_expand_vector_set_var (vlo, val, idx);
+ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, vlo, vhi)));
+ return;
+ }
+ /* The lane-index comparison is an integer comparison, so for float
+ vectors use the integer vector mode with the same element size and
+ element count as the comparison mode. */
+ if (FLOAT_MODE_P (GET_MODE_INNER (mode)))
+ {
+ switch (mode)
+ {
+ case E_V2DFmode:
+ cmp_mode = V2DImode;
+ break;
+ case E_V4DFmode:
+ cmp_mode = V4DImode;
+ break;
+ case E_V8DFmode:
+ cmp_mode = V8DImode;
+ break;
+ case E_V4SFmode:
+ cmp_mode = V4SImode;
+ break;
+ case E_V8SFmode:
+ cmp_mode = V8SImode;
+ break;
+ case E_V16SFmode:
+ cmp_mode = V16SImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ /* Build the constant vector {0, 1, ..., n_elts - 1} of lane numbers. */
+ for (int i = 0; i != n_elts; i++)
+ vec[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (cmp_mode, gen_rtvec_v (n_elts, vec));
+ valv = gen_reg_rtx (mode);
+ idxv = gen_reg_rtx (cmp_mode);
+ idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1); /* 1 = treat IDX as unsigned. */
+ /* Broadcast VAL and the converted IDX to every lane, then select valv
+ where idxv == constv and the old TARGET elsewhere. NOTE(review): the
+ operand layout below (dest, if-true, if-false, comparison, cmp-op0,
+ cmp-op1) is presumably the vcond operand order; confirm against
+ ix86_expand_int_vcond. */
+ ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+ gcc_assert (ok);
+ ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+ gcc_assert (ok);
+ vec[0] = target;
+ vec[1] = valv;
+ vec[2] = target;
+ vec[3] = gen_rtx_EQ (mode, idxv, constv);
+ vec[4] = idxv;
+ vec[5] = constv;
+ ok = ix86_expand_int_vcond (vec);
+ gcc_assert (ok);
+}
+
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_set_var (rtx, rtx, rtx);
extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
return op == const1_rtx || op == constm1_rtx;
})
+;; True for a const_int, or for a register when TARGET_AVX2 is enabled; used for the index operand of the vec_set<mode> expander.
+(define_predicate "vec_setm_operand"
+ (ior (and (match_operand 0 "register_operand")
+ (match_test "TARGET_AVX2"))
+ (match_code "const_int")))
+
;; True for registers, or 1 or -1. Used to optimize double-word shifts.
(define_predicate "reg_or_pm1_operand"
(ior (match_operand 0 "register_operand")
(define_expand "vec_set<mode>"
[(match_operand:V 0 "register_operand")
(match_operand:<ssescalarmode> 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_operand")]
"TARGET_SSE"
{
- ix86_expand_vector_set (false, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2])) /* Constant lane index: use the existing expander. */
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else /* Index is a register, which vec_setm_operand accepts only with
+ TARGET_AVX2: expand the variable-index form. */
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpeq[bwdq]} 12 } } */
+/* { dg-final { scan-assembler-times {(?n)vp?blendv} 12 } } */
+/* Vector types under test: a 128-bit and a 256-bit vector of each element type. */
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__ ((vector_size (16)));
+
+typedef long long v4di __attribute__ ((vector_size (32)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+/* Define foo_VTYPE, which stores B into element C of vector A, where C is a
+ function parameter and therefore not a compile-time constant, and returns
+ the updated vector. The function is marked noipa. */
+#define FOO(VTYPE, TYPE) \
+ VTYPE \
+ __attribute__ ((noipa)) \
+ foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \
+ { \
+ a[c] = b; \
+ return a; \
+ } \
+
+FOO (v16qi, char);
+FOO (v32qi, char);
+
+FOO (v8hi, short);
+FOO (v16hi, short);
+
+FOO (v4si, int);
+FOO (v8si, int);
+
+FOO (v2di, long long);
+FOO (v4di, long long);
+
+FOO (v4sf, float);
+FOO (v8sf, float);
+
+FOO (v2df, double);
+FOO (v4df, double);
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O2 -mavx2" } */
+
+
+#ifndef CHECK
+#define CHECK "avx2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+#include CHECK
+
+#include "avx2-vec-set-1.c"
+
+/* Check foo_<vtype> for one vector type.  Fill an N-element array of TYPE
+   with a known pattern, build the expected result by overwriting element
+   IDX, call foo_<vtype> with the same value and index, and abort on the
+   first lane that differs.  The vector argument is built with
+   __builtin_memcpy rather than by dereferencing a cast pointer, because
+   the scalar array is not guaranteed to have the vector type's
+   alignment.  */
+#define CALC_TEST(vtype, type, N, idx)                          \
+do                                                              \
+  {                                                             \
+    int i, val = (idx) * (idx) - (idx) * 3 + 16;                \
+    type res[N], exp[N];                                        \
+    vtype inv, resv;                                            \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        res[i] = i * i - i * 3 + 15;                            \
+        exp[i] = res[i];                                        \
+      }                                                         \
+    exp[idx] = val;                                             \
+    __builtin_memcpy (&inv, res, sizeof (inv));                 \
+    resv = foo_##vtype (inv, val, idx);                         \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        if (resv[i] != exp[i])                                  \
+          abort ();                                             \
+      }                                                         \
+  }                                                             \
+while (0)
+
+/* Run the check for each 128- and 256-bit integer vector type.
+   NOTE(review): TEST is presumably invoked by the harness included
+   through CHECK; the float and double vector types are not exercised
+   here.  */
+static void
+TEST (void)
+{
+  CALC_TEST (v32qi, char, 32, 17);
+  CALC_TEST (v16qi, char, 16, 5);
+  CALC_TEST (v16hi, short, 16, 9);
+  CALC_TEST (v8hi, short, 8, 6);
+  CALC_TEST (v8si, int, 8, 3);
+  CALC_TEST (v4si, int, 4, 2);
+  CALC_TEST (v4di, long long, 4, 1);
+  CALC_TEST (v2di, long long, 2, 0);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times {(?n)(?:vp?broadcast|vmovddup)} 36 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]+\$0} 18 } } */
+/* 512-bit vector types. The FOO macro used below is not defined in this file, so it must come from the included file. */
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__ ((vector_size (64)));
+typedef float v16sf __attribute__ ((vector_size (64)));
+typedef double v8df __attribute__ ((vector_size (64)));
+
+#include "avx2-vec-set-1.c"
+
+FOO (v64qi, char);
+FOO (v32hi, short);
+FOO (v16si, int);
+FOO (v8di, long long);
+FOO (v16sf, float);
+FOO (v8df, double);
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512BW
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+/* Check foo_<vtype> for one vector type.  Fill an N-element array of TYPE
+   with a known pattern, build the expected result by overwriting element
+   IDX, call foo_<vtype> with the same value and index, and abort on the
+   first lane that differs.  The vector argument is built with
+   __builtin_memcpy rather than by dereferencing a cast pointer, because
+   the scalar array is not guaranteed to have the vector type's
+   alignment.  */
+#define CALC_TEST(vtype, type, N, idx)                          \
+do                                                              \
+  {                                                             \
+    int i, val = (idx) * (idx) - (idx) * 3 + 16;                \
+    type res[N], exp[N];                                        \
+    vtype inv, resv;                                            \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        res[i] = i * i - i * 3 + 15;                            \
+        exp[i] = res[i];                                        \
+      }                                                         \
+    exp[idx] = val;                                             \
+    __builtin_memcpy (&inv, res, sizeof (inv));                 \
+    resv = foo_##vtype (inv, val, idx);                         \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        if (resv[i] != exp[i])                                  \
+          abort ();                                             \
+      }                                                         \
+  }                                                             \
+while (0)
+
+/* Run the check for each 512-bit integer vector type.
+   NOTE(review): test_512 is presumably invoked by the harness included
+   through CHECK; the float and double vector types are not exercised
+   here.  */
+static void
+test_512 (void)
+{
+  CALC_TEST (v64qi, char, 64, 50);
+  CALC_TEST (v32hi, short, 32, 30);
+  CALC_TEST (v16si, int, 16, 15);
+  CALC_TEST (v8di, long long, 8, 7);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f -mno-avx512bw" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512F
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+/* Check foo_<vtype> for one vector type.  Fill an N-element array of TYPE
+   with a known pattern, build the expected result by overwriting element
+   IDX, call foo_<vtype> with the same value and index, and abort on the
+   first lane that differs.  The vector argument is built with
+   __builtin_memcpy rather than by dereferencing a cast pointer, because
+   the scalar array is not guaranteed to have the vector type's
+   alignment.  */
+#define CALC_TEST(vtype, type, N, idx)                          \
+do                                                              \
+  {                                                             \
+    int i, val = (idx) * (idx) - (idx) * 3 + 16;                \
+    type res[N], exp[N];                                        \
+    vtype inv, resv;                                            \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        res[i] = i * i - i * 3 + 15;                            \
+        exp[i] = res[i];                                        \
+      }                                                         \
+    exp[idx] = val;                                             \
+    __builtin_memcpy (&inv, res, sizeof (inv));                 \
+    resv = foo_##vtype (inv, val, idx);                         \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        if (resv[i] != exp[i])                                  \
+          abort ();                                             \
+      }                                                         \
+  }                                                             \
+while (0)
+
+/* Run the check for the 512-bit byte and word vectors with AVX512BW
+   disabled by the test options.  Both indices (50 and 30) fall in the
+   upper half of their vectors.
+   NOTE(review): test_512 is presumably invoked by the harness included
+   through CHECK.  */
+static void
+test_512 (void)
+{
+  CALC_TEST (v64qi, char, 64, 50);
+  CALC_TEST (v32hi, short, 32, 30);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+
+#ifndef CHECK
+#define CHECK "avx512f-check.h"
+#endif
+
+#define AVX512VL
+#define AVX512BW
+
+#include CHECK
+
+#include "avx512bw-vec-set-1.c"
+
+/* Check foo_<vtype> for one vector type.  Fill an N-element array of TYPE
+   with a known pattern, build the expected result by overwriting element
+   IDX, call foo_<vtype> with the same value and index, and abort on the
+   first lane that differs.  The vector argument is built with
+   __builtin_memcpy rather than by dereferencing a cast pointer, because
+   the scalar array is not guaranteed to have the vector type's
+   alignment.  */
+#define CALC_TEST(vtype, type, N, idx)                          \
+do                                                              \
+  {                                                             \
+    int i, val = (idx) * (idx) - (idx) * 3 + 16;                \
+    type res[N], exp[N];                                        \
+    vtype inv, resv;                                            \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        res[i] = i * i - i * 3 + 15;                            \
+        exp[i] = res[i];                                        \
+      }                                                         \
+    exp[idx] = val;                                             \
+    __builtin_memcpy (&inv, res, sizeof (inv));                 \
+    resv = foo_##vtype (inv, val, idx);                         \
+    for (i = 0; i < (N); i++)                                   \
+      {                                                         \
+        if (resv[i] != exp[i])                                  \
+          abort ();                                             \
+      }                                                         \
+  }                                                             \
+while (0)
+
+/* Run the check for each 256-bit integer vector type.
+   NOTE(review): test_256 is presumably invoked by the harness included
+   through CHECK.  */
+static void
+test_256 (void)
+{
+  CALC_TEST (v32qi, char, 32, 17);
+  CALC_TEST (v16hi, short, 16, 9);
+  CALC_TEST (v8si, int, 8, 3);
+  CALC_TEST (v4di, long long, 4, 1);
+}
+
+/* Run the check for each 128-bit integer vector type.
+   NOTE(review): test_128 is presumably invoked by the harness included
+   through CHECK.  */
+static void
+test_128 (void)
+{
+  CALC_TEST (v16qi, char, 16, 5);
+  CALC_TEST (v8hi, short, 8, 6);
+  CALC_TEST (v4si, int, 4, 2);
+  CALC_TEST (v2di, long long, 2, 0);
+}