-/* Copyright (C) 1988-2019 Free Software Foundation, Inc.
+/* Copyright (C) 1988-2020 Free Software Foundation, Inc.
This file is part of GCC.
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
-#include "params.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
case E_DImode:
half_mode = SImode;
break;
+ case E_P2HImode:
+ half_mode = HImode;
+ break;
+ case E_P2QImode:
+ half_mode = QImode;
+ break;
default:
gcc_unreachable ();
}
JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod.  */
- div = gen_divmod4_1 (operands[0], operands[1],
- operands[2], operands[3]);
- emit_insn (div);
+ emit_insn (gen_divmod4_1 (operands[0], operands[1],
+ operands[2], operands[3]));
/* Branch to the end. */
emit_jump_insn (gen_jump (end_label));
}
/* Extract remainder from AH. */
- tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
- tmp0, GEN_INT (8), GEN_INT (8));
- if (REG_P (operands[1]))
- insn = emit_move_insn (operands[1], tmp1);
- else
- {
- /* Need a new scratch register since the old one has result
- of 8bit divide. */
- scratch = gen_reg_rtx (GET_MODE (operands[1]));
- emit_move_insn (scratch, tmp1);
- insn = emit_move_insn (operands[1], scratch);
- }
+ scratch = gen_lowpart (GET_MODE (operands[1]), scratch);
+ tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), scratch,
+ GEN_INT (8), GEN_INT (8));
+ insn = emit_move_insn (operands[1], tmp1);
set_unique_reg_note (insn, REG_EQUAL, mod);
/* Zero extend quotient from AL. */
machine_mode vmode = mode;
rtvec par;
- if (vector_mode)
- use_sse = true;
- else if (mode == TFmode)
+ if (vector_mode || mode == TFmode)
use_sse = true;
else if (TARGET_SSE_MATH)
{
Create the appropriate mask now. */
mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
use = gen_rtx_USE (VOIDmode, mask);
- if (vector_mode)
+ if (vector_mode || mode == TFmode)
par = gen_rtvec (2, set, use);
else
{
{
gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
- /* We may be reversing unordered compare to normal compare, that
- is not valid in general (we may convert non-trapping condition
- to trapping one), however on i386 we currently emit all
- comparisons unordered. */
- new_code = reverse_condition_maybe_unordered (code);
+ /* We may be reversing a non-trapping
+ comparison to a trapping comparison. */
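+      /* For example, reversing UNLT yields GE, which signals on NaN
+	 operands, so the reversal is refused below when it could trap.  */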
+ if (HONOR_NANS (cmp_mode) && flag_trapping_math
+ && code != EQ && code != NE
+ && code != ORDERED && code != UNORDERED)
+ new_code = UNKNOWN;
+ else
+ new_code = reverse_condition_maybe_unordered (code);
}
else
new_code = ix86_reverse_condition (code, cmp_mode);
}
if (cf != 0)
{
- tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
+ tmp = plus_constant (mode, tmp, cf);
nops++;
}
if (!rtx_equal_p (tmp, out))
{
gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
- /* We may be reversing unordered compare to normal compare,
- that is not valid in general (we may convert non-trapping
- condition to trapping one), however on i386 we currently
- emit all comparisons unordered. */
- new_code = reverse_condition_maybe_unordered (code);
+ /* We may be reversing a non-trapping
+ comparison to a trapping comparison. */
+ if (HONOR_NANS (cmp_mode) && flag_trapping_math
+ && code != EQ && code != NE
+ && code != ORDERED && code != UNORDERED)
+ new_code = UNKNOWN;
+ else
+ new_code = reverse_condition_maybe_unordered (code);
}
else
{
{
var = operands[2];
if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
- operands[2] = constm1_rtx, op = and_optab;
+ {
+ /* For smin (x, 0), expand as "x < 0 ? x : 0" instead of
+ "x <= 0 ? x : 0" to enable sign_bit_compare_p. */
+ if (code == LE && op1 == const0_rtx && rtx_equal_p (op0, var))
+ operands[1] = simplify_gen_relational (LT, VOIDmode,
+ GET_MODE (op0),
+ op0, const0_rtx);
+
+ operands[2] = constm1_rtx;
+ op = and_optab;
+ }
else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
operands[2] = const0_rtx, op = ior_optab;
else
return true;
}
+/* Return true if MODE is valid for vector compare to mask register;
+   the same holds for a conditional vector move with a mask register.  */
+static bool
+ix86_valid_mask_cmp_mode (machine_mode mode)
+{
+  /* XOP has its own vector conditional move instructions.  */
+ if (TARGET_XOP && !TARGET_AVX512F)
+ return false;
+
+  /* AVX512F is needed for mask operations.  */
+ if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
+ return false;
+
+  /* AVX512BW is needed for vector QI/HImode;
+     AVX512VL is needed for 128/256-bit vectors.  */
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int vector_size = GET_MODE_SIZE (mode);
+ if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW)
+ return false;
+
+ return vector_size == 64 || TARGET_AVX512VL;
+}
+
/* Expand an SSE comparison. Return the register with the result. */
static rtx
bool maskcmp = false;
rtx x;
- if (GET_MODE_SIZE (cmp_ops_mode) == 64)
+ if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
{
unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
- cmp_mode = int_mode_for_size (nbits, 0).require ();
maskcmp = true;
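+      /* QImode is the smallest scalar integer mode, so it is used even
+	 for masks with fewer than 8 elements.  */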
+ cmp_mode = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode;
}
else
cmp_mode = cmp_ops_mode;
|| (op_false && reg_overlap_mentioned_p (dest, op_false)))
dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
- /* Compare patterns for int modes are unspec in AVX512F only. */
- if (maskcmp && (code == GT || code == EQ))
+ if (maskcmp)
{
- rtx (*gen)(rtx, rtx, rtx);
-
- switch (cmp_ops_mode)
- {
- case E_V64QImode:
- gcc_assert (TARGET_AVX512BW);
- gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
- break;
- case E_V32HImode:
- gcc_assert (TARGET_AVX512BW);
- gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
- break;
- case E_V16SImode:
- gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
- break;
- case E_V8DImode:
- gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
- break;
- default:
- gen = NULL;
- }
-
- if (gen)
- {
- emit_insn (gen (dest, cmp_op0, cmp_op1));
- return dest;
- }
+ bool ok = ix86_expand_mask_vec_cmp (dest, code, cmp_op0, cmp_op1);
+ gcc_assert (ok);
+ return dest;
}
+
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
if (cmp_mode != mode && !maskcmp)
machine_mode cmpmode = GET_MODE (cmp);
/* In AVX512F the result of comparison is an integer mask. */
- bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
+ bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode);
rtx t2, t3, x;
if (maskcmp)
{
- rtx (*gen) (rtx, rtx) = NULL;
- if ((op_true == CONST0_RTX (mode)
- && vector_all_ones_operand (op_false, mode))
- || (op_false == CONST0_RTX (mode)
- && vector_all_ones_operand (op_true, mode)))
- switch (mode)
- {
- case E_V64QImode:
- if (TARGET_AVX512BW)
- gen = gen_avx512bw_cvtmask2bv64qi;
- break;
- case E_V32QImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2bv32qi;
- break;
- case E_V16QImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2bv16qi;
- break;
- case E_V32HImode:
- if (TARGET_AVX512BW)
- gen = gen_avx512bw_cvtmask2wv32hi;
- break;
- case E_V16HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2wv16hi;
- break;
- case E_V8HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_cvtmask2wv8hi;
- break;
- case E_V16SImode:
- if (TARGET_AVX512DQ)
- gen = gen_avx512f_cvtmask2dv16si;
- break;
- case E_V8SImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2dv8si;
- break;
- case E_V4SImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2dv4si;
- break;
- case E_V8DImode:
- if (TARGET_AVX512DQ)
- gen = gen_avx512f_cvtmask2qv8di;
- break;
- case E_V4DImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2qv4di;
- break;
- case E_V2DImode:
- if (TARGET_AVX512VL && TARGET_AVX512DQ)
- gen = gen_avx512vl_cvtmask2qv2di;
- break;
- default:
- break;
- }
- if (gen && SCALAR_INT_MODE_P (cmpmode))
- {
- cmp = force_reg (cmpmode, cmp);
- if (op_true == CONST0_RTX (mode))
+      /* Use a vector move with a mask register.  */
+ cmp = force_reg (cmpmode, cmp);
+      /* Optimize for mask zero: do not force a constant-zero
+	 operand into a register.  */
+ op_true = (op_true != CONST0_RTX (mode)
+ ? force_reg (mode, op_true) : op_true);
+ op_false = (op_false != CONST0_RTX (mode)
+ ? force_reg (mode, op_false) : op_false);
+ if (op_true == CONST0_RTX (mode))
+ {
+ rtx (*gen_not) (rtx, rtx);
+ switch (cmpmode)
{
- rtx (*gen_not) (rtx, rtx);
- switch (cmpmode)
- {
- case E_QImode: gen_not = gen_knotqi; break;
- case E_HImode: gen_not = gen_knothi; break;
- case E_SImode: gen_not = gen_knotsi; break;
- case E_DImode: gen_not = gen_knotdi; break;
- default: gcc_unreachable ();
- }
- rtx n = gen_reg_rtx (cmpmode);
- emit_insn (gen_not (n, cmp));
- cmp = n;
+ case E_QImode: gen_not = gen_knotqi; break;
+ case E_HImode: gen_not = gen_knothi; break;
+ case E_SImode: gen_not = gen_knotsi; break;
+ case E_DImode: gen_not = gen_knotdi; break;
+ default: gcc_unreachable ();
}
- emit_insn (gen (dest, cmp));
- return;
+ rtx n = gen_reg_rtx (cmpmode);
+ emit_insn (gen_not (n, cmp));
+ cmp = n;
+	  /* Swap op_true and op_false.  */
+ std::swap (op_true, op_false);
}
+
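+      /* (vec_merge op_true op_false mask) takes element i from op_true
+	 when bit i of the mask is set, else from op_false.  */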
+ rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
+ emit_insn (gen_rtx_SET (dest, vec_merge));
+ return;
}
else if (vector_all_ones_operand (op_true, mode)
&& op_false == CONST0_RTX (mode))
/* Expand AVX-512 vector comparison. */
bool
-ix86_expand_mask_vec_cmp (rtx operands[])
+ix86_expand_mask_vec_cmp (rtx dest, enum rtx_code code, rtx cmp_op0,
+			  rtx cmp_op1)
{
- machine_mode mask_mode = GET_MODE (operands[0]);
- machine_mode cmp_mode = GET_MODE (operands[2]);
- enum rtx_code code = GET_CODE (operands[1]);
+ machine_mode mask_mode = GET_MODE (dest);
+ machine_mode cmp_mode = GET_MODE (cmp_op0);
rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
int unspec_code;
rtx unspec;
unspec_code = UNSPEC_PCMP;
}
- unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
- operands[3], imm),
+ unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, cmp_op0, cmp_op1, imm),
unspec_code);
- emit_insn (gen_rtx_SET (operands[0], unspec));
+ emit_insn (gen_rtx_SET (dest, unspec));
return true;
}
&& (mode == V16QImode || mode == V8HImode
|| mode == V4SImode || mode == V2DImode))
;
+  /* AVX512F supports all of the comparisons
+     on all 128/256/512-bit vector int types.  */
+ else if (ix86_valid_mask_cmp_mode (mode))
+ ;
else
{
/* Canonicalize the comparison to EQ, GT, GTU. */
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
HOST_WIDE_INT size_to_move)
{
- rtx dst = destmem, src = *srcmem, adjust, tempreg;
+ rtx dst = destmem, src = *srcmem, tempreg;
enum insn_code code;
machine_mode move_mode;
int piece_size, i;
/* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
gcc_assert (size_to_move % piece_size == 0);
- adjust = GEN_INT (piece_size);
+
for (i = 0; i < size_to_move; i += piece_size)
{
/* We move from memory to memory, so we'll need to do it via
emit_insn (GEN_FCN (code) (dst, tempreg));
emit_move_insn (destptr,
- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
+ plus_constant (Pmode, copy_rtx (destptr), piece_size));
emit_move_insn (srcptr,
- gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
+ plus_constant (Pmode, copy_rtx (srcptr), piece_size));
dst = adjust_automodify_address_nv (dst, move_mode, destptr,
piece_size);
emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
HOST_WIDE_INT size_to_move)
{
- rtx dst = destmem, adjust;
+ rtx dst = destmem;
enum insn_code code;
machine_mode move_mode;
int piece_size, i;
/* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
gcc_assert (size_to_move % piece_size == 0);
- adjust = GEN_INT (piece_size);
+
for (i = 0; i < size_to_move; i += piece_size)
{
if (piece_size <= GET_MODE_SIZE (word_mode))
emit_insn (GEN_FCN (code) (dst, promoted_val));
emit_move_insn (destptr,
- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
+ plus_constant (Pmode, copy_rtx (destptr), piece_size));
dst = adjust_automodify_address_nv (dst, move_mode, destptr,
piece_size);
rtx reg = convert_modes (mode, QImode, val, true);
if (!TARGET_PARTIAL_REG_STALL)
- if (mode == SImode)
- emit_insn (gen_insvsi_1 (reg, reg));
- else
- emit_insn (gen_insvdi_1 (reg, reg));
+ emit_insn (gen_insv_1 (mode, reg, reg));
else
{
tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
&& optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
move_mode = wider_mode;
- if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
+ if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
move_mode = TImode;
/* Find the corresponding vector mode with the same size as MOVE_MODE.
reg,
tmpreg)));
/* Emit lea manually to avoid clobbering of flags. */
- emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
+ emit_insn (gen_rtx_SET (reg2, plus_constant (Pmode, out, 2)));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
case VOID_FTYPE_PV8SI_V8DI_UQI:
case VOID_FTYPE_PV8HI_V8DI_UQI:
case VOID_FTYPE_PV16HI_V16SI_UHI:
- case VOID_FTYPE_PV16QI_V8DI_UQI:
+ case VOID_FTYPE_PUDI_V8DI_UQI:
case VOID_FTYPE_PV16QI_V16SI_UHI:
case VOID_FTYPE_PV4SI_V4DI_UQI:
- case VOID_FTYPE_PV4SI_V2DI_UQI:
- case VOID_FTYPE_PV8HI_V4DI_UQI:
- case VOID_FTYPE_PV8HI_V2DI_UQI:
+ case VOID_FTYPE_PUDI_V2DI_UQI:
+ case VOID_FTYPE_PUDI_V4DI_UQI:
+ case VOID_FTYPE_PUSI_V2DI_UQI:
case VOID_FTYPE_PV8HI_V8SI_UQI:
- case VOID_FTYPE_PV8HI_V4SI_UQI:
- case VOID_FTYPE_PV16QI_V4DI_UQI:
- case VOID_FTYPE_PV16QI_V2DI_UQI:
- case VOID_FTYPE_PV16QI_V8SI_UQI:
- case VOID_FTYPE_PV16QI_V4SI_UQI:
+ case VOID_FTYPE_PUDI_V4SI_UQI:
+ case VOID_FTYPE_PUSI_V4DI_UQI:
+ case VOID_FTYPE_PUHI_V2DI_UQI:
+ case VOID_FTYPE_PUDI_V8SI_UQI:
+ case VOID_FTYPE_PUSI_V4SI_UQI:
case VOID_FTYPE_PCHAR_V64QI_UDI:
case VOID_FTYPE_PCHAR_V32QI_USI:
case VOID_FTYPE_PCHAR_V16QI_UHI:
case VOID_FTYPE_PFLOAT_V4SF_UQI:
case VOID_FTYPE_PV32QI_V32HI_USI:
case VOID_FTYPE_PV16QI_V16HI_UHI:
- case VOID_FTYPE_PV8QI_V8HI_UQI:
+ case VOID_FTYPE_PUDI_V8HI_UQI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
klass = load;
memory = 0;
break;
- case VOID_FTYPE_UINT_UINT_UINT:
- case VOID_FTYPE_UINT64_UINT_UINT:
- case UCHAR_FTYPE_UINT_UINT_UINT:
- case UCHAR_FTYPE_UINT64_UINT_UINT:
- nargs = 3;
- klass = load;
- memory = ARRAY_SIZE (args);
- last_arg_constant = true;
- break;
default:
gcc_unreachable ();
}
{
if (!match)
{
- if (icode == CODE_FOR_lwp_lwpvalsi3
- || icode == CODE_FOR_lwp_lwpinssi3
- || icode == CODE_FOR_lwp_lwpvaldi3
- || icode == CODE_FOR_lwp_lwpinsdi3)
- error ("the last argument must be a 32-bit immediate");
- else
- error ("the last argument must be an 8-bit immediate");
+ error ("the last argument must be an 8-bit immediate");
return const0_rtx;
}
}
OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
- where for each this pair it is sufficient if either of the ISAs is
- enabled, plus if it is ored with other options also those others. */
+ where for each such pair it is sufficient if either of the ISAs is
+ enabled, plus if it is ored with other options also those others.
+ OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */
if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
== (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
&& (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
== (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
&& (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
- /* Use SSE/SSE2/SSSE3 to emulate MMX intrinsics in 64-bit mode when
- MMX is disabled. NB: Since MMX intrinsics are marked with
- SSE/SSE2/SSSE3, enable them without SSE/SSE2/SSSE3 if MMX is
- enabled. */
- if (TARGET_MMX || TARGET_MMX_WITH_SSE)
- {
- if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
- == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX))
- && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX)) != 0)
- isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX);
- if (((bisa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
- == (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX))
- && (isa & (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX)) != 0)
- isa |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX);
- if (((bisa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
- == (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX))
- && (isa & (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX)) != 0)
- isa |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX);
+ if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE)
+ {
+ bisa &= ~OPTION_MASK_ISA_MMX;
+ bisa |= OPTION_MASK_ISA_SSE2;
}
if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
{
else
bisa |= OPTION_MASK_ABI_64;
char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
- (enum fpmath_unit) 0, false, add_abi_p);
+ (enum fpmath_unit) 0,
+ (enum prefer_vector_width) 0,
+ false, add_abi_p);
if (!opts)
error ("%qE needs unknown isa option", fndecl);
else
}
else
{
- rtx pat;
+ if (target == 0
+ || !register_operand (target, SImode))
+ target = gen_reg_rtx (SImode);
- target = gen_reg_rtx (SImode);
emit_move_insn (target, const0_rtx);
target = gen_rtx_SUBREG (QImode, target, 0);
- if (fcode == IX86_BUILTIN_ENQCMD)
- pat = gen_enqcmd (UNSPECV_ENQCMD, Pmode, op0, op1);
- else
- pat = gen_enqcmd (UNSPECV_ENQCMDS, Pmode, op0, op1);
-
- emit_insn (pat);
-
- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- SET_DEST (pat),
- const0_rtx)));
+ int unspecv = (fcode == IX86_BUILTIN_ENQCMD
+ ? UNSPECV_ENQCMD
+ : UNSPECV_ENQCMDS);
+ icode = code_for_enqcmd (unspecv, Pmode);
+ emit_insn (GEN_FCN (icode) (op0, op1));
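+      /* enqcmd/enqcmds set ZF according to whether the command was
+	 accepted; copy that flag into the low byte of the result.  */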
+ emit_insn
+ (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCZmode, FLAGS_REG),
+ const0_rtx)));
return SUBREG_REG (target);
}
case IX86_BUILTIN_LLWPCB:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
- icode = CODE_FOR_lwp_llwpcb;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
+
+ if (!register_operand (op0, Pmode))
op0 = ix86_zero_extend_to_Pmode (op0);
- emit_insn (gen_lwp_llwpcb (op0));
+ emit_insn (gen_lwp_llwpcb (Pmode, op0));
return 0;
case IX86_BUILTIN_SLWPCB:
- icode = CODE_FOR_lwp_slwpcb;
if (!target
- || !insn_data[icode].operand[0].predicate (target, Pmode))
+ || !register_operand (target, Pmode))
target = gen_reg_rtx (Pmode);
- emit_insn (gen_lwp_slwpcb (target));
+ emit_insn (gen_lwp_slwpcb (Pmode, target));
return target;
+ case IX86_BUILTIN_LWPVAL32:
+ case IX86_BUILTIN_LWPVAL64:
+ case IX86_BUILTIN_LWPINS32:
+ case IX86_BUILTIN_LWPINS64:
+ mode = ((fcode == IX86_BUILTIN_LWPVAL32
+ || fcode == IX86_BUILTIN_LWPINS32)
+ ? SImode : DImode);
+
+ if (fcode == IX86_BUILTIN_LWPVAL32
+ || fcode == IX86_BUILTIN_LWPVAL64)
+ icode = code_for_lwp_lwpval (mode);
+ else
+ icode = code_for_lwp_lwpins (mode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[icode].operand[0].mode;
+
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!insn_data[icode].operand[1].predicate (op1, SImode))
+ op1 = copy_to_mode_reg (SImode, op1);
+
+ if (!CONST_INT_P (op2))
+ {
+ error ("the last argument must be a 32-bit immediate");
+ return const0_rtx;
+ }
+
+ emit_insn (GEN_FCN (icode) (op0, op1, op2));
+
+ if (fcode == IX86_BUILTIN_LWPINS32
+ || fcode == IX86_BUILTIN_LWPINS64)
+ {
+ if (target == 0
+ || !nonimmediate_operand (target, QImode))
+ target = gen_reg_rtx (QImode);
+
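+	  /* lwpins sets the carry flag; expose it as the boolean
+	     result of the builtin.  */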
+ pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (target, pat));
+
+ return target;
+ }
+ else
+ return 0;
+
case IX86_BUILTIN_BEXTRI32:
case IX86_BUILTIN_BEXTRI64:
+ mode = (fcode == IX86_BUILTIN_BEXTRI32 ? SImode : DImode);
+
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
- icode = (fcode == IX86_BUILTIN_BEXTRI32
- ? CODE_FOR_tbm_bextri_si
- : CODE_FOR_tbm_bextri_di);
+
if (!CONST_INT_P (op1))
- {
- error ("last argument must be an immediate");
- return const0_rtx;
- }
+ {
+ error ("last argument must be an immediate");
+ return const0_rtx;
+ }
else
- {
- unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
- unsigned char lsb_index = INTVAL (op1) & 0xFF;
- op1 = GEN_INT (length);
- op2 = GEN_INT (lsb_index);
+ {
+ unsigned char lsb_index = UINTVAL (op1);
+ unsigned char length = UINTVAL (op1) >> 8;
+
+ unsigned char bitsize = GET_MODE_BITSIZE (mode);
+
+ icode = code_for_tbm_bextri (mode);
mode1 = insn_data[icode].operand[1].mode;
if (!insn_data[icode].operand[1].predicate (op0, mode1))
|| !register_operand (target, mode0))
target = gen_reg_rtx (mode0);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (pat)
- emit_insn (pat);
- return target;
- }
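+	  /* bextr with a zero-length field or an out-of-range start bit
+	     extracts zero, so fold those cases at expand time.  */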
+ if (length == 0 || lsb_index >= bitsize)
+ {
+ emit_move_insn (target, const0_rtx);
+ return target;
+ }
+
+ if (length + lsb_index > bitsize)
+ length = bitsize - lsb_index;
+
+ op1 = GEN_INT (length);
+ op2 = GEN_INT (lsb_index);
+
+ emit_insn (GEN_FCN (icode) (target, op0, op1, op2));
+ return target;
+ }
case IX86_BUILTIN_RDRAND16_STEP:
- icode = CODE_FOR_rdrandhi_1;
- mode0 = HImode;
+ mode = HImode;
goto rdrand_step;
case IX86_BUILTIN_RDRAND32_STEP:
- icode = CODE_FOR_rdrandsi_1;
- mode0 = SImode;
+ mode = SImode;
goto rdrand_step;
case IX86_BUILTIN_RDRAND64_STEP:
- icode = CODE_FOR_rdranddi_1;
- mode0 = DImode;
+ mode = DImode;
rdrand_step:
arg0 = CALL_EXPR_ARG (exp, 0);
op1 = copy_addr_to_reg (op1);
}
- op0 = gen_reg_rtx (mode0);
- emit_insn (GEN_FCN (icode) (op0));
+ op0 = gen_reg_rtx (mode);
+ emit_insn (gen_rdrand (mode, op0));
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+ emit_move_insn (gen_rtx_MEM (mode, op1), op0);
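+  /* rdrand sets the carry flag on success, so the step builtin
+     returns CF ? 1 : 0 via the conditional move below.  */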
- op1 = gen_reg_rtx (SImode);
- emit_move_insn (op1, CONST1_RTX (SImode));
+ op1 = force_reg (SImode, const1_rtx);
/* Emit SImode conditional move. */
- if (mode0 == HImode)
+ if (mode == HImode)
{
if (TARGET_ZERO_EXTEND_WITH_AND
&& optimize_function_for_speed_p (cfun))
emit_insn (gen_zero_extendhisi2 (op2, op0));
}
}
- else if (mode0 == SImode)
+ else if (mode == SImode)
op2 = op0;
else
op2 = gen_rtx_SUBREG (SImode, op0, 0);
return target;
case IX86_BUILTIN_RDSEED16_STEP:
- icode = CODE_FOR_rdseedhi_1;
- mode0 = HImode;
+ mode = HImode;
goto rdseed_step;
case IX86_BUILTIN_RDSEED32_STEP:
- icode = CODE_FOR_rdseedsi_1;
- mode0 = SImode;
+ mode = SImode;
goto rdseed_step;
case IX86_BUILTIN_RDSEED64_STEP:
- icode = CODE_FOR_rdseeddi_1;
- mode0 = DImode;
+ mode = DImode;
rdseed_step:
arg0 = CALL_EXPR_ARG (exp, 0);
op1 = copy_addr_to_reg (op1);
}
- op0 = gen_reg_rtx (mode0);
- emit_insn (GEN_FCN (icode) (op0));
+ op0 = gen_reg_rtx (mode);
+ emit_insn (gen_rdseed (mode, op0));
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+ emit_move_insn (gen_rtx_MEM (mode, op1), op0);
op2 = gen_reg_rtx (QImode);
emit_insn (gen_xabort (op0));
return 0;
+ case IX86_BUILTIN_RDSSPD:
+ case IX86_BUILTIN_RDSSPQ:
+ mode = (fcode == IX86_BUILTIN_RDSSPD ? SImode : DImode);
+
+ if (target == 0
+ || !register_operand (target, mode))
+ target = gen_reg_rtx (mode);
+
+ op0 = force_reg (mode, const0_rtx);
+
+ emit_insn (gen_rdssp (mode, target, op0));
+ return target;
+
+ case IX86_BUILTIN_INCSSPD:
+ case IX86_BUILTIN_INCSSPQ:
+ mode = (fcode == IX86_BUILTIN_INCSSPD ? SImode : DImode);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+
+ op0 = force_reg (mode, op0);
+
+ emit_insn (gen_incssp (mode, op0));
+ return 0;
+
case IX86_BUILTIN_RSTORSSP:
case IX86_BUILTIN_CLRSSBSY:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
icode = (fcode == IX86_BUILTIN_RSTORSSP
- ? CODE_FOR_rstorssp
- : CODE_FOR_clrssbsy);
+ ? CODE_FOR_rstorssp
+ : CODE_FOR_clrssbsy);
+
if (!address_operand (op0, VOIDmode))
{
- op1 = convert_memory_address (Pmode, op0);
- op0 = copy_addr_to_reg (op1);
+ op0 = convert_memory_address (Pmode, op0);
+ op0 = copy_addr_to_reg (op0);
}
- emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
+ emit_insn (GEN_FCN (icode) (gen_rtx_MEM (DImode, op0)));
return 0;
case IX86_BUILTIN_WRSSD:
case IX86_BUILTIN_WRSSQ:
case IX86_BUILTIN_WRUSSD:
case IX86_BUILTIN_WRUSSQ:
+ mode = ((fcode == IX86_BUILTIN_WRSSD
+ || fcode == IX86_BUILTIN_WRUSSD)
+ ? SImode : DImode);
+
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
arg1 = CALL_EXPR_ARG (exp, 1);
op1 = expand_normal (arg1);
- switch (fcode)
- {
- case IX86_BUILTIN_WRSSD:
- icode = CODE_FOR_wrsssi;
- mode = SImode;
- break;
- case IX86_BUILTIN_WRSSQ:
- icode = CODE_FOR_wrssdi;
- mode = DImode;
- break;
- case IX86_BUILTIN_WRUSSD:
- icode = CODE_FOR_wrusssi;
- mode = SImode;
- break;
- case IX86_BUILTIN_WRUSSQ:
- icode = CODE_FOR_wrussdi;
- mode = DImode;
- break;
- }
+
op0 = force_reg (mode, op0);
+
if (!address_operand (op1, VOIDmode))
{
- op2 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op2);
+ op1 = convert_memory_address (Pmode, op1);
+ op1 = copy_addr_to_reg (op1);
}
- emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
+ op1 = gen_rtx_MEM (mode, op1);
+
+ icode = ((fcode == IX86_BUILTIN_WRSSD
+ || fcode == IX86_BUILTIN_WRSSQ)
+ ? code_for_wrss (mode)
+ : code_for_wruss (mode));
+ emit_insn (GEN_FCN (icode) (op0, op1));
+
return 0;
default:
target);
}
- if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
- && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
- return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
- target);
- }
-
gcc_unreachable ();
}
ix86_expand_vector_init_concat (machine_mode mode,
rtx target, rtx *ops, int n)
{
- machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
- rtx first[16], second[8], third[4];
+ machine_mode half_mode = VOIDmode;
+ rtx half[2];
rtvec v;
int i, j;
switch (mode)
{
case E_V16SImode:
- cmode = V8SImode;
+ half_mode = V8SImode;
break;
case E_V16SFmode:
- cmode = V8SFmode;
+ half_mode = V8SFmode;
break;
case E_V8DImode:
- cmode = V4DImode;
+ half_mode = V4DImode;
break;
case E_V8DFmode:
- cmode = V4DFmode;
+ half_mode = V4DFmode;
break;
case E_V8SImode:
- cmode = V4SImode;
+ half_mode = V4SImode;
break;
case E_V8SFmode:
- cmode = V4SFmode;
+ half_mode = V4SFmode;
break;
case E_V4DImode:
- cmode = V2DImode;
+ half_mode = V2DImode;
break;
case E_V4DFmode:
- cmode = V2DFmode;
+ half_mode = V2DFmode;
break;
case E_V4SImode:
- cmode = V2SImode;
+ half_mode = V2SImode;
break;
case E_V4SFmode:
- cmode = V2SFmode;
+ half_mode = V2SFmode;
break;
case E_V2DImode:
- cmode = DImode;
+ half_mode = DImode;
break;
case E_V2SImode:
- cmode = SImode;
+ half_mode = SImode;
break;
case E_V2DFmode:
- cmode = DFmode;
+ half_mode = DFmode;
break;
case E_V2SFmode:
- cmode = SFmode;
+ half_mode = SFmode;
break;
default:
gcc_unreachable ();
}
- if (!register_operand (ops[1], cmode))
- ops[1] = force_reg (cmode, ops[1]);
- if (!register_operand (ops[0], cmode))
- ops[0] = force_reg (cmode, ops[0]);
+ if (!register_operand (ops[1], half_mode))
+ ops[1] = force_reg (half_mode, ops[1]);
+ if (!register_operand (ops[0], half_mode))
+ ops[0] = force_reg (half_mode, ops[0]);
emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
ops[1])));
break;
switch (mode)
{
case E_V4DImode:
- cmode = V2DImode;
+ half_mode = V2DImode;
break;
case E_V4DFmode:
- cmode = V2DFmode;
+ half_mode = V2DFmode;
break;
case E_V4SImode:
- cmode = V2SImode;
+ half_mode = V2SImode;
break;
case E_V4SFmode:
- cmode = V2SFmode;
+ half_mode = V2SFmode;
break;
default:
gcc_unreachable ();
switch (mode)
{
case E_V8DImode:
- cmode = V2DImode;
- hmode = V4DImode;
+ half_mode = V4DImode;
break;
case E_V8DFmode:
- cmode = V2DFmode;
- hmode = V4DFmode;
+ half_mode = V4DFmode;
break;
case E_V8SImode:
- cmode = V2SImode;
- hmode = V4SImode;
+ half_mode = V4SImode;
break;
case E_V8SFmode:
- cmode = V2SFmode;
- hmode = V4SFmode;
+ half_mode = V4SFmode;
break;
default:
gcc_unreachable ();
switch (mode)
{
case E_V16SImode:
- cmode = V2SImode;
- hmode = V4SImode;
- gmode = V8SImode;
+ half_mode = V8SImode;
break;
case E_V16SFmode:
- cmode = V2SFmode;
- hmode = V4SFmode;
- gmode = V8SFmode;
+ half_mode = V8SFmode;
break;
default:
gcc_unreachable ();
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
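+      /* Build the high half (j == 1) from the last N/2 elements first,
+	 then the low half, and concatenate the two below.  */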
- j = (n >> 1) - 1;
- for (; i > 0; i -= 2, j--)
+ for (j = 1; j != -1; j--)
{
- first[j] = gen_reg_rtx (cmode);
- v = gen_rtvec (2, ops[i - 1], ops[i]);
- ix86_expand_vector_init (false, first[j],
- gen_rtx_PARALLEL (cmode, v));
- }
-
- n >>= 1;
- if (n > 4)
- {
- gcc_assert (hmode != VOIDmode);
- gcc_assert (gmode != VOIDmode);
- for (i = j = 0; i < n; i += 2, j++)
- {
- second[j] = gen_reg_rtx (hmode);
- ix86_expand_vector_init_concat (hmode, second [j],
- &first [i], 2);
- }
- n >>= 1;
- for (i = j = 0; i < n; i += 2, j++)
+ half[j] = gen_reg_rtx (half_mode);
+ switch (n >> 1)
{
- third[j] = gen_reg_rtx (gmode);
- ix86_expand_vector_init_concat (gmode, third[j],
- &second[i], 2);
- }
- n >>= 1;
- ix86_expand_vector_init_concat (mode, target, third, n);
- }
- else if (n > 2)
- {
- gcc_assert (hmode != VOIDmode);
- for (i = j = 0; i < n; i += 2, j++)
- {
- second[j] = gen_reg_rtx (hmode);
- ix86_expand_vector_init_concat (hmode, second [j],
- &first [i], 2);
+ case 2:
+ v = gen_rtvec (2, ops[i-1], ops[i]);
+ i -= 2;
+ break;
+ case 4:
+ v = gen_rtvec (4, ops[i-3], ops[i-2], ops[i-1], ops[i]);
+ i -= 4;
+ break;
+ case 8:
+ v = gen_rtvec (8, ops[i-7], ops[i-6], ops[i-5], ops[i-4],
+ ops[i-3], ops[i-2], ops[i-1], ops[i]);
+ i -= 8;
+ break;
+ default:
+ gcc_unreachable ();
}
- n >>= 1;
- ix86_expand_vector_init_concat (mode, target, second, n);
+ ix86_expand_vector_init (false, half[j],
+ gen_rtx_PARALLEL (half_mode, v));
}
- else
- ix86_expand_vector_init_concat (mode, target, first, n);
+
+ ix86_expand_vector_init_concat (mode, target, half, 2);
break;
default:
break;
case E_V64QImode:
case E_V32HImode:
+ if (i < 64)
+ {
+ d = gen_reg_rtx (V4TImode);
+ tem = gen_avx512bw_lshrv4ti3 (d, gen_lowpart (V4TImode, src),
+ GEN_INT (i / 2));
+ break;
+ }
+ /* FALLTHRU */
case E_V16SImode:
case E_V16SFmode:
case E_V8DImode:
case E_V8DFmode:
if (i > 128)
tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
- gen_lowpart (V16SImode, src),
- gen_lowpart (V16SImode, src),
- GEN_INT (0x4 + (i == 512 ? 4 : 0)),
- GEN_INT (0x5 + (i == 512 ? 4 : 0)),
- GEN_INT (0x6 + (i == 512 ? 4 : 0)),
- GEN_INT (0x7 + (i == 512 ? 4 : 0)),
- GEN_INT (0xC), GEN_INT (0xD),
- GEN_INT (0xE), GEN_INT (0xF),
- GEN_INT (0x10), GEN_INT (0x11),
- GEN_INT (0x12), GEN_INT (0x13),
- GEN_INT (0x14), GEN_INT (0x15),
- GEN_INT (0x16), GEN_INT (0x17));
+ gen_lowpart (V16SImode, src),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (0x4 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x5 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x6 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x7 + (i == 512 ? 4 : 0)),
+ GEN_INT (0xC), GEN_INT (0xD),
+ GEN_INT (0xE), GEN_INT (0xF),
+ GEN_INT (0x10), GEN_INT (0x11),
+ GEN_INT (0x12), GEN_INT (0x13),
+ GEN_INT (0x14), GEN_INT (0x15),
+ GEN_INT (0x16), GEN_INT (0x17));
else
tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
- gen_lowpart (V16SImode, src),
- GEN_INT (i == 128 ? 0x2 : 0x1),
- GEN_INT (0x3),
- GEN_INT (0x3),
- GEN_INT (0x3),
- GEN_INT (i == 128 ? 0x6 : 0x5),
- GEN_INT (0x7),
- GEN_INT (0x7),
- GEN_INT (0x7),
- GEN_INT (i == 128 ? 0xA : 0x9),
- GEN_INT (0xB),
- GEN_INT (0xB),
- GEN_INT (0xB),
- GEN_INT (i == 128 ? 0xE : 0xD),
- GEN_INT (0xF),
- GEN_INT (0xF),
- GEN_INT (0xF));
+ gen_lowpart (V16SImode, src),
+ GEN_INT (i == 128 ? 0x2 : 0x1),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (i == 128 ? 0x6 : 0x5),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (i == 128 ? 0xA : 0x9),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (i == 128 ? 0xE : 0xD),
+ GEN_INT (0xF),
+ GEN_INT (0xF),
+ GEN_INT (0xF));
break;
default:
gcc_unreachable ();
}
}
+ mthree = force_reg (mode, mthree);
+
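+  /* The Newton-Raphson step for rsqrt is
+       x1 = -0.5 * x0 * (a * x0 * x0 - 3.0),
+     so compute e2 = a * x0 * x0 - 3, fused into a single FMA when
+     available.  */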
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
- /* e1 = e0 * x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
- /* e2 = e1 - 3. */
- mthree = force_reg (mode, mthree);
- emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ emit_insn (gen_rtx_SET (e2,
+ gen_rtx_FMA (mode, e0, x0, mthree)));
+ else
+ {
+ /* e1 = e0 * x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
+
+ /* e2 = e1 - 3. */
+ emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
+ }
mhalf = force_reg (mode, mhalf);
if (recip)
return false;
if (!(TARGET_SSE && vmode == V4SFmode)
+ && !(TARGET_MMX_WITH_SSE && vmode == V2SFmode)
&& !(TARGET_SSE2 && vmode == V2DFmode))
return false;
/* vpshufb only works within 128-bit lanes; it is not
   possible to shuffle bytes between the lanes.  */
for (i = 0; i < nelt; ++i)
- if ((d->perm[i] ^ i) & (nelt / 4))
+ if ((d->perm[i] ^ i) & (3 * nelt / 4))
return false;
}
}
case E_V2DFmode:
case E_V4SFmode:
case E_V2DImode:
+ case E_V2SImode:
case E_V4SImode:
/* These are always directly implementable by expand_vec_perm_1. */
gcc_unreachable ();
+ case E_V2SFmode:
+ gcc_assert (TARGET_MMX_WITH_SSE);
+ /* We have no suitable instructions. */
+ if (d->testing_p)
+ return false;
+ break;
+
+ case E_V4HImode:
+ if (d->testing_p)
+ break;
+      /* We need 2*log2(N)-1 interleave operations to extract
+	 the odd/even elements.  */
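+      /* E.g. punpcklwd/punpckhwd of {a0,a1,a2,a3} and {b0,b1,b2,b3}
+	 give {a0,b0,a1,b1} and {a2,b2,a3,b3}; interleaving those again
+	 yields {a0,a2,b0,b2} (even) or {a1,a3,b1,b3} (odd).  */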
+ t1 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1));
+ emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1));
+ if (odd)
+ t2 = gen_mmx_punpckhwd (d->target, d->target, t1);
+ else
+ t2 = gen_mmx_punpcklwd (d->target, d->target, t1);
+ emit_insn (t2);
+ break;
+
case E_V8HImode:
if (TARGET_SSE4_1)
return expand_vec_perm_even_odd_pack (d);
gcc_unreachable ();
case E_V2DFmode:
- case E_V2DImode:
+ case E_V2SFmode:
case E_V4SFmode:
+ case E_V2DImode:
+ case E_V2SImode:
case E_V4SImode:
/* These are always implementable using standard shuffle patterns. */
gcc_unreachable ();
if (d.testing_p && TARGET_SSSE3)
return true;
break;
+ case E_V2SFmode:
+ case E_V2SImode:
+ case E_V4HImode:
+ if (!TARGET_MMX_WITH_SSE)
+ return false;
+ break;
case E_V2DImode:
case E_V2DFmode:
if (!TARGET_SSE)
d.one_operand_p = (which != 3);
/* Implementable with shufps or pshufd. */
- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ if (d.one_operand_p
+ && (d.vmode == V4SFmode || d.vmode == V2SFmode
+ || d.vmode == V4SImode || d.vmode == V2SImode))
return true;
/* Otherwise we have to go through the motions and see if we can
gcc_assert (ok);
}
+/* Optimize vector MUL generation for V8QI, V16QI and V32QI
+   under TARGET_AVX512BW.  I.e. for v16qi a * b, it generates
+
+    vpmovzxbw ymm2, xmm0
+    vpmovzxbw ymm3, xmm1
+    vpmullw ymm4, ymm2, ymm3
+    vpmovwb xmm0, ymm4
+
+   which takes fewer instructions than ix86_expand_vecop_qihi.
+   Return true on success.  */
+
+bool
+ix86_expand_vecmul_qihi (rtx dest, rtx op1, rtx op2)
+{
+ machine_mode himode, qimode = GET_MODE (dest);
+ rtx hop1, hop2, hdest;
+ rtx (*gen_extend)(rtx, rtx);
+ rtx (*gen_truncate)(rtx, rtx);
+
+ /* There's no V64HImode multiplication instruction. */
+ if (qimode == E_V64QImode)
+ return false;
+
+  /* vpmovwb is only available under AVX512BW.  */
+ if (!TARGET_AVX512BW)
+ return false;
+ if ((qimode == V8QImode || qimode == V16QImode)
+ && !TARGET_AVX512VL)
+ return false;
+  /* Do not generate zmm instructions when 128/256-bit vector width
+     is preferred.  */
+ if (qimode == V32QImode
+ && (TARGET_PREFER_AVX128 || TARGET_PREFER_AVX256))
+ return false;
+
+ switch (qimode)
+ {
+ case E_V8QImode:
+ himode = V8HImode;
+ gen_extend = gen_zero_extendv8qiv8hi2;
+ gen_truncate = gen_truncv8hiv8qi2;
+ break;
+ case E_V16QImode:
+ himode = V16HImode;
+ gen_extend = gen_zero_extendv16qiv16hi2;
+ gen_truncate = gen_truncv16hiv16qi2;
+ break;
+ case E_V32QImode:
+ himode = V32HImode;
+ gen_extend = gen_zero_extendv32qiv32hi2;
+ gen_truncate = gen_truncv32hiv32qi2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ hop1 = gen_reg_rtx (himode);
+ hop2 = gen_reg_rtx (himode);
+ hdest = gen_reg_rtx (himode);
+ emit_insn (gen_extend (hop1, op1));
+ emit_insn (gen_extend (hop2, op2));
+ emit_insn (gen_rtx_SET (hdest, simplify_gen_binary (MULT, himode,
+ hop1, hop2)));
+ emit_insn (gen_truncate (dest, hdest));
+ return true;
+}
+
+/* Expand a vector shift by a constant amount for a V*QImode in terms of
+   the same operation on V*HImode.  Return true on success.  */
+bool
+ix86_expand_vec_shift_qihi_constant (enum rtx_code code,
+				     rtx dest, rtx op1, rtx op2)
+{
+ machine_mode qimode, himode;
+ HOST_WIDE_INT and_constant, xor_constant;
+ HOST_WIDE_INT shift_amount;
+ rtx vec_const_and, vec_const_xor;
+ rtx tmp, op1_subreg;
+ rtx (*gen_shift) (rtx, rtx, rtx);
+ rtx (*gen_and) (rtx, rtx, rtx);
+ rtx (*gen_xor) (rtx, rtx, rtx);
+ rtx (*gen_sub) (rtx, rtx, rtx);
+
+ /* Only optimize shift by constant. */
+ if (!CONST_INT_P (op2))
+ return false;
+
+ qimode = GET_MODE (dest);
+ shift_amount = INTVAL (op2);
+  /* Do nothing when the shift amount is greater than or equal to 8.  */
+ if (shift_amount > 7)
+ return false;
+
+ gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+  /* Record the position of the sign bit after the shift,
+     for use by ASHIFTRT.  */
+ xor_constant = 1 << (8 - shift_amount - 1);
+
+  /* Compute the mask that zeroes out the bits shifted in from the
+     neighboring element.  */
+ and_constant
+ = (code == ASHIFT ? 256 - (1 << shift_amount)
+ : (1 << (8 - shift_amount)) - 1);
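+  /* E.g. for LSHIFTRT by 2, and_constant is 0x3f, clearing the two
+     bits shifted in from the neighboring byte; for ASHIFTRT by 2,
+     xor_constant is 0x20, so (x ^ 0x20) - 0x20 sign-extends the
+     6-bit result.  */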
+
+ switch (qimode)
+ {
+    case E_V16QImode:
+ himode = V8HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv8hi3
+ : (code == ASHIFTRT) ? gen_ashrv8hi3 : gen_lshrv8hi3);
+ gen_and = gen_andv16qi3;
+ gen_xor = gen_xorv16qi3;
+ gen_sub = gen_subv16qi3;
+ break;
+    case E_V32QImode:
+ himode = V16HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv16hi3
+ : (code == ASHIFTRT) ? gen_ashrv16hi3 : gen_lshrv16hi3);
+ gen_and = gen_andv32qi3;
+ gen_xor = gen_xorv32qi3;
+ gen_sub = gen_subv32qi3;
+ break;
+    case E_V64QImode:
+ himode = V32HImode;
+ gen_shift =
+ ((code == ASHIFT)
+ ? gen_ashlv32hi3
+ : (code == ASHIFTRT) ? gen_ashrv32hi3 : gen_lshrv32hi3);
+ gen_and = gen_andv64qi3;
+ gen_xor = gen_xorv64qi3;
+ gen_sub = gen_subv64qi3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ tmp = gen_reg_rtx (himode);
+ vec_const_and = gen_reg_rtx (qimode);
+ op1_subreg = lowpart_subreg (himode, op1, qimode);
+
+  /* For ASHIFT and LSHIFTRT, perform the operation as
+       vpsllw/vpsrlw $shift_amount, %op1, %dest
+       vpand %vec_const_and, %dest.  */
+ emit_insn (gen_shift (tmp, op1_subreg, op2));
+ emit_move_insn (dest, simplify_gen_subreg (qimode, tmp, himode, 0));
+ emit_move_insn (vec_const_and,
+ ix86_build_const_vector (qimode, true,
+ gen_int_mode (and_constant, QImode)));
+ emit_insn (gen_and (dest, dest, vec_const_and));
+
+  /* For ASHIFTRT, additionally perform
+       vpxor %vec_const_xor, %dest, %dest
+       vpsubb %vec_const_xor, %dest, %dest
+     to sign-extend the result.  */
+ if (code == ASHIFTRT)
+ {
+ vec_const_xor = gen_reg_rtx (qimode);
+ emit_move_insn (vec_const_xor,
+ ix86_build_const_vector (qimode, true,
+ gen_int_mode (xor_constant, QImode)));
+ emit_insn (gen_xor (dest, dest, vec_const_xor));
+ emit_insn (gen_sub (dest, dest, vec_const_xor));
+ }
+ return true;
+}
/* Expand a vector operation CODE for a V*QImode in terms of the
same operation on V*HImode. */
case E_V4SImode:
case E_V2DImode:
case E_V1TImode:
- case E_TImode:
{
machine_mode srcmode, dstmode;
rtx d, pat;
case E_V4SImode:
case E_V2DImode:
case E_V1TImode:
- case E_TImode:
{
machine_mode srcmode, dstmode;
rtx (*pinsr)(rtx, rtx, rtx, rtx);