+2016-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * config/rs6000/rs6000.c (rs6000_expand_vector_init): Set
+ initialization of all 0's to the 0 constant, instead of directly
+ generating XOR. Add support for V4SImode vector initialization on
+ 64-bit systems with direct move, and rework the ISA 3.0 V4SImode
+ initialization. Change variables used in V4SFmode vector
+ initialization. For V4SFmode vector splat on ISA 3.0, make sure
+ any memory addresses are in index form. Add support for using
+ VSPLTH/VSPLTB to initialize vector short and vector char vectors
+ with all of the same element.
+ (regno_or_subregno): New helper function to return a register
+ number for either REG or SUBREG.
+ (rs6000_adjust_vec_address): Do not generate ADDI <reg>,R0,<num>.
+ Use regno_or_subregno where possible.
+ (rs6000_split_v4si_init_di_reg): New helper function to build up a
+ DImode value from two SImode values in order to generate V4SImode
+ vector initialization on 64-bit systems with direct move.
+ (rs6000_split_v4si_init): Split up the insns for a V4SImode vector
+ initialization.
+ (rtx_is_swappable_p): V4SImode vector initialization insn is not
+ swappable.
+ * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Add
+ declaration.
+ * config/rs6000/vsx.md (VSX_SPLAT_I): New mode iterators and
+ attributes to initialize V8HImode and V16QImode vectors with the
+ same element.
+ (VSX_SPLAT_COUNT): Likewise.
+ (VSX_SPLAT_SUFFIX): Likewise.
+ (UNSPEC_VSX_VEC_INIT): New unspec.
+ (vsx_concat_v2sf): Eliminate using 'preferred' register classes.
+ Allow SFmode values to come from Altivec registers.
+ (vsx_init_v4si): New insn/split for V4SImode vector initialization
+ on 64-bit systems with direct move.
+ (vsx_splat_<mode>, VSX_W iterator): Rework V4SImode and V4SFmode
+ vector initializations, to allow V4SImode vector initializations
+ on 64-bit systems with direct move.
+ (vsx_splat_v4si): Likewise.
+ (vsx_splat_v4si_di): Likewise.
+ (vsx_splat_v4sf): Likewise.
+ (vsx_splat_v4sf_internal): Likewise.
+ (vsx_xxspltw_<mode>, VSX_W iterator): Eliminate using 'preferred'
+ register classes.
+ (vsx_xxspltw_<mode>_direct, VSX_W iterator): Likewise.
+ (vsx_vsplt<VSX_SPLAT_SUFFIX>_di): New insns to support
+ initializing V8HImode and V16QImode vectors with the same
+ element.
+ * config/rs6000/rs6000.h (TARGET_DIRECT_MOVE_64BIT): Disallow
+ optimization if -maltivec=be.
+
2016-08-23 Christophe Lyon <christophe.lyon@linaro.org>
* config/arm/arm.md (arm_movqi_insn): Swap predicable_short_it
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
+extern void rs6000_split_v4si_init (rtx []);
extern bool altivec_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_vec_perm_le (rtx op[4]);
extern bool rs6000_expand_vec_perm_const (rtx op[4]);
if ((int_vector_p || TARGET_VSX) && all_const_zero)
{
/* Zero register. */
- emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
+ emit_move_insn (target, CONST0_RTX (mode));
return;
}
else if (int_vector_p && easy_vector_constant (const_vec, mode))
return;
}
- /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is
- complicated since scalars are stored as doubles in the registers. */
- if (TARGET_P9_VECTOR && mode == V4SImode && all_same
- && VECTOR_MEM_VSX_P (mode))
+ /* Special case initializing vector int if we are on 64-bit systems with
+ direct move, using the ISA 3.0 instructions when available. */
+ if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
+ && TARGET_DIRECT_MOVE_64BIT)
{
- emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
- return;
+ if (all_same)
+ {
+ rtx element0 = XVECEXP (vals, 0, 0);
+ if (MEM_P (element0))
+ element0 = rs6000_address_for_fpconvert (element0);
+ else
+ element0 = force_reg (SImode, element0);
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_splat_v4si (target, element0));
+ else
+ {
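+ /* ISA 2.07 with direct move: zero extend the value to DImode so it can
+ be moved to a vector register and splatted with XXSPLTW. */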
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (tmp, element0));
+ emit_insn (gen_vsx_splat_v4si_di (target, tmp));
+ }
+ return;
+ }
+ else
+ {
+ rtx elements[4];
+ size_t i;
+
+ for (i = 0; i < 4; i++)
+ {
+ elements[i] = XVECEXP (vals, 0, i);
+ if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
+ elements[i] = copy_to_mode_reg (SImode, elements[i]);
+ }
+
+ emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
+ elements[2], elements[3]));
+ return;
+ }
}
/* With single precision floating point on VSX, know that internally single
precision is actually represented as a double, and either make 2 V2DF
vectors, and convert these vectors to single precision, or do one
conversion, and splat the result to the other elements. */
- if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
+ if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
{
if (all_same)
{
- rtx op0 = XVECEXP (vals, 0, 0);
+ rtx element0 = XVECEXP (vals, 0, 0);
if (TARGET_P9_VECTOR)
- emit_insn (gen_vsx_splat_v4sf (target, op0));
+ {
+ if (MEM_P (element0))
+ element0 = rs6000_address_for_fpconvert (element0);
+
+ emit_insn (gen_vsx_splat_v4sf (target, element0));
+ }
else
{
rtx freg = gen_reg_rtx (V4SFmode);
- rtx sreg = force_reg (SFmode, op0);
+ rtx sreg = force_reg (SFmode, element0);
rtx cvt = (TARGET_XSCVDPSPN
? gen_vsx_xscvdpspn_scalar (freg, sreg)
: gen_vsx_xscvdpsp_scalar (freg, sreg));
return;
}
+ /* Special case initializing vector short/char splats if we are on
+ 64-bit systems with direct move. */
+ if (all_same && TARGET_DIRECT_MOVE_64BIT
+ && (mode == V16QImode || mode == V8HImode))
+ {
+ rtx op0 = XVECEXP (vals, 0, 0);
+ rtx di_tmp = gen_reg_rtx (DImode);
+
+ if (!REG_P (op0))
+ op0 = force_reg (GET_MODE_INNER (mode), op0);
+
+ if (mode == V16QImode)
+ {
+ emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
+ emit_insn (gen_vsx_vspltb_di (target, di_tmp));
+ return;
+ }
+
+ if (mode == V8HImode)
+ {
+ emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
+ emit_insn (gen_vsx_vsplth_di (target, di_tmp));
+ return;
+ }
+ }
+
/* Store value to stack temp. Load vector element. Splat. However, splat
of 64-bit items is not supported on Altivec. */
if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
}
+/* Helper function to return the register number of an RTX. */
+static inline int
+regno_or_subregno (rtx op)
+{
+ if (REG_P (op))
+ return REGNO (op);
+ else if (SUBREG_P (op))
+ return subreg_regno (op);
+ else
+ gcc_unreachable ();
+}
+
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
temporary (BASE_TMP) to fixup the address. Return the new memory address
}
else
{
- if (REG_P (op1) || SUBREG_P (op1))
+ bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
+ bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
+
+ /* Note, ADDI requires the register being added to be a base
+ register. If the register is R0, load it up into the temporary
+ and do the add. */
+ if (op1_reg_p
+ && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
{
insn = gen_add3_insn (base_tmp, op1, element_offset);
gcc_assert (insn != NULL_RTX);
emit_insn (insn);
}
- else if (REG_P (element_offset) || SUBREG_P (element_offset))
+ else if (ele_reg_p
+ && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
{
insn = gen_add3_insn (base_tmp, element_offset, op1);
gcc_assert (insn != NULL_RTX);
{
rtx op1 = XEXP (new_addr, 1);
addr_mask_type addr_mask;
- int scalar_regno;
-
- if (REG_P (scalar_reg))
- scalar_regno = REGNO (scalar_reg);
- else if (SUBREG_P (scalar_reg))
- scalar_regno = subreg_regno (scalar_reg);
- else
- gcc_unreachable ();
+ int scalar_regno = regno_or_subregno (scalar_reg);
gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
if (INT_REGNO_P (scalar_regno))
gcc_unreachable ();
}
+/* Helper function for rs6000_split_v4si_init to build up a DImode value from
+ two SImode values. */
+
+static void
+rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
+{
+ const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
+
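+ /* If both elements are constants, form the DImode constant directly. */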
+ if (CONST_INT_P (si1) && CONST_INT_P (si2))
+ {
+ unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
+ unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
+
+ emit_move_insn (dest, GEN_INT (const1 | const2));
+ return;
+ }
+
+ /* Put si1 into the upper 32 bits of dest. */
+ if (CONST_INT_P (si1))
+ emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
+ else
+ {
+ /* Generate RLDIC. */
+ rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
+ rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
+ rtx mask_rtx = GEN_INT (mask_32bit << 32);
+ rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
+ gcc_assert (!reg_overlap_mentioned_p (dest, si1));
+ emit_insn (gen_rtx_SET (dest, and_rtx));
+ }
+
+ /* Put si2 into the temporary. */
+ gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
+ if (CONST_INT_P (si2))
+ emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
+ else
+ emit_insn (gen_zero_extendsidi2 (tmp, si2));
+
+ /* Combine the two parts. */
+ emit_insn (gen_iordi3 (dest, dest, tmp));
+ return;
+}
+
+/* Split a V4SI initialization. */
+
+void
+rs6000_split_v4si_init (rtx operands[])
+{
+ rtx dest = operands[0];
+
+ /* Destination is a GPR, build up the two DImode parts in place. */
+ if (REG_P (dest) || SUBREG_P (dest))
+ {
+ int d_regno = regno_or_subregno (dest);
+ rtx scalar1 = operands[1];
+ rtx scalar2 = operands[2];
+ rtx scalar3 = operands[3];
+ rtx scalar4 = operands[4];
+ rtx tmp1 = operands[5];
+ rtx tmp2 = operands[6];
+
+ /* Even though we only need one temporary (plus the destination, which
+ has an early clobber constraint), try to use two temporaries, one for
+ each double word created. That way the second insn scheduling pass can
+ rearrange things so the two parts are done in parallel. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ rtx di_lo = gen_rtx_REG (DImode, d_regno);
+ rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
+ rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
+ rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
+ }
+ else
+ {
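+ /* For little endian, the elements within each doubleword are swapped. */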
+ rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
+ rtx di_hi = gen_rtx_REG (DImode, d_regno);
+ gcc_assert (!VECTOR_ELT_ORDER_BIG);
+ rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
+ rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
+ }
+ return;
+ }
+
+ else
+ gcc_unreachable ();
+}
+
/* Return TRUE if OP is an invalid SUBREG operation on the e500. */
bool
case UNSPEC_VSX_CVSPDPN:
case UNSPEC_VSX_EXTRACT:
case UNSPEC_VSX_VSLO:
+ case UNSPEC_VSX_VEC_INIT:
return 0;
case UNSPEC_VSPLT_DIRECT:
*special = SH_SPLAT;
&& TARGET_SINGLE_FLOAT \
&& TARGET_DOUBLE_FLOAT)
-/* Macro to say whether we can do optimization where we need to do parts of the
- calculation in 64-bit GPRs and then is transfered to the vector
- registers. */
+/* Macro to say whether we can do optimizations where we need to do parts of
+ the calculation in 64-bit GPRs and then transfer the results to the vector
+ registers. Do not allow -maltivec=be for these optimizations, because it
+ adds to the complexity of the code. */
#define TARGET_DIRECT_MOVE_64BIT (TARGET_DIRECT_MOVE \
&& TARGET_P8_VECTOR \
&& TARGET_POWERPC64 \
- && TARGET_UPPER_REGS_DI)
+ && TARGET_UPPER_REGS_DI \
+ && (rs6000_altivec_element_order != 2))
/* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
(V8HI "v")
(V4SI "wa")])
+;; Iterator for the 2 short vector types to do a splat from an integer
+(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
+
+;; Mode attribute to give the count for the splat instruction to splat
+;; the value in the 64-bit integer slot
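+;; (counts 7 and 3 select the low-order byte/halfword of that slot)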
+(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
+
+;; Mode attribute to give the suffix for the splat instruction
+(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
+
;; Constants for creating unspecs
(define_c_enum "unspec"
[UNSPEC_VSX_CONCAT
UNSPEC_VSX_VXSIG
UNSPEC_VSX_VIEXP
UNSPEC_VSX_VTSTDC
+ UNSPEC_VSX_VEC_INIT
])
;; VSX moves
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
(unspec:V2DF
- [(match_operand:SF 1 "vsx_register_operand" "f,f")
- (match_operand:SF 2 "vsx_register_operand" "f,f")]
+ [(match_operand:SF 1 "vsx_register_operand" "ww")
+ (match_operand:SF 2 "vsx_register_operand" "ww")]
UNSPEC_VSX_CONCAT))]
"VECTOR_MEM_VSX_P (V2DFmode)"
{
}
[(set_attr "type" "vecperm")])
+;; V4SImode initialization splitter
+(define_insn_and_split "vsx_init_v4si"
+ [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
+ (unspec:V4SI
+ [(match_operand:SI 1 "reg_or_cint_operand" "rn")
+ (match_operand:SI 2 "reg_or_cint_operand" "rn")
+ (match_operand:SI 3 "reg_or_cint_operand" "rn")
+ (match_operand:SI 4 "reg_or_cint_operand" "rn")]
+ UNSPEC_VSX_VEC_INIT))
+ (clobber (match_scratch:DI 5 "=&r"))
+ (clobber (match_scratch:DI 6 "=&r"))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_v4si_init (operands);
+ DONE;
+})
+
;; xxpermdi for little endian loads and stores. We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
mtvsrdd %x0,%1,%1"
[(set_attr "type" "vecperm,vecload,vecperm")])
-;; V4SI splat (ISA 3.0)
-;; When SI's are allowed in VSX registers, add XXSPLTW support
-(define_expand "vsx_splat_<mode>"
- [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
- (vec_duplicate:VSX_W
- (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
- "TARGET_P9_VECTOR"
-{
- if (MEM_P (operands[1]))
- operands[1] = rs6000_address_for_fpconvert (operands[1]);
- else if (!REG_P (operands[1]))
- operands[1] = force_reg (<VS_scalar>mode, operands[1]);
-})
-
-(define_insn "*vsx_splat_v4si_internal"
- [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
+;; V4SI splat support
+(define_insn "vsx_splat_v4si"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
(vec_duplicate:V4SI
(match_operand:SI 1 "splat_input_operand" "r,Z")))]
"TARGET_P9_VECTOR"
"@
mtvsrws %x0,%1
lxvwsx %x0,%y1"
- [(set_attr "type" "mftgpr,vecload")])
+ [(set_attr "type" "vecperm,vecload")])
+
+;; SImode is not currently allowed in vector registers. This pattern
+;; allows us to use direct move to get the value in a vector register
+;; so that we can use XXSPLTW
+(define_insn "vsx_splat_v4si_di"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
+ (vec_duplicate:V4SI
+ (truncate:SI
+ (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "@
+ xxspltw %x0,%x1,1
+ mtvsrws %x0,%1"
+ [(set_attr "type" "vecperm")])
;; V4SF splat (ISA 3.0)
-(define_insn_and_split "*vsx_splat_v4sf_internal"
+(define_insn_and_split "vsx_splat_v4sf"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
(vec_duplicate:V4SF
(match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
- [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
(vec_duplicate:VSX_W
(vec_select:<VS_scalar>
- (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
+ (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
(parallel
- [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
+ [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
if (!BYTES_BIG_ENDIAN)
[(set_attr "type" "vecperm")])
(define_insn "vsx_xxspltw_<mode>_direct"
- [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
- (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
- (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
+ (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
UNSPEC_VSX_XXSPLTW))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
"xxspltw %x0,%x1,%2"
[(set_attr "type" "vecperm")])
+;; V16QI/V8HI splat support on ISA 2.07
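+;; The scalar value sits in the low-order bits of a DImode value that has
+;; been moved into an Altivec register; VSPLTB/VSPLTH replicates that element.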
+(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
+ [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
+ (vec_duplicate:VSX_SPLAT_I
+ (truncate:<VS_scalar>
+ (match_operand:DI 1 "altivec_register_operand" "v"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+ "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
+ [(set_attr "type" "vecperm")])
+
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+2016-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/vec-init-1.c: Add tests where the vector is
+ being created from pointers to memory locations.
+ * gcc.target/powerpc/vec-init-2.c: Likewise.
+
2016-08-23 Fritz Reese <fritzoreese@gmail.com>
* gfortran.dg/dec_structure_12.f90: New testcase.
extern vector int pack_reg (int a, int b, int c, int d)
__attribute__((__noinline__));
+extern vector int pack_from_ptr (int *p_a, int *p_b, int *p_c, int *p_d)
+ __attribute__((__noinline__));
+
extern vector int pack_const (void)
__attribute__((__noinline__));
extern vector int splat_reg (int a)
__attribute__((__noinline__));
+extern vector int splat_from_ptr (int *p)
+ __attribute__((__noinline__));
+
extern vector int splat_const (void)
__attribute__((__noinline__));
return (vector int) { a, b, c, d };
}
+vector int
+pack_from_ptr (int *p_a, int *p_b, int *p_c, int *p_d)
+{
+ return (vector int) { *p_a, *p_b, *p_c, *p_d };
+}
+
vector int
pack_const (void)
{
return (vector int) { a, a, a, a };
}
+vector int
+splat_from_ptr (int *p)
+{
+ return (vector int) { *p, *p, *p, *p };
+}
+
vector int
splat_const (void)
{
int main (void)
{
vector int sv2, sv3;
+ int mem = SPLAT;
+ int mem2[4] = { ELEMENTS };
check (sv);
check (pack_reg (ELEMENTS));
+ check (pack_from_ptr (&mem2[0], &mem2[1], &mem2[2], &mem2[3]));
+
check (pack_const ());
pack_ptr (&sv2, ELEMENTS);
check_splat (splat_reg (SPLAT));
+ check_splat (splat_from_ptr (&mem));
+
check_splat (splat_const ());
splat_ptr (&sv2, SPLAT);
extern vector long pack_reg (long a, long b)
__attribute__((__noinline__));
+extern vector long pack_from_ptr (long *p_a, long *p_b)
+ __attribute__((__noinline__));
+
extern vector long pack_const (void)
__attribute__((__noinline__));
extern vector long splat_reg (long a)
__attribute__((__noinline__));
+extern vector long splat_from_ptr (long *p)
+ __attribute__((__noinline__));
+
extern vector long splat_const (void)
__attribute__((__noinline__));
return (vector long) { a, b };
}
+vector long
+pack_from_ptr (long *p_a, long *p_b)
+{
+ return (vector long) { *p_a, *p_b };
+}
+
vector long
pack_const (void)
{
return (vector long) { a, a };
}
+vector long
+splat_from_ptr (long *p)
+{
+ return (vector long) { *p, *p };
+}
+
vector long
splat_const (void)
{
int main (void)
{
vector long sv2, sv3;
+ long mem = SPLAT;
+ long mem2[2] = { ELEMENTS };
check (sv);
check (pack_reg (ELEMENTS));
+ check (pack_from_ptr (&mem2[0], &mem2[1]));
+
check (pack_const ());
pack_ptr (&sv2, ELEMENTS);
check_splat (splat_reg (SPLAT));
+ check_splat (splat_from_ptr (&mem));
+
check_splat (splat_const ());
splat_ptr (&sv2, SPLAT);