+2018-01-10 Kelvin Nilsen <kelvin@gcc.gnu.org>
+
+ * config/rs6000/rs6000-p8swap.c (rs6000_sum_of_two_registers_p):
+ New function.
+ (rs6000_quadword_masked_address_p): Likewise.
+ (quad_aligned_load_p): Likewise.
+ (quad_aligned_store_p): Likewise.
+ (const_load_sequence_p): Add comment to describe the outermost loop.
+ (mimic_memory_attributes_and_flags): New function.
+ (rs6000_gen_stvx): Likewise.
+ (replace_swapped_aligned_store): Likewise.
+ (rs6000_gen_lvx): Likewise.
+ (replace_swapped_aligned_load): Likewise.
+ (replace_swapped_load_constant): Capitalize argument name in
+ comment describing this function.
+ (rs6000_analyze_swaps): Add a third pass to search for vector loads
+ and stores that access quad-word aligned addresses and replace
+ them with stvx or lvx instructions when appropriate.
+ * config/rs6000/rs6000-protos.h (rs6000_sum_of_two_registers_p):
+ New function prototype.
+ (rs6000_quadword_masked_address_p): Likewise.
+ (rs6000_gen_lvx): Likewise.
+ (rs6000_gen_stvx): Likewise.
+ * config/rs6000/vsx.md (*vsx_le_perm_load_<mode>): For modes
+ VSX_D (V2DF, V2DI), modify this split to select lvx instruction
+ when memory address is aligned.
+ (*vsx_le_perm_load_<mode>): For modes VSX_W (V4SF, V4SI), modify
+ this split to select lvx instruction when memory address is aligned.
+ (*vsx_le_perm_load_v8hi): Modify this split to select lvx
+ instruction when memory address is aligned.
+ (*vsx_le_perm_load_v16qi): Likewise.
+ (four unnamed splitters): Modify to select the stvx instruction
+ when memory is aligned.
+
2018-01-13 Jan Hubicka <hubicka@ucw.cz>
* predict.c (determine_unlikely_bbs): Handle correctly BBs
return 1;
}
+/* Return true iff EXPR represents the sum of two registers. */
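+/* For example, (plus (reg:DI 9) (reg:DI 10)) satisfies this predicate,
+ while (plus (reg:DI 9) (const_int 16)) does not. */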
+bool
+rs6000_sum_of_two_registers_p (const_rtx expr)
+{
+ if (GET_CODE (expr) == PLUS)
+ {
+ const_rtx operand1 = XEXP (expr, 0);
+ const_rtx operand2 = XEXP (expr, 1);
+ return (REG_P (operand1) && REG_P (operand2));
+ }
+ return false;
+}
+
+/* Return true iff EXPR represents an address expression that masks off
+ the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
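+/* For example, (and (plus (reg:DI 9) (reg:DI 10)) (const_int -16))
+ is such an address. */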
+bool
+rs6000_quadword_masked_address_p (const_rtx expr)
+{
+ if (GET_CODE (expr) == AND)
+ {
+ const_rtx operand1 = XEXP (expr, 0);
+ const_rtx operand2 = XEXP (expr, 1);
+ if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
+ && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
+ return true;
+ }
+ return false;
+}
+
+/* Return TRUE if INSN represents a swap of a swapped load from memory
+ and the memory address is quad-word aligned. */
+static bool
+quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
+{
+ unsigned uid = INSN_UID (insn);
+ if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
+ return false;
+
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+
+ /* Since insn is known to represent a swap instruction, we know it
+ "uses" only one input variable. */
+ df_ref use = DF_INSN_INFO_USES (insn_info);
+
+ /* Figure out where this input variable is defined. */
+ struct df_link *def_link = DF_REF_CHAIN (use);
+
+ /* If there is no definition or the definition is artificial or there are
+ multiple definitions, punt. */
+ if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
+ || def_link->next)
+ return false;
+
+ rtx def_insn = DF_REF_INSN (def_link->ref);
+ unsigned uid2 = INSN_UID (def_insn);
+ /* We're looking for a load-with-swap insn. If this is not that,
+ return false. */
+ if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
+ return false;
+
+ /* If the source of the rtl def is not a set from memory, return
+ false. */
+ rtx body = PATTERN (def_insn);
+ if (GET_CODE (body) != SET
+ || GET_CODE (SET_SRC (body)) != VEC_SELECT
+ || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
+ return false;
+
+ rtx mem = XEXP (SET_SRC (body), 0);
+ rtx base_reg = XEXP (mem, 0);
+ return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
+ && MEM_ALIGN (mem) >= 128);
+}
+
+/* Return TRUE if INSN represents a store-with-swap of a swapped value
+ and the memory address is quad-word aligned. */
+static bool
+quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
+{
+ unsigned uid = INSN_UID (insn);
+ if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
+ return false;
+
+ rtx body = PATTERN (insn);
+ rtx dest_address = XEXP (SET_DEST (body), 0);
+ rtx swap_reg = XEXP (SET_SRC (body), 0);
+
+ /* If the base address for the memory expression is not represented
+ by a single register and is not the sum of two registers, punt. */
+ if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
+ return false;
+
+ /* Confirm that the value to be stored is produced by a swap
+ instruction. */
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+ df_ref use;
+ FOR_EACH_INSN_INFO_USE (use, insn_info)
+ {
+ struct df_link *def_link = DF_REF_CHAIN (use);
+
+ /* If this is not the definition of the candidate swap register,
+ then skip it. We are interested in a different definition. */
+ if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
+ continue;
+
+ /* If there is no def or the def is artificial or there are
+ multiple defs, punt. */
+ if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
+ || def_link->next)
+ return false;
+
+ rtx def_insn = DF_REF_INSN (def_link->ref);
+ unsigned uid2 = INSN_UID (def_insn);
+
+ /* If this source value is not a simple swap, return false. */
+ if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
+ || insn_entry[uid2].is_store)
+ return false;
+
+ /* We've processed the use we care about, so break out of
+ this loop. */
+ break;
+ }
+
+ /* At this point, we know the source data comes from a swap. The
+ remaining question is whether the memory address is aligned. */
+ rtx set = single_set (insn);
+ if (set)
+ {
+ rtx dest = SET_DEST (set);
+ if (MEM_P (dest))
+ return (MEM_ALIGN (dest) >= 128);
+ }
+ return false;
+}
+
/* Return 1 iff UID, known to reference a swap, is both fed by a load
and a feeder of a store. */
static unsigned int
struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
df_ref use;
+
+ /* Iterate over the definitions that are used by this insn. Since
+ this is known to be a swap insn, expect only one used definition. */
FOR_EACH_INSN_INFO_USE (use, insn_info)
{
struct df_link *def_link = DF_REF_CHAIN (use);
insn->set_deleted ();
}
-/* Given that swap_insn represents a swap of a load of a constant
+/* Make NEW_MEM_EXP's attributes and flags resemble those of
+ ORIGINAL_MEM_EXP. */
+static void
+mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
+{
+ RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
+ RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
+ RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
+ RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
+ RTX_FLAG (new_mem_exp, frame_related) =
+ RTX_FLAG (original_mem_exp, frame_related);
+
+ /* The following fields may not be used with MEM subexpressions. */
+ RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
+ RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
+
+ struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);
+
+ alias_set_type set = original_attrs.alias;
+ set_mem_alias_set (new_mem_exp, set);
+
+ addr_space_t addrspace = original_attrs.addrspace;
+ set_mem_addr_space (new_mem_exp, addrspace);
+
+ unsigned int align = original_attrs.align;
+ set_mem_align (new_mem_exp, align);
+
+ tree expr = original_attrs.expr;
+ set_mem_expr (new_mem_exp, expr);
+
+ if (original_attrs.offset_known_p)
+ {
+ HOST_WIDE_INT offset = original_attrs.offset;
+ set_mem_offset (new_mem_exp, offset);
+ }
+ else
+ clear_mem_offset (new_mem_exp);
+
+ if (original_attrs.size_known_p)
+ {
+ HOST_WIDE_INT size = original_attrs.size;
+ set_mem_size (new_mem_exp, size);
+ }
+ else
+ clear_mem_size (new_mem_exp);
+}
+
+/* Generate an rtx expression to represent use of the stvx insn to store
+ the value represented by register SRC_EXP into the memory referenced
+ by DEST_EXP, with vector mode MODE. */
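+/* Note that the address of the generated stvx pattern masks off the
+ low-order 4 bits of the original address, e.g. (and (plus op1 op2)
+ (const_int -16)), mirroring the behavior of the stvx instruction
+ itself. */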
+rtx
+rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
+{
+ rtx memory_address = XEXP (dest_exp, 0);
+ rtx stvx;
+
+ if (rs6000_sum_of_two_registers_p (memory_address))
+ {
+ rtx op1, op2;
+ op1 = XEXP (memory_address, 0);
+ op2 = XEXP (memory_address, 1);
+ if (mode == V16QImode)
+ stvx = gen_altivec_stvx_v16qi_2op (src_exp, op1, op2);
+ else if (mode == V8HImode)
+ stvx = gen_altivec_stvx_v8hi_2op (src_exp, op1, op2);
+#ifdef HAVE_V8HFmode
+ else if (mode == V8HFmode)
+ stvx = gen_altivec_stvx_v8hf_2op (src_exp, op1, op2);
+#endif
+ else if (mode == V4SImode)
+ stvx = gen_altivec_stvx_v4si_2op (src_exp, op1, op2);
+ else if (mode == V4SFmode)
+ stvx = gen_altivec_stvx_v4sf_2op (src_exp, op1, op2);
+ else if (mode == V2DImode)
+ stvx = gen_altivec_stvx_v2di_2op (src_exp, op1, op2);
+ else if (mode == V2DFmode)
+ stvx = gen_altivec_stvx_v2df_2op (src_exp, op1, op2);
+ else if (mode == V1TImode)
+ stvx = gen_altivec_stvx_v1ti_2op (src_exp, op1, op2);
+ else
+ /* KFmode, TFmode, other modes not expected in this context. */
+ gcc_unreachable ();
+ }
+ else /* REG_P (memory_address) */
+ {
+ if (mode == V16QImode)
+ stvx = gen_altivec_stvx_v16qi_1op (src_exp, memory_address);
+ else if (mode == V8HImode)
+ stvx = gen_altivec_stvx_v8hi_1op (src_exp, memory_address);
+#ifdef HAVE_V8HFmode
+ else if (mode == V8HFmode)
+ stvx = gen_altivec_stvx_v8hf_1op (src_exp, memory_address);
+#endif
+ else if (mode == V4SImode)
+ stvx = gen_altivec_stvx_v4si_1op (src_exp, memory_address);
+ else if (mode == V4SFmode)
+ stvx = gen_altivec_stvx_v4sf_1op (src_exp, memory_address);
+ else if (mode == V2DImode)
+ stvx = gen_altivec_stvx_v2di_1op (src_exp, memory_address);
+ else if (mode == V2DFmode)
+ stvx = gen_altivec_stvx_v2df_1op (src_exp, memory_address);
+ else if (mode == V1TImode)
+ stvx = gen_altivec_stvx_v1ti_1op (src_exp, memory_address);
+ else
+ /* KFmode, TFmode, other modes not expected in this context. */
+ gcc_unreachable ();
+ }
+
+ rtx new_mem_exp = SET_DEST (stvx);
+ mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
+ return stvx;
+}
+
+/* Given that STORE_INSN represents an aligned store-with-swap of a
+ swapped value, replace the store with an aligned store (without
+ swap) and replace the swap with a copy insn. */
+static void
+replace_swapped_aligned_store (swap_web_entry *insn_entry,
+ rtx_insn *store_insn)
+{
+ unsigned uid = INSN_UID (store_insn);
+ gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
+
+ rtx body = PATTERN (store_insn);
+ rtx dest_address = XEXP (SET_DEST (body), 0);
+ rtx swap_reg = XEXP (SET_SRC (body), 0);
+ gcc_assert (REG_P (dest_address)
+ || rs6000_sum_of_two_registers_p (dest_address));
+
+ /* Find the swap instruction that provides the value to be stored by
+ this store-with-swap instruction. */
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
+ df_ref use;
+ rtx_insn *swap_insn = NULL;
+ unsigned uid2 = 0;
+ FOR_EACH_INSN_INFO_USE (use, insn_info)
+ {
+ struct df_link *def_link = DF_REF_CHAIN (use);
+
+ /* If this is not the definition of the candidate swap register,
+ then skip it. We are only interested in the swap insn. */
+ if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
+ continue;
+
+ /* If there is no def or the def is artificial or there are
+ multiple defs, we should not be here. */
+ gcc_assert (def_link && def_link->ref && !def_link->next
+ && !DF_REF_IS_ARTIFICIAL (def_link->ref));
+
+ swap_insn = DF_REF_INSN (def_link->ref);
+ uid2 = INSN_UID (swap_insn);
+
+ /* If this source value is not a simple swap, we should not be here. */
+ gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
+ && !insn_entry[uid2].is_store);
+
+ /* We've processed the use we care about, so break out of
+ this loop. */
+ break;
+ }
+
+ /* At this point, swap_insn and uid2 represent the swap instruction
+ that feeds the store. */
+ gcc_assert (swap_insn);
+ rtx set = single_set (store_insn);
+ gcc_assert (set);
+ rtx dest_exp = SET_DEST (set);
+ rtx src_exp = XEXP (SET_SRC (body), 0);
+ enum machine_mode mode = GET_MODE (dest_exp);
+ gcc_assert (MEM_P (dest_exp));
+ gcc_assert (MEM_ALIGN (dest_exp) >= 128);
+
+ /* Replace the store with an aligned stvx insn. */
+ rtx stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
+
+ rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
+ rtx new_body = PATTERN (new_insn);
+
+ gcc_assert ((GET_CODE (new_body) == SET)
+ && (GET_CODE (SET_DEST (new_body)) == MEM));
+
+ set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
+ df_insn_rescan (new_insn);
+
+ df_insn_delete (store_insn);
+ remove_insn (store_insn);
+ store_insn->set_deleted ();
+
+ /* Replace the swap with a copy. */
+ uid2 = INSN_UID (swap_insn);
+ mark_swaps_for_removal (insn_entry, uid2);
+ replace_swap_with_copy (insn_entry, uid2);
+}
+
+/* Generate an rtx expression to represent use of the lvx insn to load
+ from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
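+/* As with rs6000_gen_stvx, the address of the generated lvx pattern
+ masks off the low-order 4 bits of the original memory address. */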
+rtx
+rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
+{
+ rtx memory_address = XEXP (src_exp, 0);
+ rtx lvx;
+
+ if (rs6000_sum_of_two_registers_p (memory_address))
+ {
+ rtx op1, op2;
+ op1 = XEXP (memory_address, 0);
+ op2 = XEXP (memory_address, 1);
+
+ if (mode == V16QImode)
+ lvx = gen_altivec_lvx_v16qi_2op (dest_exp, op1, op2);
+ else if (mode == V8HImode)
+ lvx = gen_altivec_lvx_v8hi_2op (dest_exp, op1, op2);
+#ifdef HAVE_V8HFmode
+ else if (mode == V8HFmode)
+ lvx = gen_altivec_lvx_v8hf_2op (dest_exp, op1, op2);
+#endif
+ else if (mode == V4SImode)
+ lvx = gen_altivec_lvx_v4si_2op (dest_exp, op1, op2);
+ else if (mode == V4SFmode)
+ lvx = gen_altivec_lvx_v4sf_2op (dest_exp, op1, op2);
+ else if (mode == V2DImode)
+ lvx = gen_altivec_lvx_v2di_2op (dest_exp, op1, op2);
+ else if (mode == V2DFmode)
+ lvx = gen_altivec_lvx_v2df_2op (dest_exp, op1, op2);
+ else if (mode == V1TImode)
+ lvx = gen_altivec_lvx_v1ti_2op (dest_exp, op1, op2);
+ else
+ /* KFmode, TFmode, other modes not expected in this context. */
+ gcc_unreachable ();
+ }
+ else /* REG_P (memory_address) */
+ {
+ if (mode == V16QImode)
+ lvx = gen_altivec_lvx_v16qi_1op (dest_exp, memory_address);
+ else if (mode == V8HImode)
+ lvx = gen_altivec_lvx_v8hi_1op (dest_exp, memory_address);
+#ifdef HAVE_V8HFmode
+ else if (mode == V8HFmode)
+ lvx = gen_altivec_lvx_v8hf_1op (dest_exp, memory_address);
+#endif
+ else if (mode == V4SImode)
+ lvx = gen_altivec_lvx_v4si_1op (dest_exp, memory_address);
+ else if (mode == V4SFmode)
+ lvx = gen_altivec_lvx_v4sf_1op (dest_exp, memory_address);
+ else if (mode == V2DImode)
+ lvx = gen_altivec_lvx_v2di_1op (dest_exp, memory_address);
+ else if (mode == V2DFmode)
+ lvx = gen_altivec_lvx_v2df_1op (dest_exp, memory_address);
+ else if (mode == V1TImode)
+ lvx = gen_altivec_lvx_v1ti_1op (dest_exp, memory_address);
+ else
+ /* KFmode, TFmode, other modes not expected in this context. */
+ gcc_unreachable ();
+ }
+
+ rtx new_mem_exp = SET_SRC (lvx);
+ mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
+
+ return lvx;
+}
+
+/* Given that SWAP_INSN represents a swap of an aligned
+ load-with-swap, replace the load with an aligned load (without
+ swap) and replace the swap with a copy insn. */
+static void
+replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
+{
+ /* Find the load. */
+ unsigned uid = INSN_UID (swap_insn);
+ /* Only call this if quad_aligned_load_p (swap_insn). */
+ gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
+ struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
+
+ /* Since insn is known to represent a swap instruction, we know it
+ "uses" only one input variable. */
+ df_ref use = DF_INSN_INFO_USES (insn_info);
+
+ /* Figure out where this input variable is defined. */
+ struct df_link *def_link = DF_REF_CHAIN (use);
+ gcc_assert (def_link && def_link->ref
+ && !DF_REF_IS_ARTIFICIAL (def_link->ref)
+ && !def_link->next);
+
+ rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
+ unsigned uid2 = INSN_UID (def_insn);
+
+ /* We're expecting a load-with-swap insn. */
+ gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
+
+ /* We expect this to be a set from memory, with the source
+ representing a swapping load (indicated by code VEC_SELECT). */
+ rtx body = PATTERN (def_insn);
+ gcc_assert ((GET_CODE (body) == SET)
+ && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
+ && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM));
+
+ rtx src_exp = XEXP (SET_SRC (body), 0);
+ enum machine_mode mode = GET_MODE (src_exp);
+ rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
+
+ rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
+ rtx new_body = PATTERN (new_insn);
+
+ gcc_assert ((GET_CODE (new_body) == SET)
+ && (GET_CODE (SET_SRC (new_body)) == MEM));
+
+ set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
+ df_insn_rescan (new_insn);
+
+ df_insn_delete (def_insn);
+ remove_insn (def_insn);
+ def_insn->set_deleted ();
+
+ /* Replace the swap with a copy. */
+ mark_swaps_for_removal (insn_entry, uid);
+ replace_swap_with_copy (insn_entry, uid);
+}
+
+/* Given that SWAP_INSN represents a swap of a load of a constant
vector value, replace with a single instruction that loads a
swapped variant of the original constant.
/* Clean up. */
free (insn_entry);
- /* Use additional pass over rtl to replace swap(load(vector constant))
- with load(swapped vector constant). */
+ /* Use a second pass over rtl to detect vector loads and stores that
+ access quad-word aligned memory and can therefore use lvx/stvx
+ without swaps. */
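+
+ /* For example, when the mem is known to be 128-bit aligned, the
+ load-with-swap (set (reg T) (vec_select (mem ADDR) ...)) followed
+ by the swap (set (reg D) (vec_select (reg T) ...)) is rewritten as
+ a single aligned lvx load into D, and the remaining swap becomes
+ a simple register copy. */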
+
+ /* First, rebuild ud chains. */
+ df_remove_problem (df_chain);
+ df_process_deferred_rescans ();
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+ df_chain_add_problem (DF_UD_CHAIN);
+ df_analyze ();
+
swap_web_entry *pass2_insn_entry;
pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
&& !pass2_insn_entry[i].is_store)
{
- insn = pass2_insn_entry[i].insn;
- if (const_load_sequence_p (pass2_insn_entry, insn))
- replace_swapped_load_constant (pass2_insn_entry, insn);
+ /* Replace swap of aligned load-swap with aligned unswapped
+ load. */
+ insn = pass2_insn_entry[i].insn;
+ if (quad_aligned_load_p (pass2_insn_entry, insn))
+ replace_swapped_aligned_load (pass2_insn_entry, insn);
+ }
+ else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
+ {
+ /* Replace aligned store-swap of swapped value with aligned
+ unswapped store. */
+ insn = pass2_insn_entry[i].insn;
+ if (quad_aligned_store_p (pass2_insn_entry, insn))
+ replace_swapped_aligned_store (pass2_insn_entry, insn);
}
/* Clean up. */
free (pass2_insn_entry);
+
+ /* Use a third pass over rtl to replace swap(load(vector constant))
+ with load(swapped vector constant). */
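+
+ /* For example, a load of the V4SI constant { 0, 1, 2, 3 } followed
+ by a doubleword swap becomes a single load of the swapped constant
+ { 2, 3, 0, 1 }. */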
+
+ /* First, rebuild ud chains. */
+ df_remove_problem (df_chain);
+ df_process_deferred_rescans ();
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+ df_chain_add_problem (DF_UD_CHAIN);
+ df_analyze ();
+
+ swap_web_entry *pass3_insn_entry;
+ pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
+
+ /* Walk the insns to gather basic data. */
+ FOR_ALL_BB_FN (bb, fun)
+ FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+ {
+ unsigned int uid = INSN_UID (insn);
+ if (NONDEBUG_INSN_P (insn))
+ {
+ pass3_insn_entry[uid].insn = insn;
+
+ pass3_insn_entry[uid].is_relevant = 1;
+ pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
+ pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
+
+ /* Determine if this is a doubleword swap. */
+ if (insn_is_swap_p (insn))
+ pass3_insn_entry[uid].is_swap = 1;
+ }
+ }
+
+ e = get_max_uid ();
+ for (unsigned i = 0; i < e; ++i)
+ if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
+ && !pass3_insn_entry[i].is_store)
+ {
+ insn = pass3_insn_entry[i].insn;
+ if (const_load_sequence_p (pass3_insn_entry, insn))
+ replace_swapped_load_constant (pass3_insn_entry, insn);
+ }
+
+ /* Clean up. */
+ free (pass3_insn_entry);
return 0;
}
class rtl_opt_pass;
extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
+extern bool rs6000_quadword_masked_address_p (const_rtx expr);
+extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
+extern rtx rs6000_gen_stvx (enum machine_mode, rtx, rtx);
#endif /* rs6000-protos.h */
(match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "&& 1"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
(parallel [(const_int 1) (const_int 0)])))]
"
{
+ rtx mem = operands[1];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register destination is not in the altivec
+ range. */
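+ /* When the replacement applies, this split emits a single lvx
+ pattern, whose address masks off the low-order 4 bits, in place
+ of the lxvd2x-plus-xxpermdi sequence it would otherwise
+ produce. */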
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ /* Replace the swapping load with an aligned lvx. */
+ rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
+ emit_insn (lvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the lvx
+ instruction, so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping load. */
+ }
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
: operands[0];
}
(match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "&& 1"
[(set (match_dup 2)
(vec_select:<MODE>
(match_dup 1)
(const_int 0) (const_int 1)])))]
"
{
+ rtx mem = operands[1];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register destination is not in the altivec
+ range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ /* Replace the swapping load with an aligned lvx. */
+ rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
+ emit_insn (lvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the lvx
+ instruction, so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping load. */
+ }
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
: operands[0];
}
(match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "&& 1"
[(set (match_dup 2)
(vec_select:V8HI
(match_dup 1)
(const_int 2) (const_int 3)])))]
"
{
+ rtx mem = operands[1];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register destination is not in the altivec
+ range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ /* Replace the swapping load with an aligned lvx. */
+ rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
+ emit_insn (lvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the lvx
+ instruction, so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping load. */
+ }
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
: operands[0];
}
(match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
- "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
+ "&& 1"
[(set (match_dup 2)
(vec_select:V16QI
(match_dup 1)
(const_int 6) (const_int 7)])))]
"
{
+ rtx mem = operands[1];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register destination is not in the altivec
+ range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ /* Replace the swapping load with an aligned lvx. */
+ rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
+ emit_insn (lvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the lvx
+ instruction, so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping load. */
+ }
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
: operands[0];
}
(match_dup 2)
(parallel [(const_int 1) (const_int 0)])))]
{
+ rtx mem = operands[0];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register source is not in the altivec range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
+ emit_insn (stvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the stvx instruction,
+ so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping store. */
+ }
+
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
: operands[1];
})
(parallel [(const_int 2) (const_int 3)
(const_int 0) (const_int 1)])))]
{
+ rtx mem = operands[0];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register source is not in the altivec range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
+ emit_insn (stvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the stvx instruction,
+ so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping store. */
+ }
+
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
: operands[1];
})
(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
{
+ rtx mem = operands[0];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register source is not in the altivec range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
+ emit_insn (stvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the stvx instruction,
+ so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping store. */
+ }
+
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
: operands[1];
})
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
{
+ rtx mem = operands[0];
+
+ /* Don't apply the swap optimization if we've already performed register
+ allocation and the hard register source is not in the altivec range. */
+ if ((MEM_ALIGN (mem) >= 128)
+ && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
+ {
+ rtx mem_address = XEXP (mem, 0);
+ enum machine_mode mode = GET_MODE (mem);
+ if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
+ {
+ rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
+ emit_insn (stvx_set_expr);
+ DONE;
+ }
+ else if (rs6000_quadword_masked_address_p (mem_address))
+ {
+ /* This rtl is already in the form that matches the stvx instruction,
+ so leave it alone. */
+ DONE;
+ }
+ /* Otherwise, fall through to transform into a swapping store. */
+ }
+
operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
: operands[1];
})
+2018-01-10 Kelvin Nilsen <kelvin@gcc.gnu.org>
+
+ * gcc.target/powerpc/pr48857.c: Modify dejagnu directives to look
+ for lvx and stvx instead of lxvd2x and stxvd2x and require
+ little-endian target. Add comments.
+ * gcc.target/powerpc/swaps-p8-28.c: Add functions for more
+ comprehensive testing.
+ * gcc.target/powerpc/swaps-p8-29.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-30.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-31.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-32.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-33.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-34.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-35.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-36.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-37.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-38.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-39.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-40.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-41.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-42.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-43.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-44.c: Likewise.
+ * gcc.target/powerpc/swaps-p8-45.c: Likewise.
+ * gcc.target/powerpc/vec-extract-2.c: Add comment and remove
+ scan-assembler-not directives that forbid lvx and xxpermdi.
+ * gcc.target/powerpc/vec-extract-3.c: Likewise.
+ * gcc.target/powerpc/vec-extract-5.c: Likewise.
+ * gcc.target/powerpc/vec-extract-6.c: Likewise.
+ * gcc.target/powerpc/vec-extract-7.c: Likewise.
+ * gcc.target/powerpc/vec-extract-8.c: Likewise.
+ * gcc.target/powerpc/vec-extract-9.c: Likewise.
+ * gcc.target/powerpc/vsx-vector-6-le.c: Change
+ scan-assembler-times directives to reflect different numbers of
+ expected xxlnor, xxlor, xvcmpgtdp, and xxland instructions.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
-/* { dg-do compile { target { powerpc*-*-* } } } */
+/* Expected instruction selection as characterized by
+ scan-assembler-times directives below is only relevant to
+ little-endian targets. */
+/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
/* { dg-options "-O2 -mcpu=power7 -mabi=altivec" } */
-/* { dg-final { scan-assembler-times "lxvd2x" 1 } } */
-/* { dg-final { scan-assembler-times "stxvd2x" 1 } } */
+/* { dg-final { scan-assembler-times "lvx" 1 } } */
+/* { dg-final { scan-assembler-times "stvx" 1 } } */
/* { dg-final { scan-assembler-not "ld" } } */
/* { dg-final { scan-assembler-not "lwz" } } */
/* { dg-final { scan-assembler-not "stw" } } */
v2di_type
return_v2di (v2di_type *ptr)
{
- return *ptr; /* should generate lxvd2x 34,0,3. */
+ /* As of pr48857, should generate lxvd2x 34,0,3
+ followed by xxpermdi 34,34,34,2. Subsequent optimization
+ recognizes that ptr refers to an aligned vector and replaces
+ this with lvx 2,0,3. */
+ return *ptr;
}
void
pass_v2di (v2di_type arg, v2di_type *ptr)
{
- *ptr = arg; /* should generate stxvd2x 34,0,{3,5}. */
+ /* As of pr48857, should generate xxpermdi 34,34,34,2 followed by
+ stxvd2x 34,0,5. Subsequent optimization recognizes that ptr
+ refers to an aligned vector and replaces this with stvx 2,0,5. */
+ *ptr = arg;
}
8, 9, 10, 11,
12, 13, 14, 15 };
+vector char x, z;
+
vector char
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector char
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector char *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector char
+foo2 (void)
+{
+ vector char v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector char
+foo3 (vector char *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ char a_field;
+ vector char a_vector;
+};
+
+vector char
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector char arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector char *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector char arg)
+{
+ vector char v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector char *arg1, vector char arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector char v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
vector char fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[15] != 15)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[14] != 14)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[13] != 13)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[12] != 12)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[11] != 11)
+ abort ();
+
+ for (int i = 0; i < 16; i++)
+ z[i] = 15 - i;
+
+ baz (z);
+ if (x[0] != 15 || x[15] != 0)
+ abort ();
+
+ vector char source = { 8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 9, 10, 11, 12, 13, 14, 15 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[8] != 0)
+ abort ();
+
+ vector char dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15)
+ abort ();
+
+ return 0;
}
8, 9, 10, 11,
12, 13, 14, 15 };
+vector char x, z;
+
vector char
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector char
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector char *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector char
+foo2 (void)
+{
+ vector char v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector char
+foo3 (vector char *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ char a_field;
+ vector char a_vector;
+};
+
+vector char
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector char arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector char *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector char arg)
+{
+ vector char v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector char *arg1, vector char arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector char v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
vector char fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[15] != 15)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[14] != 14)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[13] != 13)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[12] != 12)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[11] != 11)
+ abort ();
+
+ for (int i = 0; i < 16; i++)
+ z[i] = 15 - i;
+
+ baz (z);
+ if (x[0] != 15 || x[15] != 0)
+ abort ();
+
+ vector char source = { 8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 9, 10, 11, 12, 13, 14, 15 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[8] != 0)
+ abort ();
+
+ vector char dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the generated assembly
+ not contain xxpermdi or xxswapd. However, with the more
+ sophisticated code generation used today, xxpermdi (aka xxswapd)
+ may now show up without being part of a lxvd2x or stxvd2x
+ sequence. */
#include <altivec.h>
8, 9, 10, 11,
12, 13, 14, 15 };
+vector char x, z;
+
vector char
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector char
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector char *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector char
+foo2 (void)
+{
+ vector char v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector char
+foo3 (vector char *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ char a_field;
+ vector char a_vector;
+};
+
+vector char
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector char arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector char *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector char arg)
+{
+ vector char v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector char *arg1, vector char arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector char v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
vector char fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[15] != 15)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[14] != 14)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[13] != 13)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[12] != 12)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[11] != 11)
+ abort ();
+
+ for (int i = 0; i < 16; i++)
+ z[i] = 15 - i;
+
+ baz (z);
+ if (x[0] != 15 || x[15] != 0)
+ abort ();
+
+ vector char source = { 8, 7, 6, 5, 4, 3, 2, 1,
+ 0, 9, 10, 11, 12, 13, 14, 15 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[8] != 0)
+ abort ();
+
+ vector char dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-vector short y = { 0, 1, 2, 3,
- 4, 5, 6, 7 };
+vector short x;
+vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector short z;
vector short
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector short
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector short *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector short
+foo2 (void)
+{
+ vector short v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector short
+foo3 (vector short *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector short a_vector;
+};
+
+vector short
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector short arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector short *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector short arg)
+{
+ vector short v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector short *arg1, vector short arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector short v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
main (int argc, char *argv[])
{
vector short fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[7] != 7)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[6] != 6)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[5] != 5)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[4] != 4)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[3] != 3)
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ z[i] = 7 - i;
+
+ baz (z);
+ if (x[0] != 7 || x[7] != 0)
+ abort ();
+
+ vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[7] != 1)
+ abort ();
+
+ vector short dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-const vector short y = { 0, 1, 2, 3,
- 4, 5, 6, 7 };
+vector short x;
+const vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector short z;
vector short
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector short
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector short *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector short
+foo2 (void)
+{
+ vector short v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector short
+foo3 (vector short *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector short a_vector;
+};
+
+vector short
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector short arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector short *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector short arg)
+{
+ vector short v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector short *arg1, vector short arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector short v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
main (int argc, char *argv[])
{
vector short fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[7] != 7)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[6] != 6)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[5] != 5)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[4] != 4)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[3] != 3)
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ z[i] = 7 - i;
+
+ baz (z);
+ if (x[0] != 7 || x[7] != 0)
+ abort ();
+
+ vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[7] != 1)
+ abort ();
+
+ vector short dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the generated assembly
+ not contain xxpermdi or xxswapd. However, with the more
+ sophisticated code generation used today, xxpermdi (aka xxswapd)
+ may now show up without being part of a lxvd2x or stxvd2x
+ sequence. */
#include <altivec.h>
extern void abort (void);
-const vector short y = { 0, 1, 2, 3,
- 4, 5, 6, 7 };
+vector short x;
+const vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector short z;
vector short
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector short
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector short *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector short
+foo2 (void)
+{
+ vector short v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector short
+foo3 (vector short *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector short a_vector;
+};
+
+vector short
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector short arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector short *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector short arg)
+{
+ vector short v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector short *arg1, vector short arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector short v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
main (int argc, char *argv[])
{
vector short fetched_value = foo ();
- if (fetched_value[0] != 0 || fetched_value[15] != 15)
+ if (fetched_value[0] != 0 || fetched_value[7] != 7)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[6] != 6)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[5] != 5)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[4] != 4)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[4] != 4 || fetched_value[3] != 3)
+ abort ();
+
+ for (int i = 0; i < 8; i++)
+ z[i] = 7 - i;
+
+ baz (z);
+ if (x[0] != 7 || x[7] != 0)
+ abort ();
+
+ vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 };
+
+ baz1 (source);
+ if (x[3] != 5 || x[7] != 1)
+ abort ();
+
+ vector short dest;
+ baz2 (&dest, source);
+ if (dest[4] != 4 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3)
+ abort ();
+
+ return 0;
}
extern void abort (void);
+vector int x;
vector int y = { 0, 1, 2, 3 };
+vector int z;
vector int
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector int
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector int *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector int
+foo2 (void)
+{
+ vector int v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector int
+foo3 (vector int *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector int a_vector;
+};
+
+vector int
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector int arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector int *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector int arg)
+{
+ vector int v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector int *arg1, vector int arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector int v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
main (int argc, char *argv[])
{
vector int fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[3] != 3)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[2] != 2)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[1] != 1)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[0] != 0)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 2 || fetched_value[3] != 3)
+ abort ();
+
+ z[0] = 7;
+ z[1] = 6;
+ z[2] = 5;
+ z[3] = 4;
+
+ baz (z);
+ if (x[0] != 7 || x[3] != 4)
+ abort ();
+
+ vector int source = { 8, 7, 6, 5 };
+
+ baz1 (source);
+ if (x[2] != 6 || x[1] != 7)
+ abort ();
+
+ vector int dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8)
+ abort ();
+
+ return 0;
}
extern void abort (void);
+vector int x;
const vector int y = { 0, 1, 2, 3 };
+vector int z;
vector int
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector int
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector int *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector int
+foo2 (void)
+{
+ vector int v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector int
+foo3 (vector int *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector int a_vector;
+};
+
+vector int
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector int arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector int *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector int arg)
+{
+ vector int v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector int *arg1, vector int arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector int v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector int fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[3] != 3)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[2] != 2)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[1] != 1)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[0] != 0)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 2 || fetched_value[3] != 3)
+ abort ();
+
+ z[0] = 7;
+ z[1] = 6;
+ z[2] = 5;
+ z[3] = 4;
+
+ baz (z);
+ if (x[0] != 7 || x[3] != 4)
+ abort ();
+
+ vector int source = { 8, 7, 6, 5 };
+
+ baz1 (source);
+ if (x[2] != 6 || x[1] != 7)
+ abort ();
+
+ vector int dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the generated assembly
+ not contain xxpermdi or xxswapd.  However, with the more
+ sophisticated code generation used today, xxpermdi (aka xxswapd)
+ may now show up without being part of a lxvd2x or stxvd2x
+ sequence.  */
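For reference: on little-endian POWER8, lxvd2x and stxvd2x move the two
doublewords of a VSX register in the opposite order from the little-endian
element layout, so each is normally paired with an xxswapd.  A plain-C model
of the swap these tests expect to be eliminated (illustrative only, not part
of the patch):

  #include <stdint.h>

  /* Model of xxswapd: exchange the two 64-bit halves of a 128-bit
     vector register.  */
  static void
  xxswapd_model (uint64_t reg[2])
  {
    uint64_t tmp = reg[0];
    reg[0] = reg[1];
    reg[1] = tmp;
  }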
#include <altivec.h>
extern void abort (void);
+vector int x;
const vector int y = { 0, 1, 2, 3 };
+vector int z;
vector int
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector int
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector int *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector int
+foo2 (void)
+{
+ vector int v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector int
+foo3 (vector int *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector int a_vector;
+};
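The padding claim above can be checked directly.  A minimal sketch, assuming
the 16-byte alignment that VSX vector types carry (the struct name here is
hypothetical, to avoid clashing with the test's own bar):

  #include <stddef.h>
  #include <altivec.h>

  struct bar_check {
    short a_field;
    vector int a_vector;
  };

  /* vector int is 16-byte aligned, so the 2-byte a_field is padded
     out to offset 16.  */
  _Static_assert (offsetof (struct bar_check, a_vector) == 16,
                  "a_vector must start on a quad-word boundary");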
+
+vector int
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector int arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector int *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector int arg)
+{
+ vector int v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector int *arg1, vector int arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector int v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector int fetched_value = foo ();
if (fetched_value[0] != 0 || fetched_value[3] != 3)
abort ();
- else
- return 0;
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 1 || fetched_value[2] != 2)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 2 || fetched_value[1] != 1)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 3 || fetched_value[0] != 0)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 2 || fetched_value[3] != 3)
+ abort ();
+
+ z[0] = 7;
+ z[1] = 6;
+ z[2] = 5;
+ z[3] = 4;
+
+ baz (z);
+ if (x[0] != 7 || x[3] != 4)
+ abort ();
+
+ vector int source = { 8, 7, 6, 5 };
+
+ baz1 (source);
+ if (x[2] != 6 || x[1] != 7)
+ abort ();
+
+ vector int dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8 || dest[1] != 7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-vector float y = { 0.0f, 0.1f, 0.2f, 0.3f };
+vector float x;
+vector float y = { 0.0F, 0.1F, 0.2F, 0.3F };
+vector float z;
vector float
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector float
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector float *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector float
+foo2 (void)
+{
+ vector float v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector float
+foo3 (vector float *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector float a_vector;
+};
+
+vector float
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector float arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector float *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector float arg)
+{
+ vector float v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector float *arg1, vector float arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector float v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector float fetched_value = foo ();
- if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3f)
+ if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F)
+ abort ();
+
+ z[0] = 0.7F;
+ z[1] = 0.6F;
+ z[2] = 0.5F;
+ z[3] = 0.4F;
+
+ baz (z);
+ if (x[0] != 0.7F || x[3] != 0.4F)
+ abort ();
+
+ vector float source = { 0.8F, 0.7F, 0.6F, 0.5F };
+
+ baz1 (source);
+ if (x[2] != 0.6F || x[1] != 0.7F)
+ abort ();
+
+ vector float dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8F || dest[1] != 0.7F)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-const vector float y = { 0.0f, 0.1f, 0.2f, 0.3f };
+vector float x;
+const vector float y = { 0.0F, 0.1F, 0.2F, 0.3F };
+vector float z;
vector float
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector float
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector float *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector float
+foo2 (void)
+{
+ vector float v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector float
+foo3 (vector float *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector float a_vector;
+};
+
+vector float
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector float arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector float *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector float arg)
+{
+ vector float v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector float *arg1, vector float arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector float v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector float fetched_value = foo ();
- if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3f)
+ if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F)
+ abort ();
+
+ z[0] = 0.7F;
+ z[1] = 0.6F;
+ z[2] = 0.5F;
+ z[3] = 0.4F;
+
+ baz (z);
+ if (x[0] != 0.7F || x[3] != 0.4F)
+ abort ();
+
+ vector float source = { 0.8F, 0.7F, 0.6F, 0.5F };
+
+ baz1 (source);
+ if (x[2] != 0.6F || x[1] != 0.7F)
+ abort ();
+
+ vector float dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8F || dest[1] != 0.7F)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the generated assembly
+ not contain xxpermdi or xxswapd.  However, with the more
+ sophisticated code generation used today, xxpermdi (aka xxswapd)
+ may now show up without being part of a lxvd2x or stxvd2x
+ sequence.  */
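Why alignment is the gating condition: lvx and stvx clear the low-order four
bits of the computed effective address, so substituting them for lxvd2x or
stxvd2x is only correct when the address is already quad-word aligned.  A
hedged model of that test:

  #include <stdint.h>

  /* lvx/stvx access (EA & ~(uintptr_t) 0xF); the substitution is
     only safe when this predicate holds.  */
  static int
  quad_word_aligned_p (const void *p)
  {
    return ((uintptr_t) p & 0xF) == 0;
  }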
#include <altivec.h>
extern void abort (void);
-const vector float y = { 0.0f, 0.1f, 0.2f, 0.3f };
+vector float x;
+const vector float y = { 0.0F, 0.1F, 0.2F, 0.3F };
+vector float z;
vector float
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector float
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector float *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector float
+foo2 (void)
+{
+ vector float v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector float
+foo3 (vector float *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector float a_vector;
+};
+
+vector float
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector float arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector float *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector float arg)
+{
+ vector float v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector float *arg1, vector float arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector float v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector float fetched_value = foo ();
- if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3f)
+ if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F)
+ abort ();
+
+ z[0] = 0.7F;
+ z[1] = 0.6F;
+ z[2] = 0.5F;
+ z[3] = 0.4F;
+
+ baz (z);
+ if (x[0] != 0.7F || x[3] != 0.4F)
+ abort ();
+
+ vector float source = { 0.8F, 0.7F, 0.6F, 0.5F };
+
+ baz1 (source);
+ if (x[2] != 0.6F || x[1] != 0.7F)
+ abort ();
+
+ vector float dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8F || dest[1] != 0.7F)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-vector long long int y = { 0, 1 };
+vector long long x;
+vector long long y = { 1024, 2048 };
+vector long long z;
-vector long long int
+vector long long
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector long long
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector long long *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector long long
+foo2 (void)
+{
+ vector long long v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector long long
+foo3 (vector long long *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector long long a_vector;
+};
+
+vector long long
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector long long arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector long long *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector long long arg)
+{
+ vector long long v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector long long *arg1, vector long long arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector long long v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
- vector long long int fetched_value = foo ();
- if (fetched_value[0] != 0 || fetched_value[1] != 1)
+ vector long long fetched_value = foo ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
abort ();
- else
- return 0;
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ z[0] = 7096;
+ z[1] = 6048;
+
+ baz (z);
+ if (x[0] != 7096 || x[1] != 6048)
+ abort ();
+
+ vector long long source = { 8192, 7096 };
+
+ baz1 (source);
+ if (x[0] != 8192 || x[1] != 7096)
+ abort ();
+
+ vector long long dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8192 || dest[1] != 7096)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-const vector long long int y = { 0, 1 };
+vector long long x;
+const vector long long y = { 1024, 2048 };
+vector long long z;
-vector long long int
+vector long long
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector long long
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector long long *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector long long
+foo2 (void)
+{
+ vector long long v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector long long
+foo3 (vector long long *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector long long a_vector;
+};
+
+vector long long
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector long long arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector long long *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector long long arg)
+{
+ vector long long v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector long long *arg1, vector long long arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector long long v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
- vector long long int fetched_value = foo ();
- if (fetched_value[0] != 0 || fetched_value[1] != 1)
+ vector long long fetched_value = foo ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
abort ();
- else
- return 0;
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ z[0] = 7096;
+ z[1] = 6048;
+
+ baz (z);
+ if (x[0] != 7096 || x[1] != 6048)
+ abort ();
+
+ vector long long source = { 8192, 7096 };
+
+ baz1 (source);
+ if (x[0] != 8192 || x[1] != 7096)
+ abort ();
+
+ vector long long dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8192 || dest[1] != 7096)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the generated assembly
+ not contain xxpermdi or xxswapd.  However, with the more
+ sophisticated code generation used today, xxpermdi (aka xxswapd)
+ may now show up without being part of a lxvd2x or stxvd2x
+ sequence.  */
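Should a positive check ever be wanted in place of the removed negative
scans, DejaGnu's scan-assembler directive can assert that lvx is present; a
hypothetical example, not part of this patch:

  /* { dg-final { scan-assembler {\mlvx\M} } } */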
#include <altivec.h>
extern void abort (void);
-const vector long long int y = { 0, 1 };
+vector long long x;
+const vector long long y = { 1024, 2048 };
+vector long long z;
-vector long long int
+vector long long
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector long long
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector long long *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector long long
+foo2 (void)
+{
+ vector long long v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector long long
+foo3 (vector long long *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector long long a_vector;
+};
+
+vector long long
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector long long arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector long long *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector long long arg)
+{
+ vector long long v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector long long *arg1, vector long long arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector long long v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
- vector long long int fetched_value = foo ();
- if (fetched_value[0] != 0 || fetched_value[1] != 1)
+ vector long long fetched_value = foo ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 1024 || fetched_value[1] != 2048)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
abort ();
- else
- return 0;
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 2048 || fetched_value[0] != 1024)
+ abort ();
+
+ z[0] = 7096;
+ z[1] = 6048;
+
+ baz (z);
+ if (x[0] != 7096 || x[1] != 6048)
+ abort ();
+
+ vector long long source = { 8192, 7096 };
+
+ baz1 (source);
+ if (x[0] != 8192 || x[1] != 7096)
+ abort ();
+
+ vector long long dest;
+ baz2 (&dest, source);
+ if (dest[0] != 8192 || dest[1] != 7096)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-vector double y = { 0.0, 0.1 };
+vector double x;
+vector double y = { 0.1, 0.2 };
+vector double z;
vector double
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector double
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector double *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector double
+foo2 (void)
+{
+ vector double v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector double
+foo3 (vector double *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector double a_vector;
+};
+
+vector double
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector double arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector double *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector double arg)
+{
+ vector double v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector double *arg1, vector double arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector double v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector double fetched_value = foo ();
- if (fetched_value[0] != 0 || fetched_value[1] != 0.1)
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ z[0] = 0.7;
+ z[1] = 0.6;
+
+ baz (z);
+ if (x[0] != 0.7 || x[1] != 0.6)
+ abort ();
+
+ vector double source = { 0.8, 0.7 };
+
+ baz1 (source);
+ if (x[0] != 0.8 || x[1] != 0.7)
+ abort ();
+
+ vector double dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8 || dest[1] != 0.7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8)
+ abort ();
+
+ return 0;
}
extern void abort (void);
-const vector double y = { 0.0, 0.1 };
+vector double x;
+const vector double y = { 0.1, 0.2 };
+vector double z;
vector double
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector double
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector double *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector double
+foo2 (void)
+{
+ vector double v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector double
+foo3 (vector double *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector double a_vector;
+};
+
+vector double
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector double arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector double *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector double arg)
+{
+ vector double v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector double *arg1, vector double arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector double v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector double fetched_value = foo ();
- if (fetched_value[0] != 0.0 || fetched_value[1] != 0.1)
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ z[0] = 0.7;
+ z[1] = 0.6;
+
+ baz (z);
+ if (x[0] != 0.7 || x[1] != 0.6)
+ abort ();
+
+ vector double source = { 0.8, 0.7 };
+
+ baz1 (source);
+ if (x[0] != 0.8 || x[1] != 0.7)
+ abort ();
+
+ vector double dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8 || dest[1] != 0.7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8)
+ abort ();
+
+ return 0;
}
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-mcpu=power8 -O3 " } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "xxswapd" } } */
+
+/* Previous versions of this test required that the assembler does not
+ contain xxpermdi or xxswapd. However, with the more sophisticated
+ code generation used today, it is now possible that xxpermdi (aka
+ xxswapd) show up without being part of a lxvd2x or stxvd2x
+ sequence. */
#include <altivec.h>
extern void abort (void);
-const vector double y = { 0.0, 0.1 };
+vector double x;
+const vector double y = { 0.1, 0.2 };
+vector double z;
vector double
foo (void)
{
- return y;
+ return y; /* Remove 1 swap and use lvx. */
+}
+
+vector double
+foo1 (void)
+{
+ x = y; /* Remove 2 redundant swaps here. */
+ return x; /* Remove 1 swap and use lvx. */
+}
+
+void __attribute__ ((noinline))
+fill_local (vector double *vp)
+{
+ *vp = x; /* Remove 2 redundant swaps here. */
+}
+
+/* Test aligned load from local. */
+vector double
+foo2 (void)
+{
+ vector double v;
+
+ /* Need to be clever here because v will normally reside in a
+ register rather than memory. */
+ fill_local (&v);
+ return v; /* Remove 1 swap and use lvx. */
+}
+
+
+/* Test aligned load from pointer. */
+vector double
+foo3 (vector double *arg)
+{
+ return *arg; /* Remove 1 swap and use lvx. */
+}
+
+/* In this structure, the compiler should insert padding to assure
+ that a_vector is properly aligned. */
+struct bar {
+ short a_field;
+ vector double a_vector;
+};
+
+vector double
+foo4 (struct bar *bp)
+{
+ return bp->a_vector; /* Remove 1 swap and use lvx. */
+}
+
+/* Test aligned store to global. */
+void
+baz (vector double arg)
+{
+ x = arg; /* Remove 1 swap and use stvx. */
+}
+
+void __attribute__ ((noinline))
+copy_local (vector double *arg)
+{
+ x = *arg; /* Remove 2 redundant swaps. */
+}
+
+
+/* Test aligned store to local. */
+void
+baz1 (vector double arg)
+{
+ vector double v;
+
+ /* Need cleverness, because v will normally reside in a register
+ rather than memory. */
+ v = arg; /* Aligned store to local: remove 1
+ swap and use stvx. */
+ copy_local (&v);
+}
+
+/* Test aligned store to pointer. */
+void
+baz2 (vector double *arg1, vector double arg2)
+{
+ /* Assume arg2 resides in register. */
+ *arg1 = arg2; /* Remove 1 swap and use stvx. */
+}
+
+void
+baz3 (struct bar *bp, vector double v)
+{
+ /* Assume v resides in register. */
+ bp->a_vector = v; /* Remove 1 swap and use stvx. */
}
int
 main (int argc, char *argv[])
{
vector double fetched_value = foo ();
- if (fetched_value[0] != 0.0 || fetched_value[1] != 0.1)
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo1 ();
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
abort ();
- else
- return 0;
+
+ fetched_value = foo2 ();
+ if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2)
+ abort ();
+
+ fetched_value = foo3 (&x);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ struct bar a_struct;
+ a_struct.a_vector = x; /* Remove 2 redundant swaps. */
+ fetched_value = foo4 (&a_struct);
+ if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1)
+ abort ();
+
+ z[0] = 0.7;
+ z[1] = 0.6;
+
+ baz (z);
+ if (x[0] != 0.7 || x[1] != 0.6)
+ abort ();
+
+ vector double source = { 0.8, 0.7 };
+
+ baz1 (source);
+ if (x[0] != 0.8 || x[1] != 0.7)
+ abort ();
+
+ vector double dest;
+ baz2 (&dest, source);
+ if (dest[0] != 0.8 || dest[1] != 0.7)
+ abort ();
+
+ baz3 (&a_struct, source);
+ if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8)
+ abort ();
+
+ return 0;
}
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
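The kind of source these directives cover is a vec_extract call; a minimal
sketch (the function name is hypothetical), for which an lvx/xxpermdi-based
sequence is now an acceptable implementation:

  #include <altivec.h>

  /* Extract element I from V.  */
  int
  extract_elem (vector int v, int i)
  {
    return vec_extract (v, i);
  }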
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
-/* { dg-final { scan-assembler-not "lvx" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* With recent enhancements to the code generator, it is considered
+ legal to implement vec_extract with lvx and xxpermdi.  Previous
+ versions of this test forbade both instructions.  */
/* { dg-final { scan-assembler-times "xvabsdp" 1 } } */
/* { dg-final { scan-assembler-times "xvadddp" 1 } } */
-/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
-/* { dg-final { scan-assembler-times "xxlor" 16 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 8 } } */
+/* { dg-final { scan-assembler-times "xxlor" 30 } } */
/* { dg-final { scan-assembler-times "xvcmpeqdp" 5 } } */
-/* { dg-final { scan-assembler-times "xvcmpgtdp" 7 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */
/* { dg-final { scan-assembler-times "xvcmpgedp" 6 } } */
/* { dg-final { scan-assembler-times "xvrdpim" 1 } } */
/* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */
/* { dg-final { scan-assembler-times "xvmsubasp" 1 } } */
/* { dg-final { scan-assembler-times "xvnmaddasp" 1 } } */
/* { dg-final { scan-assembler-times "vmsumshs" 1 } } */
-/* { dg-final { scan-assembler-times "xxland" 9 } } */
+/* { dg-final { scan-assembler-times "xxland" 13 } } */
/* Source code for the test in vsx-vector-6.h */
#include "vsx-vector-6.h"
+2018-01-10 Kelvin Nilsen <kelvin@gcc.gnu.org>
+
+ * lex.c (search_line_fast): Remove illegal coercion of an
+ unaligned pointer value to vector pointer type and replace with
+ use of __builtin_vec_vsx_ld () built-in function, which operates
+ on unaligned pointer values.
+
2018-01-03 Jakub Jelinek <jakub@redhat.com>
Update copyright years.
{
vc m_nl, m_cr, m_bs, m_qm;
- data = *((const vc *)s);
+ data = __builtin_vec_vsx_ld (0, s);
s += 16;
m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
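For context on the lex.c change above: casting an arbitrary unsigned char
pointer to a vector pointer and dereferencing it asserts 16-byte alignment
the data may not have, whereas __builtin_vec_vsx_ld performs a true
unaligned VSX load.  A minimal sketch of the two forms (function names
hypothetical):

  #include <altivec.h>

  typedef vector unsigned char vc;

  /* Undefined when S is not 16-byte aligned: the cast promises
     alignment the data does not have.  */
  vc
  load_aligned_only (const unsigned char *s)
  {
    return *(const vc *) s;
  }

  /* Well-defined for any S: maps to an unaligned VSX load such as
     lxvd2x or lxvw4x.  */
  vc
  load_any (const unsigned char *s)
  {
    return __builtin_vec_vsx_ld (0, s);
  }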