From: Kelvin Nilsen Date: Sun, 14 Jan 2018 05:19:29 +0000 (+0000) Subject: rs6000-p8swap.c (rs6000_sum_of_two_registers_p): New function. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3a821c903c9fa2288712d31da2038d0297babcb;p=gcc.git rs6000-p8swap.c (rs6000_sum_of_two_registers_p): New function. gcc/ChangeLog: 2018-01-10 Kelvin Nilsen * config/rs6000/rs6000-p8swap.c (rs6000_sum_of_two_registers_p): New function. (rs6000_quadword_masked_address_p): Likewise. (quad_aligned_load_p): Likewise. (quad_aligned_store_p): Likewise. (const_load_sequence_p): Add comment to describe the outer-most loop. (mimic_memory_attributes_and_flags): New function. (rs6000_gen_stvx): Likewise. (replace_swapped_aligned_store): Likewise. (rs6000_gen_lvx): Likewise. (replace_swapped_aligned_load): Likewise. (replace_swapped_load_constant): Capitalize argument name in comment describing this function. (rs6000_analyze_swaps): Add a third pass to search for vector loads and stores that access quad-word aligned addresses and replace with stvx or lvx instructions when appropriate. * config/rs6000/rs6000-protos.h (rs6000_sum_of_two_registers_p): New function prototype. (rs6000_quadword_masked_address_p): Likewise. (rs6000_gen_lvx): Likewise. (rs6000_gen_stvx): Likewise. * config/rs6000/vsx.md (*vsx_le_perm_load_): For modes VSX_D (V2DF, V2DI), modify this split to select lvx instruction when memory address is aligned. (*vsx_le_perm_load_): For modes VSX_W (V4SF, V4SI), modify this split to select lvx instruction when memory address is aligned. (*vsx_le_perm_load_v8hi): Modify this split to select lvx instruction when memory address is aligned. (*vsx_le_perm_load_v16qi): Likewise. (four unnamed splitters): Modify to select the stvx instruction when memory is aligned. gcc/testsuite/ChangeLog: 2018-01-10 Kelvin Nilsen * gcc.target/powerpc/pr48857.c: Modify dejagnu directives to look for lvx and stvx instead of lxvd2x and stxvd2x and require little-endian target. Add comments. 
* gcc.target/powerpc/swaps-p8-28.c: Add functions for more comprehensive testing. * gcc.target/powerpc/swaps-p8-29.c: Likewise. * gcc.target/powerpc/swaps-p8-30.c: Likewise. * gcc.target/powerpc/swaps-p8-31.c: Likewise. * gcc.target/powerpc/swaps-p8-32.c: Likewise. * gcc.target/powerpc/swaps-p8-33.c: Likewise. * gcc.target/powerpc/swaps-p8-34.c: Likewise. * gcc.target/powerpc/swaps-p8-35.c: Likewise. * gcc.target/powerpc/swaps-p8-36.c: Likewise. * gcc.target/powerpc/swaps-p8-37.c: Likewise. * gcc.target/powerpc/swaps-p8-38.c: Likewise. * gcc.target/powerpc/swaps-p8-39.c: Likewise. * gcc.target/powerpc/swaps-p8-40.c: Likewise. * gcc.target/powerpc/swaps-p8-41.c: Likewise. * gcc.target/powerpc/swaps-p8-42.c: Likewise. * gcc.target/powerpc/swaps-p8-43.c: Likewise. * gcc.target/powerpc/swaps-p8-44.c: Likewise. * gcc.target/powerpc/swaps-p8-45.c: Likewise. * gcc.target/powerpc/vec-extract-2.c: Add comment and remove scan-assembler-not directives that forbid lvx and xxpermdi. * gcc.target/powerpc/vec-extract-3.c: Likewise. * gcc.target/powerpc/vec-extract-5.c: Likewise. * gcc.target/powerpc/vec-extract-6.c: Likewise. * gcc.target/powerpc/vec-extract-7.c: Likewise. * gcc.target/powerpc/vec-extract-8.c: Likewise. * gcc.target/powerpc/vec-extract-9.c: Likewise. * gcc.target/powerpc/vsx-vector-6-le.c: Change scan-assembler-times directives to reflect different numbers of expected xxlnor, xxlor, xvcmpgtdp, and xxland instructions. libcpp/ChangeLog: 2018-01-10 Kelvin Nilsen * lex.c (search_line_fast): Remove illegal coercion of an unaligned pointer value to vector pointer type and replace with use of __builtin_vec_vsx_ld () built-in function, which operates on unaligned pointer values. From-SVN: r256656 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bcc77eb28d9..dd1910df7f8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2018-01-10 Kelvin Nilsen + + * config/rs6000/rs6000-p8swap.c (rs6000_sum_of_two_registers_p): + New function. 
+ (rs6000_quadword_masked_address_p): Likewise. + (quad_aligned_load_p): Likewise. + (quad_aligned_store_p): Likewise. + (const_load_sequence_p): Add comment to describe the outer-most loop. + (mimic_memory_attributes_and_flags): New function. + (rs6000_gen_stvx): Likewise. + (replace_swapped_aligned_store): Likewise. + (rs6000_gen_lvx): Likewise. + (replace_swapped_aligned_load): Likewise. + (replace_swapped_load_constant): Capitalize argument name in + comment describing this function. + (rs6000_analyze_swaps): Add a third pass to search for vector loads + and stores that access quad-word aligned addresses and replace + with stvx or lvx instructions when appropriate. + * config/rs6000/rs6000-protos.h (rs6000_sum_of_two_registers_p): + New function prototype. + (rs6000_quadword_masked_address_p): Likewise. + (rs6000_gen_lvx): Likewise. + (rs6000_gen_stvx): Likewise. + * config/rs6000/vsx.md (*vsx_le_perm_load_): For modes + VSX_D (V2DF, V2DI), modify this split to select lvx instruction + when memory address is aligned. + (*vsx_le_perm_load_): For modes VSX_W (V4SF, V4SI), modify + this split to select lvx instruction when memory address is aligned. + (*vsx_le_perm_load_v8hi): Modify this split to select lvx + instruction when memory address is aligned. + (*vsx_le_perm_load_v16qi): Likewise. + (four unnamed splitters): Modify to select the stvx instruction + when memory is aligned. + 2018-01-13 Jan Hubicka * predict.c (determine_unlikely_bbs): Handle correctly BBs diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index cb88ffbb535..876f339a79e 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -328,6 +328,142 @@ insn_is_swap_p (rtx insn) return 1; } +/* Return true iff EXPR represents the sum of two registers. 
*/ +bool +rs6000_sum_of_two_registers_p (const_rtx expr) +{ + if (GET_CODE (expr) == PLUS) + { + const_rtx operand1 = XEXP (expr, 0); + const_rtx operand2 = XEXP (expr, 1); + return (REG_P (operand1) && REG_P (operand2)); + } + return false; +} + +/* Return true iff EXPR represents an address expression that masks off + the low-order 4 bits in the style of an lvx or stvx rtl pattern. */ +bool +rs6000_quadword_masked_address_p (const_rtx expr) +{ + if (GET_CODE (expr) == AND) + { + const_rtx operand1 = XEXP (expr, 0); + const_rtx operand2 = XEXP (expr, 1); + if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1)) + && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16) + return true; + } + return false; +} + +/* Return TRUE if INSN represents a swap of a swapped load from memory + and the memory address is quad-word aligned. */ +static bool +quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn) +{ + unsigned uid = INSN_UID (insn); + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load) + return false; + + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + + /* Since insn is known to represent a swap instruction, we know it + "uses" only one input variable. */ + df_ref use = DF_INSN_INFO_USES (insn_info); + + /* Figure out where this input variable is defined. */ + struct df_link *def_link = DF_REF_CHAIN (use); + + /* If there is no definition or the definition is artificial or there are + multiple definitions, punt. */ + if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref) + || def_link->next) + return false; + + rtx def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + /* We're looking for a load-with-swap insn. If this is not that, + return false. */ + if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap) + return false; + + /* If the source of the rtl def is not a set from memory, return + false. 
*/ + rtx body = PATTERN (def_insn); + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != VEC_SELECT + || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM) + return false; + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg)) + && MEM_ALIGN (mem) >= 128) ? true : false; +} + +/* Return TRUE if INSN represents a store-with-swap of a swapped value + and the memory address is quad-word aligned. */ +static bool +quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn) +{ + unsigned uid = INSN_UID (insn); + if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store) + return false; + + rtx body = PATTERN (insn); + rtx dest_address = XEXP (SET_DEST (body), 0); + rtx swap_reg = XEXP (SET_SRC (body), 0); + + /* If the base address for the memory expression is not represented + by a single register and is not the sum of two registers, punt. */ + if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address)) + return false; + + /* Confirm that the value to be stored is produced by a swap + instruction. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + + /* If this is not the definition of the candidate swap register, + then skip it. I am interested in a different definition. */ + if (!rtx_equal_p (DF_REF_REG (use), swap_reg)) + continue; + + /* If there is no def or the def is artificial or there are + multiple defs, punt. 
*/ + if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref) + || def_link->next) + return false; + + rtx def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + + /* If this source value is not a simple swap, return false */ + if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load + || insn_entry[uid2].is_store) + return false; + + /* I've processed the use that I care about, so break out of + this loop. */ + break; + } + + /* At this point, we know the source data comes from a swap. The + remaining question is whether the memory address is aligned. */ + rtx set = single_set (insn); + if (set) + { + rtx dest = SET_DEST (set); + if (MEM_P (dest)) + return (MEM_ALIGN (dest) >= 128); + } + return false; +} + /* Return 1 iff UID, known to reference a swap, is both fed by a load and a feeder of a store. */ static unsigned int @@ -372,6 +508,9 @@ const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); df_ref use; + + /* Iterate over the definitions that are used by this insn. Since + this is known to be a swap insn, expect only one used definition. */ FOR_EACH_INSN_INFO_USE (use, insn_info) { struct df_link *def_link = DF_REF_CHAIN (use); @@ -1353,7 +1492,326 @@ replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) insn->set_deleted (); } -/* Given that swap_insn represents a swap of a load of a constant +/* Make NEW_MEM_EXP's attributes and flags resemble those of + ORIGINAL_MEM_EXP. 
*/ +static void +mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp) +{ + RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump); + RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call); + RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging); + RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil); + RTX_FLAG (new_mem_exp, frame_related) = + RTX_FLAG (original_mem_exp, frame_related); + + /* The following fields may not be used with MEM subexpressions */ + RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct); + RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val); + + struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp); + + alias_set_type set = original_attrs.alias; + set_mem_alias_set (new_mem_exp, set); + + addr_space_t addrspace = original_attrs.addrspace; + set_mem_addr_space (new_mem_exp, addrspace); + + unsigned int align = original_attrs.align; + set_mem_align (new_mem_exp, align); + + tree expr = original_attrs.expr; + set_mem_expr (new_mem_exp, expr); + + if (original_attrs.offset_known_p) + { + HOST_WIDE_INT offset = original_attrs.offset; + set_mem_offset (new_mem_exp, offset); + } + else + clear_mem_offset (new_mem_exp); + + if (original_attrs.size_known_p) + { + HOST_WIDE_INT size = original_attrs.size; + set_mem_size (new_mem_exp, size); + } + else + clear_mem_size (new_mem_exp); +} + +/* Generate an rtx expression to represent use of the stvx insn to store + the value represented by register SRC_EXP into the memory at address + DEST_EXP, with vector mode MODE. 
*/ +rtx +rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) +{ + rtx memory_address = XEXP (dest_exp, 0); + rtx stvx; + + if (rs6000_sum_of_two_registers_p (memory_address)) + { + rtx op1, op2; + op1 = XEXP (memory_address, 0); + op2 = XEXP (memory_address, 1); + if (mode == V16QImode) + stvx = gen_altivec_stvx_v16qi_2op (src_exp, op1, op2); + else if (mode == V8HImode) + stvx = gen_altivec_stvx_v8hi_2op (src_exp, op1, op2); +#ifdef HAVE_V8HFmode + else if (mode == V8HFmode) + stvx = gen_altivec_stvx_v8hf_2op (src_exp, op1, op2); +#endif + else if (mode == V4SImode) + stvx = gen_altivec_stvx_v4si_2op (src_exp, op1, op2); + else if (mode == V4SFmode) + stvx = gen_altivec_stvx_v4sf_2op (src_exp, op1, op2); + else if (mode == V2DImode) + stvx = gen_altivec_stvx_v2di_2op (src_exp, op1, op2); + else if (mode == V2DFmode) + stvx = gen_altivec_stvx_v2df_2op (src_exp, op1, op2); + else if (mode == V1TImode) + stvx = gen_altivec_stvx_v1ti_2op (src_exp, op1, op2); + else + /* KFmode, TFmode, other modes not expected in this context. */ + gcc_unreachable (); + } + else /* REG_P (memory_address) */ + { + if (mode == V16QImode) + stvx = gen_altivec_stvx_v16qi_1op (src_exp, memory_address); + else if (mode == V8HImode) + stvx = gen_altivec_stvx_v8hi_1op (src_exp, memory_address); +#ifdef HAVE_V8HFmode + else if (mode == V8HFmode) + stvx = gen_altivec_stvx_v8hf_1op (src_exp, memory_address); +#endif + else if (mode == V4SImode) + stvx = gen_altivec_stvx_v4si_1op (src_exp, memory_address); + else if (mode == V4SFmode) + stvx = gen_altivec_stvx_v4sf_1op (src_exp, memory_address); + else if (mode == V2DImode) + stvx = gen_altivec_stvx_v2di_1op (src_exp, memory_address); + else if (mode == V2DFmode) + stvx = gen_altivec_stvx_v2df_1op (src_exp, memory_address); + else if (mode == V1TImode) + stvx = gen_altivec_stvx_v1ti_1op (src_exp, memory_address); + else + /* KFmode, TFmode, other modes not expected in this context. 
*/ + gcc_unreachable (); + } + + rtx new_mem_exp = SET_DEST (stvx); + mimic_memory_attributes_and_flags (new_mem_exp, dest_exp); + return stvx; +} + +/* Given that STORE_INSN represents an aligned store-with-swap of a + swapped value, replace the store with an aligned store (without + swap) and replace the swap with a copy insn. */ +static void +replace_swapped_aligned_store (swap_web_entry *insn_entry, + rtx_insn *store_insn) +{ + unsigned uid = INSN_UID (store_insn); + gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store); + + rtx body = PATTERN (store_insn); + rtx dest_address = XEXP (SET_DEST (body), 0); + rtx swap_reg = XEXP (SET_SRC (body), 0); + gcc_assert (REG_P (dest_address) + || rs6000_sum_of_two_registers_p (dest_address)); + + /* Find the swap instruction that provides the value to be stored by + * this store-with-swap instruction. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn); + df_ref use; + rtx_insn *swap_insn = NULL; + unsigned uid2 = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + + /* If this is not the definition of the candidate swap register, + then skip it. I am only interested in the swap insn. */ + if (!rtx_equal_p (DF_REF_REG (use), swap_reg)) + continue; + + /* If there is no def or the def is artificial or there are + multiple defs, we should not be here. */ + gcc_assert (def_link && def_link->ref && !def_link->next + && !DF_REF_IS_ARTIFICIAL (def_link->ref)); + + swap_insn = DF_REF_INSN (def_link->ref); + uid2 = INSN_UID (swap_insn); + + /* If this source value is not a simple swap, we should not be here. */ + gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load + && !insn_entry[uid2].is_store); + + /* We've processed the use we care about, so break out of + this loop. */ + break; + } + + /* At this point, swap_insn and uid2 represent the swap instruction + that feeds the store. 
*/ + gcc_assert (swap_insn); + rtx set = single_set (store_insn); + gcc_assert (set); + rtx dest_exp = SET_DEST (set); + rtx src_exp = XEXP (SET_SRC (body), 0); + enum machine_mode mode = GET_MODE (dest_exp); + gcc_assert (MEM_P (dest_exp)); + gcc_assert (MEM_ALIGN (dest_exp) >= 128); + + /* Replace the copy with a new insn. */ + rtx stvx; + stvx = rs6000_gen_stvx (mode, dest_exp, src_exp); + + rtx_insn *new_insn = emit_insn_before (stvx, store_insn); + rtx new_body = PATTERN (new_insn); + + gcc_assert ((GET_CODE (new_body) == SET) + && (GET_CODE (SET_DEST (new_body)) == MEM)); + + set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn)); + df_insn_rescan (new_insn); + + df_insn_delete (store_insn); + remove_insn (store_insn); + store_insn->set_deleted (); + + /* Replace the swap with a copy. */ + uid2 = INSN_UID (swap_insn); + mark_swaps_for_removal (insn_entry, uid2); + replace_swap_with_copy (insn_entry, uid2); +} + +/* Generate an rtx expression to represent use of the lvx insn to load + from memory SRC_EXP into register DEST_EXP with vector mode MODE. 
*/ +rtx +rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) +{ + rtx memory_address = XEXP (src_exp, 0); + rtx lvx; + + if (rs6000_sum_of_two_registers_p (memory_address)) + { + rtx op1, op2; + op1 = XEXP (memory_address, 0); + op2 = XEXP (memory_address, 1); + + if (mode == V16QImode) + lvx = gen_altivec_lvx_v16qi_2op (dest_exp, op1, op2); + else if (mode == V8HImode) + lvx = gen_altivec_lvx_v8hi_2op (dest_exp, op1, op2); +#ifdef HAVE_V8HFmode + else if (mode == V8HFmode) + lvx = gen_altivec_lvx_v8hf_2op (dest_exp, op1, op2); +#endif + else if (mode == V4SImode) + lvx = gen_altivec_lvx_v4si_2op (dest_exp, op1, op2); + else if (mode == V4SFmode) + lvx = gen_altivec_lvx_v4sf_2op (dest_exp, op1, op2); + else if (mode == V2DImode) + lvx = gen_altivec_lvx_v2di_2op (dest_exp, op1, op2); + else if (mode == V2DFmode) + lvx = gen_altivec_lvx_v2df_2op (dest_exp, op1, op2); + else if (mode == V1TImode) + lvx = gen_altivec_lvx_v1ti_2op (dest_exp, op1, op2); + else + /* KFmode, TFmode, other modes not expected in this context. */ + gcc_unreachable (); + } + else /* REG_P (memory_address) */ + { + if (mode == V16QImode) + lvx = gen_altivec_lvx_v16qi_1op (dest_exp, memory_address); + else if (mode == V8HImode) + lvx = gen_altivec_lvx_v8hi_1op (dest_exp, memory_address); +#ifdef HAVE_V8HFmode + else if (mode == V8HFmode) + lvx = gen_altivec_lvx_v8hf_1op (dest_exp, memory_address); +#endif + else if (mode == V4SImode) + lvx = gen_altivec_lvx_v4si_1op (dest_exp, memory_address); + else if (mode == V4SFmode) + lvx = gen_altivec_lvx_v4sf_1op (dest_exp, memory_address); + else if (mode == V2DImode) + lvx = gen_altivec_lvx_v2di_1op (dest_exp, memory_address); + else if (mode == V2DFmode) + lvx = gen_altivec_lvx_v2df_1op (dest_exp, memory_address); + else if (mode == V1TImode) + lvx = gen_altivec_lvx_v1ti_1op (dest_exp, memory_address); + else + /* KFmode, TFmode, other modes not expected in this context. 
*/ + gcc_unreachable (); + } + + rtx new_mem_exp = SET_SRC (lvx); + mimic_memory_attributes_and_flags (new_mem_exp, src_exp); + + return lvx; +} + +/* Given that SWAP_INSN represents a swap of an aligned + load-with-swap, replace the load with an aligned load (without + swap) and replace the swap with a copy insn. */ +static void +replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) +{ + /* Find the load. */ + unsigned uid = INSN_UID (swap_insn); + /* Only call this if quad_aligned_load_p (swap_insn). */ + gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load); + struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn); + + /* Since insn is known to represent a swap instruction, we know it + "uses" only one input variable. */ + df_ref use = DF_INSN_INFO_USES (insn_info); + + /* Figure out where this input variable is defined. */ + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + gcc_assert (def_link && def_link->ref && + !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next); + + rtx_insn *def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + + /* We're expecting a load-with-swap insn. */ + gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap); + + /* We expect this to be a set from memory, with source representing a + swap (indicated by code VEC_SELECT). 
*/ + rtx body = PATTERN (def_insn); + gcc_assert ((GET_CODE (body) == SET) + && (GET_CODE (SET_SRC (body)) == VEC_SELECT) + && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)); + + rtx src_exp = XEXP (SET_SRC (body), 0); + enum machine_mode mode = GET_MODE (src_exp); + rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp); + + rtx_insn *new_insn = emit_insn_before (lvx, def_insn); + rtx new_body = PATTERN (new_insn); + + gcc_assert ((GET_CODE (new_body) == SET) + && (GET_CODE (SET_SRC (new_body)) == MEM)); + + set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn)); + df_insn_rescan (new_insn); + + df_insn_delete (def_insn); + remove_insn (def_insn); + def_insn->set_deleted (); + + /* Replace the swap with a copy. */ + mark_swaps_for_removal (insn_entry, uid); + replace_swap_with_copy (insn_entry, uid); +} + +/* Given that SWAP_INSN represents a swap of a load of a constant vector value, replace with a single instruction that loads a swapped variant of the original constant. @@ -2144,8 +2602,17 @@ rs6000_analyze_swaps (function *fun) /* Clean up. */ free (insn_entry); - /* Use additional pass over rtl to replace swap(load(vector constant)) - with load(swapped vector constant). */ + /* Use a second pass over rtl to detect that certain vector values + fetched from or stored to memory on quad-word aligned addresses + can use lvx/stvx without swaps. */ + + /* First, rebuild ud chains. 
*/ + df_remove_problem (df_chain); + df_process_deferred_rescans (); + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_UD_CHAIN); + df_analyze (); + swap_web_entry *pass2_insn_entry; pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); @@ -2174,13 +2641,69 @@ rs6000_analyze_swaps (function *fun) if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load && !pass2_insn_entry[i].is_store) { - insn = pass2_insn_entry[i].insn; - if (const_load_sequence_p (pass2_insn_entry, insn)) - replace_swapped_load_constant (pass2_insn_entry, insn); + /* Replace swap of aligned load-swap with aligned unswapped + load. */ + rtx_insn *rtx_insn = pass2_insn_entry[i].insn; + if (quad_aligned_load_p (pass2_insn_entry, rtx_insn)) + replace_swapped_aligned_load (pass2_insn_entry, rtx_insn); + } + else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store) + { + /* Replace aligned store-swap of swapped value with aligned + unswapped store. */ + rtx_insn *rtx_insn = pass2_insn_entry[i].insn; + if (quad_aligned_store_p (pass2_insn_entry, rtx_insn)) + replace_swapped_aligned_store (pass2_insn_entry, rtx_insn); } /* Clean up. */ free (pass2_insn_entry); + + /* Use a third pass over rtl to replace swap(load(vector constant)) + with load(swapped vector constant). */ + + /* First, rebuild ud chains. */ + df_remove_problem (df_chain); + df_process_deferred_rescans (); + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_UD_CHAIN); + df_analyze (); + + swap_web_entry *pass3_insn_entry; + pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); + + /* Walk the insns to gather basic data. 
*/ + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS_SAFE (bb, insn, curr_insn) + { + unsigned int uid = INSN_UID (insn); + if (NONDEBUG_INSN_P (insn)) + { + pass3_insn_entry[uid].insn = insn; + + pass3_insn_entry[uid].is_relevant = 1; + pass3_insn_entry[uid].is_load = insn_is_load_p (insn); + pass3_insn_entry[uid].is_store = insn_is_store_p (insn); + + /* Determine if this is a doubleword swap. If not, + determine whether it can legally be swapped. */ + if (insn_is_swap_p (insn)) + pass3_insn_entry[uid].is_swap = 1; + } + } + + e = get_max_uid (); + for (unsigned i = 0; i < e; ++i) + if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load + && !pass3_insn_entry[i].is_store) + { + insn = pass3_insn_entry[i].insn; + if (const_load_sequence_p (pass3_insn_entry, insn)) + replace_swapped_load_constant (pass3_insn_entry, insn); + } + + /* Clean up. */ + free (pass3_insn_entry); return 0; } diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 1f96005c010..9c6c9a61c15 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -254,5 +254,9 @@ namespace gcc { class context; } class rtl_opt_pass; extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); +extern bool rs6000_sum_of_two_registers_p (const_rtx expr); +extern bool rs6000_quadword_masked_address_p (const_rtx exp); +extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx); +extern rtx rs6000_gen_stvx (enum machine_mode, rtx, rtx); #endif /* rs6000-protos.h */ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index e0d76b1e382..6762a00beaa 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -434,7 +434,7 @@ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "&& 1" [(set (match_dup 2) (vec_select: (match_dup 1) @@ -445,6 +445,33 @@ (parallel [(const_int 1) (const_int 0)])))] " { + rtx mem 
= operands[1]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register destination is not in the altivec + range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (reg_or_subregno (operands[0])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping load. */ + } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; } @@ -457,7 +484,7 @@ (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "&& 1" [(set (match_dup 2) (vec_select: (match_dup 1) @@ -470,6 +497,33 @@ (const_int 0) (const_int 1)])))] " { + rtx mem = operands[1]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register destination is not in the altivec + range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (REGNO(operands[0])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + /* Replace the source memory address with masked address. 
*/ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping load. */ + } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; } @@ -482,7 +536,7 @@ (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "&& 1" [(set (match_dup 2) (vec_select:V8HI (match_dup 1) @@ -499,6 +553,33 @@ (const_int 2) (const_int 3)])))] " { + rtx mem = operands[1]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register destination is not in the altivec + range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (REGNO(operands[0])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping load. */ + } operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[0]) : operands[0]; } @@ -511,7 +592,7 @@ (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" - "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" + "&& 1" [(set (match_dup 2) (vec_select:V16QI (match_dup 1) @@ -536,6 +617,33 @@ (const_int 6) (const_int 7)])))] " { + rtx mem = operands[1]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register destination is not in the altivec + range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (REGNO(operands[0])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping load. */ + } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; } @@ -564,6 +672,31 @@ (match_dup 2) (parallel [(const_int 1) (const_int 0)])))] { + rtx mem = operands[0]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register source is not in the altivec range. 
*/ + if ((MEM_ALIGN (mem) >= 128) + && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); + emit_insn (stvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches stvx instruction, + so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping store. */ + } + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) : operands[1]; }) @@ -611,6 +744,31 @@ (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] { + rtx mem = operands[0]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register source is not in the altivec range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); + emit_insn (stvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches stvx instruction, + so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping store. */ + } + operands[2] = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (operands[1]) : operands[1]; }) @@ -665,6 +823,31 @@ (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] { + rtx mem = operands[0]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register source is not in the altivec range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); + emit_insn (stvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches stvx instruction, + so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping store. */ + } + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) : operands[1]; }) @@ -733,6 +916,31 @@ (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] { + rtx mem = operands[0]; + + /* Don't apply the swap optimization if we've already performed register + allocation and the hard register source is not in the altivec range. */ + if ((MEM_ALIGN (mem) >= 128) + && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER) + || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) + { + rtx mem_address = XEXP (mem, 0); + enum machine_mode mode = GET_MODE (mem); + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); + emit_insn (stvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + /* This rtl is already in the form that matches stvx instruction, + so leave it alone. */ + DONE; + } + /* Otherwise, fall through to transform into a swapping store. 
*/ + } + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) : operands[1]; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 612489f4cf2..f2976406a5f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,39 @@ +2018-01-10 Kelvin Nilsen + + * gcc.target/powerpc/pr48857.c: Modify dejagnu directives to look + for lvx and stvx instead of lxvd2x and stxvd2x and require + little-endian target. Add comments. + * gcc.target/powerpc/swaps-p8-28.c: Add functions for more + comprehensive testing. + * gcc.target/powerpc/swaps-p8-29.c: Likewise. + * gcc.target/powerpc/swaps-p8-30.c: Likewise. + * gcc.target/powerpc/swaps-p8-31.c: Likewise. + * gcc.target/powerpc/swaps-p8-32.c: Likewise. + * gcc.target/powerpc/swaps-p8-33.c: Likewise. + * gcc.target/powerpc/swaps-p8-34.c: Likewise. + * gcc.target/powerpc/swaps-p8-35.c: Likewise. + * gcc.target/powerpc/swaps-p8-36.c: Likewise. + * gcc.target/powerpc/swaps-p8-37.c: Likewise. + * gcc.target/powerpc/swaps-p8-38.c: Likewise. + * gcc.target/powerpc/swaps-p8-39.c: Likewise. + * gcc.target/powerpc/swaps-p8-40.c: Likewise. + * gcc.target/powerpc/swaps-p8-41.c: Likewise. + * gcc.target/powerpc/swaps-p8-42.c: Likewise. + * gcc.target/powerpc/swaps-p8-43.c: Likewise. + * gcc.target/powerpc/swaps-p8-44.c: Likewise. + * gcc.target/powerpc/swaps-p8-45.c: Likewise. + * gcc.target/powerpc/vec-extract-2.c: Add comment and remove + scan-assembler-not directives that forbid lvx and xxpermdi. + * gcc.target/powerpc/vec-extract-3.c: Likewise. + * gcc.target/powerpc/vec-extract-5.c: Likewise. + * gcc.target/powerpc/vec-extract-6.c: Likewise. + * gcc.target/powerpc/vec-extract-7.c: Likewise. + * gcc.target/powerpc/vec-extract-8.c: Likewise. + * gcc.target/powerpc/vec-extract-9.c: Likewise. + * gcc.target/powerpc/vsx-vector-6-le.c: Change + scan-assembler-times directives to reflect different numbers of + expected xxlnor, xxlor, xvcmpgtdp, and xxland instructions. 
+ 2018-01-13 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/testsuite/gcc.target/powerpc/pr48857.c b/gcc/testsuite/gcc.target/powerpc/pr48857.c index 25f14b71331..2a391282ee2 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr48857.c +++ b/gcc/testsuite/gcc.target/powerpc/pr48857.c @@ -1,10 +1,13 @@ -/* { dg-do compile { target { powerpc*-*-* } } } */ +/* Expected instruction selection as characterized by + scan-assembler-times directives below is only relevant to + little-endian targets. */ +/* { dg-do compile { target { powerpc64le-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ /* { dg-options "-O2 -mcpu=power7 -mabi=altivec" } */ -/* { dg-final { scan-assembler-times "lxvd2x" 1 } } */ -/* { dg-final { scan-assembler-times "stxvd2x" 1 } } */ +/* { dg-final { scan-assembler-times "lvx" 1 } } */ +/* { dg-final { scan-assembler-times "stvx" 1 } } */ /* { dg-final { scan-assembler-not "ld" } } */ /* { dg-final { scan-assembler-not "lwz" } } */ /* { dg-final { scan-assembler-not "stw" } } */ @@ -15,12 +18,19 @@ typedef vector long long v2di_type; v2di_type return_v2di (v2di_type *ptr) { - return *ptr; /* should generate lxvd2x 34,0,3. */ + /* As of pr48857, should generate lxvd2x 34,0,3 + followed by xxpermdi 34,34,34,2. Subsequent optimization + recognizes that ptr refers to an aligned vector and replaces + this with lvx 2,0,3. */ + return *ptr; } void pass_v2di (v2di_type arg, v2di_type *ptr) { - *ptr = arg; /* should generate stxvd2x 34,0,{3,5}. */ + /* As of pr48857, should generate xxpermdi 34,34,34,2 followed by + stxvd2x 34,0,5. Subsequent optimization recognizes that ptr + refers to an aligned vector and replaces this with stvx 2,0,5. 
*/ + *ptr = arg; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c index 4ab2bc3db77..03309d716fe 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c @@ -12,10 +12,100 @@ vector char y = { 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector char x, z; + vector char foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector char +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector char *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector char +foo2 (void) +{ + vector char v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector char +foo3 (vector char *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + char a_field; + vector char a_vector; +}; + +vector char +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector char arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector char *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector char arg) +{ + vector char v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. 
*/ +void +baz2 (vector char *arg1, vector char arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector char v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int @@ -24,6 +114,47 @@ main (int argc, char *argv[]) vector char fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[15] != 15) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[14] != 14) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[13] != 13) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[12] != 12) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[11] != 11) + abort (); + + for (int i = 0; i < 16; i++) + z[i] = 15 - i; + + baz (z); + if (x[0] != 15 || x[15] != 0) + abort (); + + vector char source = { 8, 7, 6, 5, 4, 3, 2, 1, + 0, 9, 10, 11, 12, 13, 14, 15 }; + + baz1 (source); + if (x[3] != 5 || x[8] != 0) + abort (); + + vector char dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-29.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-29.c index d2025dfbafc..943f4e732ed 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-29.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-29.c @@ -12,10 +12,100 @@ const vector char y = { 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector char x, z; + vector char foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector char +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. 
*/ +} + +void __attribute__ ((noinline)) +fill_local (vector char *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector char +foo2 (void) +{ + vector char v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector char +foo3 (vector char *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + char a_field; + vector char a_vector; +}; + +vector char +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector char arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector char *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector char arg) +{ + vector char v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector char *arg1, vector char arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector char v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int @@ -24,6 +114,47 @@ main (int argc, char *argv[]) vector char fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[15] != 15) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[14] != 14) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[13] != 13) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[12] != 12) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[11] != 11) + abort (); + + for (int i = 0; i < 16; i++) + z[i] = 15 - i; + + baz (z); + if (x[0] != 15 || x[15] != 0) + abort (); + + vector char source = { 8, 7, 6, 5, 4, 3, 2, 1, + 0, 9, 10, 11, 12, 13, 14, 15 }; + + baz1 (source); + if (x[3] != 5 || x[8] != 0) + abort (); + + vector char dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-30.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-30.c index 9421dbbb156..eddecf570c8 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-30.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-30.c @@ -2,8 +2,12 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. 
*/ #include <altivec.h> @@ -14,10 +18,100 @@ const vector char y = { 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector char x, z; + vector char foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector char +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector char *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector char +foo2 (void) +{ + vector char v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector char +foo3 (vector char *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + char a_field; + vector char a_vector; +}; + +vector char +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector char arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector char *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector char arg) +{ + vector char v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector char *arg1, vector char arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector char v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int @@ -26,6 +120,47 @@ main (int argc, char *argv[]) vector char fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[15] != 15) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[14] != 14) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[13] != 13) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[12] != 12) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[11] != 11) + abort (); + + for (int i = 0; i < 16; i++) + z[i] = 15 - i; + + baz (z); + if (x[0] != 15 || x[15] != 0) + abort (); + + vector char source = { 8, 7, 6, 5, 4, 3, 2, 1, + 0, 9, 10, 11, 12, 13, 14, 15 }; + + baz1 (source); + if (x[3] != 5 || x[8] != 0) + abort (); + + vector char dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[15] != 15) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-31.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-31.c index a3c2f8240be..976a9cffdb2 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-31.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-31.c @@ -7,21 +7,150 @@ extern void abort (void); -vector short y = { 0, 1, 2, 3, - 4, 5, 6, 7 }; +vector short x; +vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 }; +vector short z; vector short foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector short +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector short *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. 
*/ +vector short +foo2 (void) +{ + vector short v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector short +foo3 (vector short *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector short a_vector; +}; + +vector short +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector short arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector short *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector short arg) +{ + vector short v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector short *arg1, vector short arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector short v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, char *argv[]) +main (int argc, short *argv[]) { vector short fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[7] != 7) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[6] != 6) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[5] != 5) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[4] != 4) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[3] != 3) + abort (); + + for (int i = 0; i < 8; i++) + z[i] = 7 - i; + + baz (z); + if (x[0] != 7 || x[7] != 0) + abort (); + + vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 }; + + baz1 (source); + if (x[3] != 5 || x[7] != 1) + abort (); + + vector short dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-32.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-32.c index 57b76ecf7fc..98d5c62dab9 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-32.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-32.c @@ -7,21 +7,150 @@ extern void abort (void); -const vector short y = { 0, 1, 2, 3, - 4, 5, 6, 7 }; +vector short x; +const vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 }; +vector short z; vector short foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector short +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector short *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. 
*/ +vector short +foo2 (void) +{ + vector short v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector short +foo3 (vector short *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector short a_vector; +}; + +vector short +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector short arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector short *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector short arg) +{ + vector short v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector short *arg1, vector short arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector short v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, char *argv[]) +main (int argc, short *argv[]) { vector short fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[7] != 7) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[6] != 6) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[5] != 5) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[4] != 4) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[3] != 3) + abort (); + + for (int i = 0; i < 8; i++) + z[i] = 7 - i; + + baz (z); + if (x[0] != 7 || x[7] != 0) + abort (); + + vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 }; + + baz1 (source); + if (x[3] != 5 || x[7] != 1) + abort (); + + vector short dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-33.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-33.c index 2289be468ec..4e189d8a82b 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-33.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-33.c @@ -2,28 +2,161 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. 
*/ #include <altivec.h> extern void abort (void); -const vector short y = { 0, 1, 2, 3, - 4, 5, 6, 7 }; +vector short x; +const vector short y = { 0, 1, 2, 3, 4, 5, 6, 7 }; +vector short z; vector short foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector short +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector short *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector short +foo2 (void) +{ + vector short v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector short +foo3 (vector short *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector short a_vector; +}; + +vector short +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector short arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector short *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector short arg) +{ + vector short v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector short *arg1, vector short arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector short v) +{ + /* Assume v resides in register. 
*/ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, short *argv[]) { vector short fetched_value = foo (); - if (fetched_value[0] != 0 || fetched_value[15] != 15) + if (fetched_value[0] != 0 || fetched_value[7] != 7) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[6] != 6) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[5] != 5) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[4] != 4) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[4] != 4 || fetched_value[3] != 3) + abort (); + + for (int i = 0; i < 8; i++) + z[i] = 7 - i; + + baz (z); + if (x[0] != 7 || x[7] != 0) + abort (); + + vector short source = { 8, 7, 6, 5, 4, 3, 2, 1 }; + + baz1 (source); + if (x[3] != 5 || x[7] != 1) + abort (); + + vector short dest; + baz2 (&dest, source); + if (dest[4] != 4 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[7] != 1 || a_struct.a_vector[5] != 3) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-34.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-34.c index a7ddb01864a..8ee9795b9a5 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-34.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-34.c @@ -7,20 +7,152 @@ extern void abort (void); +vector int x; vector int y = { 0, 1, 2, 3 }; +vector int z; vector int foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector int +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector int *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. 
*/ +vector int +foo2 (void) +{ + vector int v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector int +foo3 (vector int *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector int a_vector; +}; + +vector int +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector int arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector int *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector int arg) +{ + vector int v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector int *arg1, vector int arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector int v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, int *argv[]) { vector int fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[3] != 3) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[2] != 2) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[1] != 1) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[0] != 0) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 2 || fetched_value[3] != 3) + abort (); + + z[0] = 7; + z[1] = 6; + z[2] = 5; + z[3] = 4; + + baz (z); + if (x[0] != 7 || x[3] != 4) + abort (); + + vector int source = { 8, 7, 6, 5 }; + + baz1 (source); + if (x[2] != 6 || x[1] != 7) + abort (); + + vector int dest; + baz2 (&dest, source); + if (dest[0] != 8 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-35.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-35.c index da98e90e2c3..13f2ea745bd 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-35.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-35.c @@ -7,20 +7,152 @@ extern void abort (void); +vector int x; const vector int y = { 0, 1, 2, 3 }; +vector int z; vector int foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector int +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector int *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector int +foo2 (void) +{ + vector int v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector int +foo3 (vector int *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector int a_vector; +}; + +vector int +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector int arg) +{ + x = arg; /* Remove 1 swap and use stvx. 
*/ +} + +void __attribute__ ((noinline)) +copy_local (vector int *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector int arg) +{ + vector int v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector int *arg1, vector int arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector int v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, int *argv[]) { vector int fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[3] != 3) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[2] != 2) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[1] != 1) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[0] != 0) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 2 || fetched_value[3] != 3) + abort (); + + z[0] = 7; + z[1] = 6; + z[2] = 5; + z[3] = 4; + + baz (z); + if (x[0] != 7 || x[3] != 4) + abort (); + + vector int source = { 8, 7, 6, 5 }; + + baz1 (source); + if (x[2] != 6 || x[1] != 7) + abort (); + + vector int dest; + baz2 (&dest, source); + if (dest[0] != 8 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-36.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-36.c index 52990a6c855..f825f8bcfec 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-36.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-36.c @@ -2,27 +2,163 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. */ #include <altivec.h> extern void abort (void); +vector int x; const vector int y = { 0, 1, 2, 3 }; +vector int z; vector int foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector int +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector int *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector int +foo2 (void) +{ + vector int v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. 
*/ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector int +foo3 (vector int *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector int a_vector; +}; + +vector int +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector int arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector int *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector int arg) +{ + vector int v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector int *arg1, vector int arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector int v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector int fetched_value = foo (); if (fetched_value[0] != 0 || fetched_value[3] != 3) abort (); - else - return 0; + + fetched_value = foo1 (); + if (fetched_value[1] != 1 || fetched_value[2] != 2) + abort (); + + fetched_value = foo2 (); + if (fetched_value[2] != 2 || fetched_value[1] != 1) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 3 || fetched_value[0] != 0) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 2 || fetched_value[3] != 3) + abort (); + + z[0] = 7; + z[1] = 6; + z[2] = 5; + z[3] = 4; + + baz (z); + if (x[0] != 7 || x[3] != 4) + abort (); + + vector int source = { 8, 7, 6, 5 }; + + baz1 (source); + if (x[2] != 6 || x[1] != 7) + abort (); + + vector int dest; + baz2 (&dest, source); + if (dest[0] != 8 || dest[1] != 7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 5 || a_struct.a_vector[0] != 8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-37.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-37.c index 4d8ae11bf97..775d37fe6f6 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-37.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-37.c @@ -7,20 +7,152 @@ extern void abort (void); -vector float y = { 0.0f, 0.1f, 0.2f, 0.3f }; +vector float x; +vector float y = { 0.0F, 0.1F, 0.2F, 0.3F }; +vector float z; vector float foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector float +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector float *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector float +foo2 (void) +{ + vector float v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector float +foo3 (vector float *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector float a_vector; +}; + +vector float +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. 
*/ +void +baz (vector float arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector float *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector float arg) +{ + vector float v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector float *arg1, vector float arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector float v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector float fetched_value = foo (); - if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3f) + if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F) + abort (); + + z[0] = 0.7F; + z[1] = 0.6F; + z[2] = 0.5F; + z[3] = 0.4F; + + baz (z); + if (x[0] != 0.7F || x[3] != 0.4F) + abort (); + + vector float source = { 0.8F, 0.7F, 0.6F, 0.5F }; + + baz1 (source); + if (x[2] != 0.6F || x[1] != 0.7F) + abort (); + + vector float dest; + baz2 (&dest, source); + if (dest[0] != 0.8F || dest[1] != 0.7F) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-38.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-38.c index bf08574bd74..8bd52dee391 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-38.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-38.c @@ -7,20 +7,152 @@ extern void abort (void); -const vector float y = { 0.0f, 0.1f, 0.2f, 0.3f }; +vector float x; +const vector float y = { 0.0F, 0.1F, 0.2F, 0.3F }; +vector float z; vector float foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector float +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector float *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector float +foo2 (void) +{ + vector float v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector float +foo3 (vector float *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. 
*/ +struct bar { + short a_field; + vector float a_vector; +}; + +vector float +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector float arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector float *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector float arg) +{ + vector float v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector float *arg1, vector float arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector float v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector float fetched_value = foo (); - if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3f) + if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F) + abort (); + + z[0] = 0.7F; + z[1] = 0.6F; + z[2] = 0.5F; + z[3] = 0.4F; + + baz (z); + if (x[0] != 0.7F || x[3] != 0.4F) + abort (); + + vector float source = { 0.8F, 0.7F, 0.6F, 0.5F }; + + baz1 (source); + if (x[2] != 0.6F || x[1] != 0.7F) + abort (); + + vector float dest; + baz2 (&dest, source); + if (dest[0] != 0.8F || dest[1] != 0.7F) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-39.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-39.c index 8cd68ddcd72..94add40de7e 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-39.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-39.c @@ -2,27 +2,163 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. */ #include <altivec.h> extern void abort (void); -const vector float y = { 0.0f, 0.1f, 0.2f, 0.3f }; +vector float x; +const vector float y = { 0.0F, 0.1F, 0.2F, 0.3F }; +vector float z; vector float foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector float +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector float *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. 
*/ +vector float +foo2 (void) +{ + vector float v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector float +foo3 (vector float *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector float a_vector; +}; + +vector float +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector float arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector float *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector float arg) +{ + vector float v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector float *arg1, vector float arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector float v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector float fetched_value = foo (); - if (fetched_value[0] != 0.0f || fetched_value[3] != 0.3) + if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F) + abort (); + + z[0] = 0.7F; + z[1] = 0.6F; + z[2] = 0.5F; + z[3] = 0.4F; + + baz (z); + if (x[0] != 0.7F || x[3] != 0.4F) + abort (); + + vector float source = { 0.8F, 0.7F, 0.6F, 0.5F }; + + baz1 (source); + if (x[2] != 0.6F || x[1] != 0.7F) + abort (); + + vector float dest; + baz2 (&dest, source); + if (dest[0] != 0.8F || dest[1] != 0.7F) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-40.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-40.c index 3d1bd3f4b6f..50610d9ab81 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-40.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-40.c @@ -7,20 +7,150 @@ extern void abort (void); -vector long long int y = { 0, 1 }; +vector long long x; +vector long long y = { 1024, 2048 }; +vector long long z; -vector long long int +vector long long foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector long long +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. 
*/ +} + +void __attribute__ ((noinline)) +fill_local (vector long long *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector long long +foo2 (void) +{ + vector long long v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector long long +foo3 (vector long long *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector long long a_vector; +}; + +vector long long +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector long long arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector long long *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector long long arg) +{ + vector long long v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector long long *arg1, vector long long arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector long long v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, int *argv[]) +main (int argc, char *argv[]) { - vector long long int fetched_value = foo (); - if (fetched_value[0] != 0 || fetched_value[1] != 1) + vector long long fetched_value = foo (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + fetched_value = foo2 (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) abort (); - else - return 0; + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + z[0] = 7096; + z[1] = 6048; + + baz (z); + if (x[0] != 7096 || x[1] != 6048) + abort (); + + vector long long source = { 8192, 7096}; + + baz1 (source); + if (x[0] != 8192 || x[1] != 7096) + abort (); + + vector long long dest; + baz2 (&dest, source); + if (dest[0] != 8192 || dest[1] != 7096) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-41.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-41.c index 3ac52fda59a..f8b1d03b908 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-41.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-41.c @@ -7,20 +7,150 @@ extern void abort (void); -const vector long long int y = { 0, 1 }; +vector long long x; +const vector long long y = { 1024, 2048 }; +vector long long z; -vector long long int +vector long long foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector long long +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. 
*/ +} + +void __attribute__ ((noinline)) +fill_local (vector long long *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector long long +foo2 (void) +{ + vector long long v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector long long +foo3 (vector long long *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector long long a_vector; +}; + +vector long long +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector long long arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector long long *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector long long arg) +{ + vector long long v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector long long *arg1, vector long long arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector long long v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { - vector long long int fetched_value = foo (); - if (fetched_value[0] != 0 || fetched_value[1] != 1) + vector long long fetched_value = foo (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + fetched_value = foo2 (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) abort (); - else - return 0; + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + z[0] = 7096; + z[1] = 6048; + + baz (z); + if (x[0] != 7096 || x[1] != 6048) + abort (); + + vector long long source = { 8192, 7096}; + + baz1 (source); + if (x[0] != 8192 || x[1] != 7096) + abort (); + + vector long long dest; + baz2 (&dest, source); + if (dest[0] != 8192 || dest[1] != 7096) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-42.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-42.c index 1f2a73a07b0..a8efc304d51 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-42.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-42.c @@ -2,27 +2,161 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. 
However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. */ #include <altivec.h> extern void abort (void); -const vector long long int y = { 0, 1 }; +vector long long x; +const vector long long y = { 1024, 2048 }; +vector long long z; -vector long long int +vector long long foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector long long +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector long long *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector long long +foo2 (void) +{ + vector long long v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector long long +foo3 (vector long long *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector long long a_vector; +}; + +vector long long +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector long long arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector long long *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector long long arg) +{ + vector long long v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. 
*/ +void +baz2 (vector long long *arg1, vector long long arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector long long v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { - vector long long int fetched_value = foo (); - if (fetched_value[0] != 0 || fetched_value[1] != 1) + vector long long fetched_value = foo (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + fetched_value = foo2 (); + if (fetched_value[0] != 1024 || fetched_value[1] != 2048) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) abort (); - else - return 0; + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 2048 || fetched_value[0] != 1024) + abort (); + + z[0] = 7096; + z[1] = 6048; + + baz (z); + if (x[0] != 7096 || x[1] != 6048) + abort (); + + vector long long source = { 8192, 7096}; + + baz1 (source); + if (x[0] != 8192 || x[1] != 7096) + abort (); + + vector long long dest; + baz2 (&dest, source); + if (dest[0] != 8192 || dest[1] != 7096) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 7096 || a_struct.a_vector[0] != 8192) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-43.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-43.c index c3caf8898e1..ca1dc97fabf 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-43.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-43.c @@ -7,20 +7,150 @@ extern void abort (void); -vector double y = { 0.0, 0.1 }; +vector double x; +vector double y = { 0.1, 0.2 }; +vector double z; vector double foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector double +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector double *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector double +foo2 (void) +{ + vector double v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector double +foo3 (vector double *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector double a_vector; +}; + +vector double +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. 
*/ +void +baz (vector double arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector double *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector double arg) +{ + vector double v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector double *arg1, vector double arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector double v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector double fetched_value = foo (); - if (fetched_value[0] != 0 || fetched_value[1] != 0.1) + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + z[0] = 0.7; + z[1] = 0.6; + + baz (z); + if (x[0] != 0.7 || x[1] != 0.6) + abort (); + + vector double source = { 0.8, 0.7 }; + + baz1 (source); + if (x[0] != 0.8 || x[1] != 0.7) + abort (); + + vector double dest; + baz2 (&dest, source); + if (dest[0] != 0.8 || dest[1] != 0.7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-44.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-44.c index 8ab513c0440..648016988a9 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-44.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-44.c @@ -7,20 +7,150 @@ extern void abort (void); -const vector double y = { 0.0, 0.1 }; +vector double x; +const vector double y = { 0.1, 0.2 }; +vector double z; vector double foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector double +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector double *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. */ +vector double +foo2 (void) +{ + vector double v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector double +foo3 (vector double *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector double a_vector; +}; + +vector double +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. 
*/ +void +baz (vector double arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector double *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector double arg) +{ + vector double v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector double *arg1, vector double arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector double v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. */ } int -main (int argc, char *argv[]) +main (int argc, char *argv[]) { vector double fetched_value = foo (); - if (fetched_value[0] != 0.0 || fetched_value[1] != 0.1) + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. 
*/ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + z[0] = 0.7; + z[1] = 0.6; + + baz (z); + if (x[0] != 0.7 || x[1] != 0.6) + abort (); + + vector double source = { 0.8, 0.7 }; + + baz1 (source); + if (x[0] != 0.8 || x[1] != 0.7) + abort (); + + vector double dest; + baz2 (&dest, source); + if (dest[0] != 0.8 || dest[1] != 0.7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-45.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-45.c index 69953589782..3e8c7e77567 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-45.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-45.c @@ -2,27 +2,161 @@ /* { dg-require-effective-target powerpc_p8vector_ok } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* { dg-final { scan-assembler-not "xxswapd" } } */ + +/* Previous versions of this test required that the assembler does not + contain xxpermdi or xxswapd. However, with the more sophisticated + code generation used today, it is now possible that xxpermdi (aka + xxswapd) show up without being part of a lxvd2x or stxvd2x + sequence. */ #include extern void abort (void); -const vector double y = { 0.0, 0.1 }; +vector double x; +const vector double y = { 0.1, 0.2 }; +vector double z; vector double foo (void) { - return y; + return y; /* Remove 1 swap and use lvx. */ +} + +vector double +foo1 (void) +{ + x = y; /* Remove 2 redundant swaps here. */ + return x; /* Remove 1 swap and use lvx. */ +} + +void __attribute__ ((noinline)) +fill_local (vector double *vp) +{ + *vp = x; /* Remove 2 redundant swaps here. */ +} + +/* Test aligned load from local. 
*/ +vector double +foo2 (void) +{ + vector double v; + + /* Need to be clever here because v will normally reside in a + register rather than memory. */ + fill_local (&v); + return v; /* Remove 1 swap and use lvx. */ +} + + +/* Test aligned load from pointer. */ +vector double +foo3 (vector double *arg) +{ + return *arg; /* Remove 1 swap and use lvx. */ +} + +/* In this structure, the compiler should insert padding to assure + that a_vector is properly aligned. */ +struct bar { + short a_field; + vector double a_vector; +}; + +vector double +foo4 (struct bar *bp) +{ + return bp->a_vector; /* Remove 1 swap and use lvx. */ +} + +/* Test aligned store to global. */ +void +baz (vector double arg) +{ + x = arg; /* Remove 1 swap and use stvx. */ +} + +void __attribute__ ((noinline)) +copy_local (vector double *arg) +{ + x = *arg; /* Remove 2 redundant swaps. */ +} + + +/* Test aligned store to local. */ +void +baz1 (vector double arg) +{ + vector double v; + + /* Need cleverness, because v will normally reside in a register + rather than memory. */ + v = arg; /* Aligned store to local: remove 1 + swap and use stvx. */ + copy_local (&v); +} + +/* Test aligned store to pointer. */ +void +baz2 (vector double *arg1, vector double arg2) +{ + /* Assume arg2 resides in register. */ + *arg1 = arg2; /* Remove 1 swap and use stvx. */ +} + +void +baz3 (struct bar *bp, vector double v) +{ + /* Assume v resides in register. */ + bp->a_vector = v; /* Remove 1 swap and use stvx. 
*/ } int -main (int argc, char *argv[]) +main (double argc, double *argv[]) { vector double fetched_value = foo (); - if (fetched_value[0] != 0.0 || fetched_value[15] != 0.1) + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo1 (); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) abort (); - else - return 0; + + fetched_value = foo2 (); + if (fetched_value[0] != 0.1 || fetched_value[1] != 0.2) + abort (); + + fetched_value = foo3 (&x); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + struct bar a_struct; + a_struct.a_vector = x; /* Remove 2 redundant swaps. */ + fetched_value = foo4 (&a_struct); + if (fetched_value[1] != 0.2 || fetched_value[0] != 0.1) + abort (); + + z[0] = 0.7; + z[1] = 0.6; + + baz (z); + if (x[0] != 0.7 || x[1] != 0.6) + abort (); + + vector double source = { 0.8, 0.7 }; + + baz1 (source); + if (x[0] != 0.8 || x[1] != 0.7) + abort (); + + vector double dest; + baz2 (&dest, source); + if (dest[0] != 0.8 || dest[1] != 0.7) + abort (); + + baz3 (&a_struct, source); + if (a_struct.a_vector[1] != 0.7 || a_struct.a_vector[0] != 0.8) + abort (); + + return 0; } diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c index 007aeaf2fa1..2aea0baeff4 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-2.c @@ -33,5 +33,7 @@ add_long_1 (vector long *p, long x) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c index 2df48489fc7..aeacbfb262b 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-3.c @@ -22,5 +22,7 @@ add_long_n (vector long *p, long x, long n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-5.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-5.c index d85dab96bb6..9ee7bf2aa6c 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-5.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-5.c @@ -64,5 +64,7 @@ add_signed_char_n (vector signed char *p, int n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-6.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-6.c index 667b34948ba..c18f439080d 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-6.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-6.c @@ -64,5 +64,7 @@ add_unsigned_char_n (vector unsigned char *p, int n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-7.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-7.c index 6ee7c66890e..c3bc1dec128 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-7.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-7.c @@ -40,5 +40,7 @@ add_float_n (vector float *p, long n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-8.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-8.c index 95430904639..12e7c8977fc 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-8.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-8.c @@ -40,5 +40,7 @@ add_int_n (vector int *p, int n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-9.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-9.c index 5932013696e..2adadf21951 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-9.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-9.c @@ -64,5 +64,7 @@ add_short_n (vector short *p, int n) /* { dg-final { scan-assembler-not "lxvw4x" } } */ /* { dg-final { scan-assembler-not "lxvx" } } */ /* { dg-final { scan-assembler-not "lxv" } } */ -/* { dg-final { scan-assembler-not "lvx" } } */ -/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* With recent enhancements to the code generator, it is considered + * legal to implement vec_extract with lvx and xxpermdi. Previous + * versions of this test forbid both instructions. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c index ad424b25572..ddb0089c396 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c @@ -7,10 +7,10 @@ /* { dg-final { scan-assembler-times "xvabsdp" 1 } } */ /* { dg-final { scan-assembler-times "xvadddp" 1 } } */ -/* { dg-final { scan-assembler-times "xxlnor" 6 } } */ -/* { dg-final { scan-assembler-times "xxlor" 16 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 8 } } */ +/* { dg-final { scan-assembler-times "xxlor" 30 } } */ /* { dg-final { scan-assembler-times "xvcmpeqdp" 5 } } */ -/* { dg-final { scan-assembler-times "xvcmpgtdp" 7 } } */ +/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */ /* { dg-final { scan-assembler-times "xvcmpgedp" 6 } } */ /* { dg-final { scan-assembler-times "xvrdpim" 1 } } */ /* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */ @@ -26,7 +26,7 @@ /* { dg-final { scan-assembler-times "xvmsubasp" 1 } } */ /* { dg-final { scan-assembler-times "xvnmaddasp" 1 } } */ /* { dg-final { scan-assembler-times "vmsumshs" 1 } } */ -/* { dg-final { scan-assembler-times "xxland" 9 } } */ +/* { dg-final { scan-assembler-times "xxland" 13 } } */ /* Source code for the test in vsx-vector-6.h */ #include "vsx-vector-6.h" diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 34b676db99e..b69e3645bad 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,10 @@ +2018-01-10 Kelvin Nilsen + + * lex.c (search_line_fast): Remove illegal coercion of an + unaligned pointer value to vector pointer type and replace with + use of __builtin_vec_vsx_ld () built-in function, which operates + on unaligned pointer values. + 2018-01-03 Jakub Jelinek Update copyright years. 
diff --git a/libcpp/lex.c b/libcpp/lex.c index 860b31ea6c3..92c62517a4d 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -568,7 +568,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) { vc m_nl, m_cr, m_bs, m_qm; - data = *((const vc *)s); + data = __builtin_vec_vsx_ld (0, s); s += 16; m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);