From 006ba5047cea15ce6f29b0847009ae901b874d50 Mon Sep 17 00:00:00 2001 From: Ilya Enkovich Date: Tue, 29 Sep 2015 09:32:40 +0000 Subject: [PATCH] re PR target/65105 ([i386] XMM registers are not used for 64bit computations on 32bit target) gcc/ PR target/65105 * config/i386/i386.c: Include dbgcnt.h. (has_non_address_hard_reg): New. (convertible_comparison_p): New. (scalar_to_vector_candidate_p): New. (remove_non_convertible_regs): New. (scalar_chain): New. (scalar_chain::scalar_chain): New. (scalar_chain::~scalar_chain): New. (scalar_chain::add_to_queue): New. (scalar_chain::mark_dual_mode_def): New. (scalar_chain::analyze_register_chain): New. (scalar_chain::add_insn): New. (scalar_chain::build): New. (scalar_chain::compute_convert_gain): New. (scalar_chain::replace_with_subreg): New. (scalar_chain::replace_with_subreg_in_insn): New. (scalar_chain::emit_conversion_insns): New. (scalar_chain::make_vector_copies): New. (scalar_chain::convert_reg): New. (scalar_chain::convert_op): New. (scalar_chain::convert_insn): New. (scalar_chain::convert): New. (convert_scalars_to_vector): New. (pass_data_stv): New. (pass_stv): New. (make_pass_stv): New. (ix86_option_override): Created and register stv pass. (flag_opts): Add -mstv. (ix86_option_override_internal): Likewise. * config/i386/i386.md (SWIM1248x): New. (*movdi_internal): Add xmm to mem alternative for TARGET_STV. (and3): Use SWIM1248x iterator instead of SWIM. (*anddi3_doubleword): New. (*zext_doubleword): New. (*zextsi_doubleword): New. (3): Use SWIM1248x iterator instead of SWIM. (*di3_doubleword): New. * config/i386/i386.opt (mstv): New. * dbgcnt.def (stv_conversion): New. gcc/testsuite/ PR target/65105 * gcc.target/i386/pr65105-1.c: New. * gcc.target/i386/pr65105-2.c: New. * gcc.target/i386/pr65105-3.c: New. * gcc.target/i386/pr65105-4.C: New. * gcc.dg/lower-subreg-1.c: Add -mno-stv options for ia32. 
From-SVN: r228231 --- gcc/ChangeLog | 43 + gcc/config/i386/i386.c | 951 ++++++++++++++++++++++ gcc/config/i386/i386.md | 112 ++- gcc/config/i386/i386.opt | 5 + gcc/dbgcnt.def | 1 + gcc/testsuite/ChangeLog | 9 + gcc/testsuite/gcc.dg/lower-subreg-1.c | 1 + gcc/testsuite/gcc.target/i386/pr65105-1.c | 50 ++ gcc/testsuite/gcc.target/i386/pr65105-2.c | 12 + gcc/testsuite/gcc.target/i386/pr65105-3.c | 16 + gcc/testsuite/gcc.target/i386/pr65105-4.C | 19 + 11 files changed, 1199 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr65105-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr65105-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr65105-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr65105-4.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c6140493cee..12de2e10ae7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,46 @@ +2015-09-29 Ilya Enkovich + + PR target/65105 + * config/i386/i386.c: Include dbgcnt.h. + (has_non_address_hard_reg): New. + (convertible_comparison_p): New. + (scalar_to_vector_candidate_p): New. + (remove_non_convertible_regs): New. + (scalar_chain): New. + (scalar_chain::scalar_chain): New. + (scalar_chain::~scalar_chain): New. + (scalar_chain::add_to_queue): New. + (scalar_chain::mark_dual_mode_def): New. + (scalar_chain::analyze_register_chain): New. + (scalar_chain::add_insn): New. + (scalar_chain::build): New. + (scalar_chain::compute_convert_gain): New. + (scalar_chain::replace_with_subreg): New. + (scalar_chain::replace_with_subreg_in_insn): New. + (scalar_chain::emit_conversion_insns): New. + (scalar_chain::make_vector_copies): New. + (scalar_chain::convert_reg): New. + (scalar_chain::convert_op): New. + (scalar_chain::convert_insn): New. + (scalar_chain::convert): New. + (convert_scalars_to_vector): New. + (pass_data_stv): New. + (pass_stv): New. + (make_pass_stv): New. + (ix86_option_override): Created and register stv pass. + (flag_opts): Add -mstv. 
+ (ix86_option_override_internal): Likewise. + * config/i386/i386.md (SWIM1248x): New. + (*movdi_internal): Add xmm to mem alternative for TARGET_STV. + (and3): Use SWIM1248x iterator instead of SWIM. + (*anddi3_doubleword): New. + (*zext_doubleword): New. + (*zextsi_doubleword): New. + (3): Use SWIM1248x iterator instead of SWIM. + (*di3_doubleword): New. + * config/i386/i386.opt (mstv): New. + * dbgcnt.def (stv_conversion): New. + 2015-09-29 Tom de Vries * tree-cfg.c (dump_function_to_file): Dump function attributes. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d370521430b..6f2380f2821 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -87,6 +87,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-iterator.h" #include "tree-chkp.h" #include "rtl-chkp.h" +#include "dbgcnt.h" /* This file should be included last. */ #include "target-def.h" @@ -2602,6 +2603,908 @@ rest_of_handle_insert_vzeroupper (void) return 0; } +/* Return 1 if INSN uses or defines a hard register. + Hard register uses in a memory address are ignored. + Clobbers and flags definitions are ignored. */ + +static bool +has_non_address_hard_reg (rtx_insn *insn) +{ + df_ref ref; + FOR_EACH_INSN_DEF (ref, insn) + if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) + && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) + && DF_REF_REGNO (ref) != FLAGS_REG) + return true; + + FOR_EACH_INSN_USE (ref, insn) + if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) + return true; + + return false; +} + +/* Check if comparison INSN may be transformed + into vector comparison. 
Currently we transform + zero checks only which look like: + + (set (reg:CCZ 17 flags) + (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) + (subreg:SI (reg:DI x) 0)) + (const_int 0 [0]))) */ + +static bool +convertible_comparison_p (rtx_insn *insn) +{ + if (!TARGET_SSE4_1) + return false; + + rtx def_set = single_set (insn); + + gcc_assert (def_set); + + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + gcc_assert (GET_CODE (src) == COMPARE); + + if (GET_CODE (dst) != REG + || REGNO (dst) != FLAGS_REG + || GET_MODE (dst) != CCZmode) + return false; + + rtx op1 = XEXP (src, 0); + rtx op2 = XEXP (src, 1); + + if (op2 != CONST0_RTX (GET_MODE (op2))) + return false; + + if (GET_CODE (op1) != IOR) + return false; + + op2 = XEXP (op1, 1); + op1 = XEXP (op1, 0); + + if (!SUBREG_P (op1) + || !SUBREG_P (op2) + || GET_MODE (op1) != SImode + || GET_MODE (op2) != SImode + || ((SUBREG_BYTE (op1) != 0 + || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) + && (SUBREG_BYTE (op2) != 0 + || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) + return false; + + op1 = SUBREG_REG (op1); + op2 = SUBREG_REG (op2); + + if (op1 != op2 + || !REG_P (op1) + || GET_MODE (op1) != DImode) + return false; + + return true; +} + +/* Return 1 if INSN may be converted into vector + instruction. */ + +static bool +scalar_to_vector_candidate_p (rtx_insn *insn) +{ + rtx def_set = single_set (insn); + + if (!def_set) + return false; + + if (has_non_address_hard_reg (insn)) + return false; + + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + if (GET_CODE (src) == COMPARE) + return convertible_comparison_p (insn); + + /* We are interested in DImode promotion only. 
*/ + if (GET_MODE (src) != DImode + || GET_MODE (dst) != DImode) + return false; + + if (!REG_P (dst) && !MEM_P (dst)) + return false; + + switch (GET_CODE (src)) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case AND: + break; + + case REG: + return true; + + case MEM: + return REG_P (dst); + + default: + return false; + } + + if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))) + return false; + + if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) + return false; + + if (GET_MODE (XEXP (src, 0)) != DImode + || GET_MODE (XEXP (src, 1)) != DImode) + return false; + + return true; +} + +/* For a given bitmap of insn UIDs scans all instruction and + remove insn from CANDIDATES in case it has both convertible + and not convertible definitions. + + All insns in a bitmap are conversion candidates according to + scalar_to_vector_candidate_p. Currently it implies all insns + are single_set. */ + +static void +remove_non_convertible_regs (bitmap candidates) +{ + bitmap_iterator bi; + unsigned id; + bitmap regs = BITMAP_ALLOC (NULL); + + EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) + { + rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); + rtx reg = SET_DEST (def_set); + + if (!REG_P (reg) + || bitmap_bit_p (regs, REGNO (reg)) + || HARD_REGISTER_P (reg)) + continue; + + for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); + def; + def = DF_REF_NEXT_REG (def)) + { + if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) + { + if (dump_file) + fprintf (dump_file, + "r%d has non convertible definition in insn %d\n", + REGNO (reg), DF_REF_INSN_UID (def)); + + bitmap_set_bit (regs, REGNO (reg)); + break; + } + } + } + + EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) + { + for (df_ref def = DF_REG_DEF_CHAIN (id); + def; + def = DF_REF_NEXT_REG (def)) + if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) + { + if (dump_file) + fprintf (dump_file, "Removing insn %d from candidates list\n", + DF_REF_INSN_UID (def)); + + bitmap_clear_bit (candidates, DF_REF_INSN_UID 
(def)); + } + } + + BITMAP_FREE (regs); +} + +class scalar_chain +{ + public: + scalar_chain (); + ~scalar_chain (); + + static unsigned max_id; + + /* ID of a chain. */ + unsigned int chain_id; + /* A queue of instructions to be included into a chain. */ + bitmap queue; + /* Instructions included into a chain. */ + bitmap insns; + /* All registers defined by a chain. */ + bitmap defs; + /* Registers used in both vector and sclar modes. */ + bitmap defs_conv; + + void build (bitmap candidates, unsigned insn_uid); + int compute_convert_gain (); + int convert (); + + private: + void add_insn (bitmap candidates, unsigned insn_uid); + void add_to_queue (unsigned insn_uid); + void mark_dual_mode_def (df_ref def); + void analyze_register_chain (bitmap candidates, df_ref ref); + rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); + void emit_conversion_insns (rtx insns, rtx_insn *pos); + void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); + void convert_insn (rtx_insn *insn); + void convert_op (rtx *op, rtx_insn *insn); + void convert_reg (unsigned regno); + void make_vector_copies (unsigned regno); +}; + +unsigned scalar_chain::max_id = 0; + +/* Initialize new chain. */ + +scalar_chain::scalar_chain () +{ + chain_id = ++max_id; + + if (dump_file) + fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); + + bitmap_obstack_initialize (NULL); + insns = BITMAP_ALLOC (NULL); + defs = BITMAP_ALLOC (NULL); + defs_conv = BITMAP_ALLOC (NULL); + queue = NULL; +} + +/* Free chain's data. */ + +scalar_chain::~scalar_chain () +{ + BITMAP_FREE (insns); + BITMAP_FREE (defs); + BITMAP_FREE (defs_conv); + bitmap_obstack_release (NULL); +} + +/* Add instruction into chains' queue. 
*/ + +void +scalar_chain::add_to_queue (unsigned insn_uid) +{ + if (bitmap_bit_p (insns, insn_uid) + || bitmap_bit_p (queue, insn_uid)) + return; + + if (dump_file) + fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", + insn_uid, chain_id); + bitmap_set_bit (queue, insn_uid); +} + +/* Mark register defined by DEF as requiring conversion. */ + +void +scalar_chain::mark_dual_mode_def (df_ref def) +{ + gcc_assert (DF_REF_REG_DEF_P (def)); + + if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) + return; + + if (dump_file) + fprintf (dump_file, + " Mark r%d def in insn %d as requiring both modes in chain #%d\n", + DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); + + bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); +} + +/* Check REF's chain to add new insns into a queue + and find registers requiring conversion. */ + +void +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) +{ + df_link *chain; + + gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) + || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); + add_to_queue (DF_REF_INSN_UID (ref)); + + for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) + { + unsigned uid = DF_REF_INSN_UID (chain->ref); + if (!DF_REF_REG_MEM_P (chain->ref)) + { + if (bitmap_bit_p (insns, uid)) + continue; + + if (bitmap_bit_p (candidates, uid)) + { + add_to_queue (uid); + continue; + } + } + + if (DF_REF_REG_DEF_P (chain->ref)) + { + if (dump_file) + fprintf (dump_file, " r%d def in insn %d isn't convertible\n", + DF_REF_REGNO (chain->ref), uid); + mark_dual_mode_def (chain->ref); + } + else + { + if (dump_file) + fprintf (dump_file, " r%d use in insn %d isn't convertible\n", + DF_REF_REGNO (chain->ref), uid); + mark_dual_mode_def (ref); + } + } +} + +/* Add instruction into a chain. 
*/ + +void +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) +{ + if (bitmap_bit_p (insns, insn_uid)) + return; + + if (dump_file) + fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); + + bitmap_set_bit (insns, insn_uid); + + rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; + rtx def_set = single_set (insn); + if (def_set && REG_P (SET_DEST (def_set)) + && !HARD_REGISTER_P (SET_DEST (def_set))) + bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); + + df_ref ref; + df_ref def; + for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) + if (!HARD_REGISTER_P (DF_REF_REG (ref))) + for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); + def; + def = DF_REF_NEXT_REG (def)) + analyze_register_chain (candidates, def); + for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) + if (!DF_REF_REG_MEM_P (ref)) + analyze_register_chain (candidates, ref); +} + +/* Build new chain starting from insn INSN_UID recursively + adding all dependent uses and definitions. */ + +void +scalar_chain::build (bitmap candidates, unsigned insn_uid) +{ + queue = BITMAP_ALLOC (NULL); + bitmap_set_bit (queue, insn_uid); + + if (dump_file) + fprintf (dump_file, "Building chain #%d...\n", chain_id); + + while (!bitmap_empty_p (queue)) + { + insn_uid = bitmap_first_set_bit (queue); + bitmap_clear_bit (queue, insn_uid); + bitmap_clear_bit (candidates, insn_uid); + add_insn (candidates, insn_uid); + } + + if (dump_file) + { + fprintf (dump_file, "Collected chain #%d...\n", chain_id); + fprintf (dump_file, " insns: "); + dump_bitmap (dump_file, insns); + if (!bitmap_empty_p (defs_conv)) + { + bitmap_iterator bi; + unsigned id; + const char *comma = ""; + fprintf (dump_file, " defs to convert: "); + EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) + { + fprintf (dump_file, "%sr%d", comma, id); + comma = ", "; + } + fprintf (dump_file, "\n"); + } + } + + BITMAP_FREE (queue); +} + +/* Compute a gain for chain conversion. 
*/ + +int +scalar_chain::compute_convert_gain () +{ + bitmap_iterator bi; + unsigned insn_uid; + int gain = 0; + int cost = 0; + + if (dump_file) + fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); + + EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + if (REG_P (src) && REG_P (dst)) + gain += COSTS_N_INSNS (2) - ix86_cost->sse_move; + else if (REG_P (src) && MEM_P (dst)) + gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; + else if (MEM_P (src) && REG_P (dst)) + gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; + else if (GET_CODE (src) == PLUS + || GET_CODE (src) == MINUS + || GET_CODE (src) == IOR + || GET_CODE (src) == XOR + || GET_CODE (src) == AND) + gain += ix86_cost->add; + else if (GET_CODE (src) == COMPARE) + { + /* Assume comparison cost is the same. */ + } + else + gcc_unreachable (); + } + + if (dump_file) + fprintf (dump_file, " Instruction convertion gain: %d\n", gain); + + EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) + cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; + + if (dump_file) + fprintf (dump_file, " Registers convertion cost: %d\n", cost); + + gain -= cost; + + if (dump_file) + fprintf (dump_file, " Total gain: %d\n", gain); + + return gain; +} + +/* Replace REG in X with a V2DI subreg of NEW_REG. 
*/ + +rtx +scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) +{ + if (x == reg) + return gen_rtx_SUBREG (V2DImode, new_reg, 0); + + const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); + int i, j; + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), + reg, new_reg); + } + + return x; +} + +/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ + +void +scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg) +{ + replace_with_subreg (single_set (insn), reg, new_reg); +} + +/* Insert generated conversion instruction sequence INSNS + after instruction AFTER. New BB may be required in case + instruction has EH region attached. */ + +void +scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) +{ + if (!control_flow_insn_p (after)) + { + emit_insn_after (insns, after); + return; + } + + basic_block bb = BLOCK_FOR_INSN (after); + edge e = find_fallthru_edge (bb->succs); + gcc_assert (e); + + basic_block new_bb = split_edge (e); + emit_insn_after (insns, BB_HEAD (new_bb)); +} + +/* Make vector copies for all register REGNO definitions + and replace its uses in a chain. 
*/ + +void +scalar_chain::make_vector_copies (unsigned regno) +{ + rtx reg = regno_reg_rtx[regno]; + rtx vreg = gen_reg_rtx (DImode); + df_ref ref; + + for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + rtx_insn *insn = DF_REF_INSN (ref); + + start_sequence (); + if (TARGET_SSE4_1) + { + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 0))); + emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (SImode, reg, 4), + GEN_INT (2))); + } + else if (TARGET_INTER_UNIT_MOVES_TO_VEC) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 0))); + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 4))); + emit_insn (gen_vec_interleave_lowv4si + (gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, tmp, 0))); + } + else + { + rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP); + emit_move_insn (adjust_address (tmp, SImode, 0), + gen_rtx_SUBREG (SImode, reg, 0)); + emit_move_insn (adjust_address (tmp, SImode, 4), + gen_rtx_SUBREG (SImode, reg, 4)); + emit_move_insn (vreg, tmp); + } + emit_conversion_insns (get_insns (), insn); + end_sequence (); + + if (dump_file) + fprintf (dump_file, + " Copied r%d to a vector register r%d for insn %d\n", + regno, REGNO (vreg), DF_REF_INSN_UID (ref)); + } + + for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg); + + if (dump_file) + fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", + regno, REGNO (vreg), DF_REF_INSN_UID (ref)); + } +} + +/* Convert all definitions of 
register REGNO + and fix its uses. Scalar copies may be created + in case register is used in not convertible insn. */ + +void +scalar_chain::convert_reg (unsigned regno) +{ + bool scalar_copy = bitmap_bit_p (defs_conv, regno); + rtx reg = regno_reg_rtx[regno]; + rtx scopy = NULL_RTX; + df_ref ref; + bitmap conv; + + conv = BITMAP_ALLOC (NULL); + bitmap_copy (conv, insns); + + if (scalar_copy) + scopy = gen_reg_rtx (DImode); + + for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + { + rtx_insn *insn = DF_REF_INSN (ref); + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx reg = DF_REF_REG (ref); + + if (!MEM_P (src)) + { + replace_with_subreg_in_insn (insn, reg, reg); + bitmap_clear_bit (conv, INSN_UID (insn)); + } + + if (scalar_copy) + { + rtx vcopy = gen_reg_rtx (V2DImode); + + start_sequence (); + if (TARGET_INTER_UNIT_MOVES_FROM_VEC) + { + emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), + gen_rtx_SUBREG (SImode, vcopy, 0)); + emit_move_insn (vcopy, + gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), + gen_rtx_SUBREG (SImode, vcopy, 0)); + } + else + { + rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP); + emit_move_insn (tmp, reg); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), + adjust_address (tmp, SImode, 0)); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), + adjust_address (tmp, SImode, 4)); + } + emit_conversion_insns (get_insns (), insn); + end_sequence (); + + if (dump_file) + fprintf (dump_file, + " Copied r%d to a scalar register r%d for insn %d\n", + regno, REGNO (scopy), INSN_UID (insn)); + } + } + + for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) + { + rtx def_set = single_set (DF_REF_INSN (ref)); + if (!MEM_P (SET_DEST (def_set)) + || !REG_P (SET_SRC 
(def_set))) + replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg); + bitmap_clear_bit (conv, DF_REF_INSN_UID (ref)); + } + } + else + { + replace_rtx (DF_REF_INSN (ref), reg, scopy); + df_insn_rescan (DF_REF_INSN (ref)); + } + + BITMAP_FREE (conv); +} + +/* Convert operand OP in INSN. All register uses + are converted during registers conversion. + Therefore we should just handle memory operands. */ + +void +scalar_chain::convert_op (rtx *op, rtx_insn *insn) +{ + *op = copy_rtx_if_shared (*op); + + if (MEM_P (*op)) + { + rtx tmp = gen_reg_rtx (DImode); + + emit_insn_before (gen_move_insn (tmp, *op), insn); + *op = gen_rtx_SUBREG (V2DImode, tmp, 0); + + if (dump_file) + fprintf (dump_file, " Preloading operand for insn %d into r%d\n", + INSN_UID (insn), REGNO (tmp)); + } + else + { + gcc_assert (SUBREG_P (*op)); + gcc_assert (GET_MODE (*op) == V2DImode); + } +} + +/* Convert INSN to vector mode. */ + +void +scalar_chain::convert_insn (rtx_insn *insn) +{ + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + rtx subreg; + + if (MEM_P (dst) && !REG_P (src)) + { + /* There are no scalar integer instructions and therefore + temporary register usage is required. 
*/ + rtx tmp = gen_reg_rtx (DImode); + emit_conversion_insns (gen_move_insn (dst, tmp), insn); + dst = gen_rtx_SUBREG (V2DImode, tmp, 0); + } + + switch (GET_CODE (src)) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case AND: + convert_op (&XEXP (src, 0), insn); + convert_op (&XEXP (src, 1), insn); + PUT_MODE (src, V2DImode); + break; + + case MEM: + if (!REG_P (dst)) + convert_op (&src, insn); + break; + + case REG: + break; + + case SUBREG: + gcc_assert (GET_MODE (src) == V2DImode); + break; + + case COMPARE: + src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); + + gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) + || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); + + if (REG_P (src)) + subreg = gen_rtx_SUBREG (V2DImode, src, 0); + else + subreg = copy_rtx_if_shared (src); + emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg)), + insn); + dst = gen_rtx_REG (CCmode, FLAGS_REG); + src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), + copy_rtx_if_shared (src)), + UNSPEC_PTEST); + break; + + default: + gcc_unreachable (); + } + + SET_SRC (def_set) = src; + SET_DEST (def_set) = dst; + + /* Drop possible dead definitions. */ + PATTERN (insn) = def_set; + + INSN_CODE (insn) = -1; + recog_memoized (insn); + df_insn_rescan (insn); +} + +/* Convert whole chain creating required register + conversions and copies. 
*/ + +int +scalar_chain::convert () +{ + bitmap_iterator bi; + unsigned id; + int converted_insns = 0; + + if (!dbg_cnt (stv_conversion)) + return 0; + + if (dump_file) + fprintf (dump_file, "Converting chain #%d...\n", chain_id); + + EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) + convert_reg (id); + + EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) + make_vector_copies (id); + + EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) + { + convert_insn (DF_INSN_UID_GET (id)->insn); + converted_insns++; + } + + return converted_insns; +} + +/* Main STV pass function. Find and convert scalar + instructions into vector mode when profitable. */ + +static unsigned int +convert_scalars_to_vector () +{ + basic_block bb; + bitmap candidates; + int converted_insns = 0; + + bitmap_obstack_initialize (NULL); + candidates = BITMAP_ALLOC (NULL); + + calculate_dominance_info (CDI_DOMINATORS); + df_set_flags (DF_DEFER_INSN_RESCAN); + df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); + df_md_add_problem (); + df_analyze (); + + /* Find all instructions we want to convert into vector mode. */ + if (dump_file) + fprintf (dump_file, "Searching for mode convertion candidates...\n"); + + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (scalar_to_vector_candidate_p (insn)) + { + if (dump_file) + fprintf (dump_file, " insn %d is marked as a candidate\n", + INSN_UID (insn)); + + bitmap_set_bit (candidates, INSN_UID (insn)); + } + } + + remove_non_convertible_regs (candidates); + + if (bitmap_empty_p (candidates)) + if (dump_file) + fprintf (dump_file, "There are no candidates for optimization.\n"); + + while (!bitmap_empty_p (candidates)) + { + unsigned uid = bitmap_first_set_bit (candidates); + scalar_chain chain; + + /* Find instructions chain we want to convert to vector mode. + Check all uses and definitions to estimate all required + conversions. 
*/ + chain.build (candidates, uid); + + if (chain.compute_convert_gain () > 0) + converted_insns += chain.convert (); + else + if (dump_file) + fprintf (dump_file, "Chain #%d conversion is not profitable\n", + chain.chain_id); + } + + if (dump_file) + fprintf (dump_file, "Total insns converted: %d\n", converted_insns); + + BITMAP_FREE (candidates); + bitmap_obstack_release (NULL); + df_process_deferred_rescans (); + + /* Conversion means we may have 128bit register spills/fills + which require aligned stack. */ + if (converted_insns) + { + if (crtl->stack_alignment_needed < 128) + crtl->stack_alignment_needed = 128; + if (crtl->stack_alignment_estimated < 128) + crtl->stack_alignment_estimated = 128; + } + + return 0; +} + namespace { const pass_data pass_data_insert_vzeroupper = @@ -2639,6 +3542,39 @@ public: }; // class pass_insert_vzeroupper +const pass_data pass_data_stv = +{ + RTL_PASS, /* type */ + "stv", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_stv : public rtl_opt_pass +{ +public: + pass_stv (gcc::context *ctxt) + : rtl_opt_pass (pass_data_stv, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1; + } + + virtual unsigned int execute (function *) + { + return convert_scalars_to_vector (); + } + +}; // class pass_stv + } // anon namespace rtl_opt_pass * @@ -2647,6 +3583,12 @@ make_pass_insert_vzeroupper (gcc::context *ctxt) return new pass_insert_vzeroupper (ctxt); } +rtl_opt_pass * +make_pass_stv (gcc::context *ctxt) +{ + return new pass_stv (ctxt); +} + /* Return true if a red-zone is in use. 
*/ static inline bool @@ -2756,6 +3698,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, { "-mvzeroupper", MASK_VZEROUPPER }, + { "-mstv", MASK_STV}, { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD}, { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE}, { "-mprefer-avx128", MASK_PREFER_AVX128}, @@ -4372,6 +5315,8 @@ ix86_option_override_internal (bool main_args_p, if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) opts->x_target_flags |= MASK_VZEROUPPER; + if (!(opts_set->x_target_flags & MASK_STV)) + opts->x_target_flags |= MASK_STV; if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; @@ -4485,12 +5430,18 @@ ix86_option_override (void) = { pass_insert_vzeroupper, "reload", 1, PASS_POS_INSERT_AFTER }; + opt_pass *pass_stv = make_pass_stv (g); + struct register_pass_info stv_info + = { pass_stv, "combine", + 1, PASS_POS_INSERT_AFTER + }; ix86_option_override_internal (true, &global_options, &global_options_set); /* This needs to be done at start up. It's convenient to do it here. */ register_pass (&insert_vzeroupper_info); + register_pass (&stv_info); } /* Implement the TARGET_OFFLOAD_OPTIONS hook. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ba5ab328e32..8c2ed606dee 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -981,6 +981,11 @@ (HI "TARGET_HIMODE_MATH") SI]) +;; Math-dependant integer modes with DImode. +(define_mode_iterator SWIM1248x [(QI "TARGET_QIMODE_MATH") + (HI "TARGET_HIMODE_MATH") + SI (DI "(TARGET_STV && TARGET_SSE2) || TARGET_64BIT")]) + ;; Math-dependant single word integer modes without QImode. 
(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH") SI (DI "TARGET_64BIT")]) @@ -2097,9 +2102,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2177,9 +2182,9 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23") + (eq_attr "alternative" "2,3,4,5,10,11,17,19,22,24") (const_string "x64") - (eq_attr "alternative" "17") + (eq_attr "alternative" "18") (const_string "x64_sse4") ] (const_string "*"))) @@ -2190,13 +2195,13 @@ (const_string "mmx") (eq_attr "alternative" "7,8,9,10,11") (const_string "mmxmov") - (eq_attr "alternative" "12,17") + (eq_attr "alternative" "12,18") (const_string "sselog1") - (eq_attr "alternative" "13,14,15,16,18") + (eq_attr "alternative" "13,14,15,16,17,19") (const_string "ssemov") - (eq_attr "alternative" "19,20") + (eq_attr "alternative" "20,21") (const_string "ssecvt") - (eq_attr "alternative" "21,22,23,24") + (eq_attr "alternative" "22,23,24,25") (const_string "mskmov") (and (match_operand 0 "register_operand") (match_operand 1 "pic_32bit_operand")) @@ -2211,16 +2216,16 @@ (set (attr "length_immediate") (cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov")) (const_string "8") - (eq_attr "alternative" "17") + (eq_attr "alternative" "18") (const_string "1") ] (const_string "*"))) (set (attr "prefix_rex") - (if_then_else (eq_attr "alternative" "10,11,16,17,18") + (if_then_else (eq_attr "alternative" "10,11,17,18,19") 
(const_string "1") (const_string "*"))) (set (attr "prefix_extra") - (if_then_else (eq_attr "alternative" "17") + (if_then_else (eq_attr "alternative" "18") (const_string "1") (const_string "*"))) (set (attr "prefix") @@ -2248,13 +2253,26 @@ ] (const_string "TI")) - (and (eq_attr "alternative" "14,15") + (and (eq_attr "alternative" "14,15,16") (not (match_test "TARGET_SSE2"))) (const_string "V2SF") - (eq_attr "alternative" "17") + (eq_attr "alternative" "18") (const_string "TI") ] - (const_string "DI")))]) + (const_string "DI"))) + (set (attr "enabled") + (cond [(eq_attr "alternative" "15") + (if_then_else + (match_test "TARGET_STV && TARGET_SSE2") + (symbol_ref "false") + (const_string "*")) + (eq_attr "alternative" "16") + (if_then_else + (match_test "TARGET_STV && TARGET_SSE2") + (symbol_ref "true") + (symbol_ref "false")) + ] + (const_string "*")))]) (define_split [(set (match_operand:DI 0 "nonimmediate_operand") @@ -3814,6 +3832,26 @@ "movz{bl|x}\t{%1, %k0|%k0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "SI")]) + +(define_insn_and_split "*zext_doubleword" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SWI12 1 "nonimmediate_operand" "m")))] + "!TARGET_64BIT && TARGET_STV && TARGET_SSE2" + "#" + "&& reload_completed && GENERAL_REG_P (operands[0])" + [(set (match_dup 0) (zero_extend:SI (match_dup 1))) + (set (match_dup 2) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);") + +(define_insn_and_split "*zextsi_doubleword" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] + "!TARGET_64BIT && TARGET_STV && TARGET_SSE2" + "#" + "&& reload_completed && GENERAL_REG_P (operands[0])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);") ;; Sign extension instructions @@ -7863,9 +7901,9 @@ ;; it should be done with 
splitters. (define_expand "and3" - [(set (match_operand:SWIM 0 "nonimmediate_operand") - (and:SWIM (match_operand:SWIM 1 "nonimmediate_operand") - (match_operand:SWIM 2 "")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (and:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand") + (match_operand:SWIM1248x 2 "")))] "" { machine_mode mode = mode; @@ -7943,6 +7981,23 @@ (const_string "*"))) (set_attr "mode" "SI,DI,DI,SI,DI")]) +(define_insn_and_split "*anddi3_doubleword" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (and:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 && ix86_binary_operator_ok (AND, DImode, operands)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (and:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 3) + (and:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") + (define_insn "*andsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k") (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k") @@ -8430,9 +8485,9 @@ ;; If this is considered useful, it should be done with splitters. 
(define_expand "3" - [(set (match_operand:SWIM 0 "nonimmediate_operand") - (any_or:SWIM (match_operand:SWIM 1 "nonimmediate_operand") - (match_operand:SWIM 2 "")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (any_or:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand") + (match_operand:SWIM1248x 2 "")))] "" "ix86_expand_binary_operator (, mode, operands); DONE;") @@ -8450,6 +8505,23 @@ [(set_attr "type" "alu,alu,msklog") (set_attr "mode" "")]) +(define_insn_and_split "*di3_doubleword" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (any_or:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 && ix86_binary_operator_ok (, DImode, operands)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (any_or:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 3) + (any_or:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") + (define_insn "*hi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k") (any_or:HI diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 042f3c1ab20..dae5c5d5464 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -567,6 +567,11 @@ Target Report Mask(VZEROUPPER) Save Generate vzeroupper instruction before a transfer of control flow out of the function. +mstv +Target Report Mask(STV) Save +Enable Scalar to Vector optimization pass transforming 64-bit integer +computations into vector ones.
+ mdispatch-scheduler Target RejectNegative Var(flag_dispatch_scheduler) Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 or bdver4 and Haifa scheduling diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def index 95f6b063f7a..583b16b0f4f 100644 --- a/gcc/dbgcnt.def +++ b/gcc/dbgcnt.def @@ -186,6 +186,7 @@ DEBUG_COUNTER (sel_sched_region_cnt) DEBUG_COUNTER (sms_sched_loop) DEBUG_COUNTER (split_for_sched2) DEBUG_COUNTER (store_motion) +DEBUG_COUNTER (stv_conversion) DEBUG_COUNTER (tail_call) DEBUG_COUNTER (treepre_insert) DEBUG_COUNTER (tree_sra) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e01d04cac88..26f4911c365 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2015-09-29 Ilya Enkovich + + PR target/65105 + * gcc.target/i386/pr65105-1.c: New. + * gcc.target/i386/pr65105-2.c: New. + * gcc.target/i386/pr65105-3.c: New. + * gcc.target/i386/pr65105-4.C: New. + * gcc.dg/lower-subreg-1.c: Add -mno-stv options for ia32. + 2015-09-28 Segher Boessenkool * gcc.dg/asm-4.c: Use braced words for the regular expressions. diff --git a/gcc/testsuite/gcc.dg/lower-subreg-1.c b/gcc/testsuite/gcc.dg/lower-subreg-1.c index 6362d37a878..47057fe0afa 100644 --- a/gcc/testsuite/gcc.dg/lower-subreg-1.c +++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c @@ -1,5 +1,6 @@ /* { dg-do compile { target { ! 
{ mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ /* { dg-options "-O -fdump-rtl-subreg1" } */ +/* { dg-additional-options "-mno-stv" { target ia32 } } */ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ diff --git a/gcc/testsuite/gcc.target/i386/pr65105-1.c b/gcc/testsuite/gcc.target/i386/pr65105-1.c new file mode 100644 index 00000000000..bac6c075ab0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr65105-1.c @@ -0,0 +1,50 @@ +/* PR target/pr65105 */ +/* { dg-do run { target { ia32 } } } */ +/* { dg-options "-O2 -march=slm" } */ +/* { dg-final { scan-assembler "por" } } */ +/* { dg-final { scan-assembler "pand" } } */ + +#include "stdlib.h" + +static int count = 0; + +void __attribute__((noinline)) +counter (long long l) +{ + count++; + if (!l || count > 5) + exit (1); +} + +void __attribute__((noinline)) +test (long long *arr) +{ + register unsigned long long tmp; + + tmp = arr[0] | arr[1] & arr[2]; + while (tmp) + { + counter (tmp); + tmp = *(arr++) & tmp; + } +} + +void __attribute__((noinline)) +fill_data (long long *arr) +{ + arr[0] = 0x00ffffffL; + arr[1] = 0xffffff00L; + arr[2] = 0x00ffffffL; + arr[3] = 0x0000ff00L; + arr[4] = 0x00ff0000L; + arr[5] = 0xff000000L; +} + +int +main (int argc, const char **argv) +{ + long long arr[6]; + fill_data (arr); + test (arr); + return count - 5; +} diff --git a/gcc/testsuite/gcc.target/i386/pr65105-2.c b/gcc/testsuite/gcc.target/i386/pr65105-2.c new file mode 100644 index 00000000000..92168942d11 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr65105-2.c @@ -0,0 +1,12 @@ +/* PR target/pr65105 */ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "por" } } */ + +long long i1, i2, res; + +void +test () +{ + res = i1 | i2; +} diff --git a/gcc/testsuite/gcc.target/i386/pr65105-3.c b/gcc/testsuite/gcc.target/i386/pr65105-3.c new file mode 100644 index 
00000000000..b83989fa4d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr65105-3.c @@ -0,0 +1,16 @@ +/* PR target/pr65105 */ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2 -march=slm -msse4.2" } */ +/* { dg-final { scan-assembler "pand" } } */ +/* { dg-final { scan-assembler "por" } } */ +/* { dg-final { scan-assembler "ptest" } } */ + +long long i1, i2, i3, res; + +void +test () +{ + res = i1 | i2; + if (res) + res &= i3; +} diff --git a/gcc/testsuite/gcc.target/i386/pr65105-4.C b/gcc/testsuite/gcc.target/i386/pr65105-4.C new file mode 100644 index 00000000000..9acf368e1fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr65105-4.C @@ -0,0 +1,19 @@ +/* PR target/pr65105 */ +/* { dg-do run { target { ia32 } } } */ +/* { dg-options "-O2 -march=slm" } */ + +struct s { + long long l1, l2, l3, l4, l5; +} *a; +long long b; +long long fn1() +{ + try + { + b = (a->l1 | a->l2 | a->l3 | a->l4 | a->l5); + return a->l1; + } + catch (int) + { + } +} -- 2.30.2