From 841dbf801d85c65f02c04de31e7cf965a020c518 Mon Sep 17 00:00:00 2001
From: Oleg Endo
Date: Sat, 24 Jan 2015 13:04:53 +0000
Subject: [PATCH] re PR target/49263 (SH Target: underutilized "TST #imm, R0"
 instruction)

gcc/
	PR target/49263
	PR target/53987
	PR target/64345
	PR target/59533
	PR target/52933
	PR target/54236
	PR target/51244
	* config/sh/sh-protos.h
	(sh_extending_set_of_reg::can_use_as_unextended_reg,
	sh_extending_set_of_reg::use_as_unextended_reg,
	sh_is_nott_insn, sh_movt_set_dest, sh_movrt_set_dest, sh_is_movt_insn,
	sh_is_movrt_insn, sh_insn_operands_modified_between_p,
	sh_reg_dead_or_unused_after_insn, sh_in_recog_treg_set_expr,
	sh_recog_treg_set_expr, sh_split_treg_set_expr): New functions.
	(sh_treg_insns): New class.
	* config/sh/sh.c (TARGET_LEGITIMATE_COMBINED_INSN): Define target hook.
	(scope_counter): New class.
	(sh_legitimate_combined_insn, sh_is_nott_insn, sh_movt_set_dest,
	sh_movrt_set_dest, sh_reg_dead_or_unused_after_insn,
	sh_extending_set_of_reg::can_use_as_unextended_reg,
	sh_extending_set_of_reg::use_as_unextended_reg, sh_recog_treg_set_expr,
	sh_in_recog_treg_set_expr, sh_try_split_insn_simple,
	sh_split_treg_set_expr): New functions.
	(addsubcosts): Handle treg_set_expr.
	(sh_rtx_costs): Handle IF_THEN_ELSE and ZERO_EXTRACT.
	(sh_rtx_costs): Use arith_reg_operand in SIGN_EXTEND and ZERO_EXTEND.
	(sh_rtx_costs): Handle additional bit test patterns in EQ and AND cases.
	(sh_insn_operands_modified_between_p): Make non-static.
	* config/sh/predicates.md (zero_extend_movu_operand): Allow
	simple_mem_operand in addition to displacement_mem_operand.
	(zero_extend_operand): Don't allow zero_extend_movu_operand.
	(treg_set_expr, treg_set_expr_not_const01,
	arith_reg_or_treg_set_expr): New predicates.
	* config/sh/sh.md (tstsi_t): Use arith_reg_operand and
	arith_or_int_operand instead of logical_operand. Convert to
	insn_and_split. Try to optimize constant operand in splitter.
	(tsthi_t, tstqi_t): Fold into *tst_t. Convert to insn_and_split.
	(*tstqi_t_zero): Delete.
	(*tst_t_subregs): Add !sh_in_recog_treg_set_expr split condition.
	(tstsi_t_and_not): Delete.
	(tst_t_zero_extract_eq): Rename to *tst_t_zero_extract.
	Convert to insn_and_split.
	(unnamed split, tstsi_t_zero_extract_xor,
	tstsi_t_zero_extract_subreg_xor_little,
	tstsi_t_zero_extract_subreg_xor_big): Delete.
	(*tstsi_t_shift_mask): New insn_and_split.
	(cmpeqsi_t, cmpgesi_t): Add new split for const_int 0 operands and try
	to recombine with surrounding insns when splitting.
	(*negtstsi): Add !sh_in_recog_treg_set_expr condition.
	(cmp_div0s_0, cmp_div0s_1, *cmp_div0s_0, *cmp_div0s_1): Rewrite as ...
	(cmp_div0s, *cmp_div0s_1, *cmp_div0s_2, *cmp_div0s_3, *cmp_div0s_4,
	*cmp_div0s_5, *cmp_div0s_6): ... these new insn_and_split patterns.
	(*cbranch_div0s): Delete.
	(*addc): Convert to insn_and_split. Use treg_set_expr as 3rd operand.
	Try to recombine with surrounding insns when splitting. Add operand
	order variants.
	(*addc_t_r, *addc_r_t): Use treg_set_expr_not_const01.
	(*addc_r_r_1, *addc_r_lsb, *addc_r_r_lsb, *addc_r_lsb_r, *addc_r_msb,
	*addc_r_r_msb, *addc_2r_msb): Delete.
	(*addc_2r_lsb): Rename to *addc_2r_t. Use treg_set_expr. Add operand
	order variant.
	(*addc_negreg_t): New insn_and_split.
	(*subc): Convert to insn_and_split. Use treg_set_expr as 3rd operand.
	Try to recombine with surrounding insns when splitting.
	Add operand order variants.
	(*subc_negt_reg, *subc_negreg_t, *reg_lsb_t, *reg_msb_t): New
	insn_and_split patterns.
	(*rotcr): Use arith_reg_or_treg_set_expr. Try to recombine with
	surrounding insns when splitting.
	(unnamed rotcr split): Use arith_reg_or_treg_set_expr.
	(*rotcl): Likewise. Add zero_extract variant.
	(*ashrsi2_31): New insn_and_split.
	(*negc): Convert to insn_and_split. Use treg_set_expr.
	(*zero_extendsi2_disp_mem): Update comment.
	(movrt_negc, *movrt_negc, nott): Add !sh_in_recog_treg_set_expr split
	condition.
	(*mov_t_msb_neg, mov_neg_si_t): Use treg_set_expr. Try to recombine
	with surrounding insns when splitting.
	(any_treg_expr_to_reg): New insn_and_split.
	(*neg_zero_extract_0, *neg_zero_extract_1, *neg_zero_extract_2,
	*neg_zero_extract_3, *neg_zero_extract_4, *neg_zero_extract_5,
	*neg_zero_extract_6, *zero_extract_0, *zero_extract_1,
	*zero_extract_2): New single bit zero extract patterns.
	(bld_reg, *bld_regqi): Fold into bld_reg.
	(*get_thread_pointersi, store_gbr, *mov_gbr_load, *mov_gbr_load,
	*mov_gbr_load, *mov_gbr_load, *movdi_gbr_load): Use arith_reg_dest
	instead of register_operand for set destination.
	(set_thread_pointersi, load_gbr): Use arith_reg_operand instead of
	register_operand for set source.

gcc/testsuite/
	PR target/49263
	PR target/53987
	PR target/64345
	PR target/59533
	PR target/52933
	PR target/54236
	PR target/51244
	* gcc.target/sh/pr64345-1.c: New.
	* gcc.target/sh/pr64345-2.c: New.
	* gcc.target/sh/pr59533-1.c: New.
	* gcc.target/sh/pr49263.c: Adjust matching of expected insns.
	* gcc.target/sh/pr52933-2.c: Likewise.
	* gcc.target/sh/pr54089-1.c: Likewise.
	* gcc.target/sh/pr54236-1.c: Likewise.
	* gcc.target/sh/pr51244-20-sh2a.c: Likewise.
	* gcc.target/sh/pr49263-1.c: Remove xfails.
	* gcc.target/sh/pr49263-2.c: Likewise.
	* gcc.target/sh/pr49263-3.c: Likewise.
	* gcc.target/sh/pr53987-1.c: Likewise.
	* gcc.target/sh/pr52933-1.c: Adjust matching of expected insns.
	(test_24, test_25, test_26, test_27, test_28, test_29, test_30): New.
	* gcc.target/sh/pr51244-12.c: Adjust matching of expected insns.
	(test05, test06, test07, test08, test09, test10, test11, test12): New.
	* gcc.target/sh/pr54236-3.c: Adjust matching of expected insns.
	(test_002, test_003, test_004, test_005, test_006, test_007, test_008,
	test_009): New.
	* gcc.target/sh/pr51244-4.c: Adjust matching of expected insns.
	(test_02): New.
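
As a rough illustration of the kind of source code these patterns are meant
to improve, consider the following sketch.  The functions are hypothetical
and not taken from the new test cases, and the expected instruction
selection is an assumption based on the PRs listed above:

	/* A bit test against a constant mask (PR 49263) should now be able to
	   use a single tst #imm,R0 followed by a conditional branch, instead
	   of a zero-extend / and / cmp sequence.  */
	int
	test_bit (unsigned char x)
	{
	  return (x & 0x22) ? 40 : -10;
	}

	/* Adding the result of a comparison (PR 59533 and related PRs) should
	   now be able to go through the treg_set_expr predicate and be formed
	   into an addc insn that consumes the T bit directly.  */
	int
	test_addc (int x, int y)
	{
	  return x + (y != 0);
	}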
From-SVN: r220081 --- gcc/ChangeLog | 93 + gcc/config/sh/predicates.md | 26 +- gcc/config/sh/sh-protos.h | 67 + gcc/config/sh/sh.c | 443 ++++- gcc/config/sh/sh.md | 1615 ++++++++++++----- gcc/testsuite/ChangeLog | 31 + gcc/testsuite/gcc.target/sh/pr49263-1.c | 6 +- gcc/testsuite/gcc.target/sh/pr49263-2.c | 11 +- gcc/testsuite/gcc.target/sh/pr49263-3.c | 18 +- gcc/testsuite/gcc.target/sh/pr49263.c | 2 + gcc/testsuite/gcc.target/sh/pr51244-12.c | 58 +- gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c | 4 +- gcc/testsuite/gcc.target/sh/pr51244-4.c | 24 +- gcc/testsuite/gcc.target/sh/pr52933-1.c | 49 +- gcc/testsuite/gcc.target/sh/pr52933-2.c | 7 +- gcc/testsuite/gcc.target/sh/pr53987-1.c | 2 +- gcc/testsuite/gcc.target/sh/pr54089-1.c | 2 + gcc/testsuite/gcc.target/sh/pr54236-1.c | 11 +- gcc/testsuite/gcc.target/sh/pr54236-3.c | 73 +- gcc/testsuite/gcc.target/sh/pr59533-1.c | 185 ++ gcc/testsuite/gcc.target/sh/pr64345-1.c | 97 + gcc/testsuite/gcc.target/sh/pr64345-2.c | 116 ++ 22 files changed, 2407 insertions(+), 533 deletions(-) create mode 100644 gcc/testsuite/gcc.target/sh/pr59533-1.c create mode 100644 gcc/testsuite/gcc.target/sh/pr64345-1.c create mode 100644 gcc/testsuite/gcc.target/sh/pr64345-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 77791a2a2fe..c6f121305f7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,96 @@ +2015-01-24 Oleg Endo + + PR target/49263 + PR target/53987 + PR target/64345 + PR target/59533 + PR target/52933 + PR target/54236 + PR target/51244 + * config/sh/sh-protos.h + (sh_extending_set_of_reg::can_use_as_unextended_reg, + sh_extending_set_of_reg::use_as_unextended_reg, + sh_is_nott_insn, sh_movt_set_dest, sh_movrt_set_dest, sh_is_movt_insn, + sh_is_movrt_insn, sh_insn_operands_modified_between_p, + sh_reg_dead_or_unused_after_insn, sh_in_recog_treg_set_expr, + sh_recog_treg_set_expr, sh_split_treg_set_expr): New functions. + (sh_treg_insns): New class. + * config/sh/sh.c (TARGET_LEGITIMATE_COMBINED_INSN): Define target hook. + (scope_counter): New class. + (sh_legitimate_combined_insn, sh_is_nott_insn, sh_movt_set_dest, + sh_movrt_set_dest, sh_reg_dead_or_unused_after_insn, + sh_extending_set_of_reg::can_use_as_unextended_reg, + sh_extending_set_of_reg::use_as_unextended_reg, sh_recog_treg_set_expr, + sh_in_recog_treg_set_expr, sh_try_split_insn_simple, + sh_split_treg_set_expr): New functions. + (addsubcosts): Handle treg_set_expr. + (sh_rtx_costs): Handle IF_THEN_ELSE and ZERO_EXTRACT. + (sh_rtx_costs): Use arith_reg_operand in SIGN_EXTEND and ZERO_EXTEND. + (sh_rtx_costs): Handle additional bit test patterns in EQ and AND cases. + (sh_insn_operands_modified_between_p): Make non-static. + * config/sh/predicates.md (zero_extend_movu_operand): Allow + simple_mem_operand in addition to displacement_mem_operand. + (zero_extend_operand): Don't allow zero_extend_movu_operand. + (treg_set_expr, treg_set_expr_not_const01, + arith_reg_or_treg_set_expr): New predicates. + * config/sh/sh.md (tstsi_t): Use arith_reg_operand and + arith_or_int_operand instead of logical_operand. Convert to + insn_and_split. Try to optimize constant operand in splitter. + (tsthi_t, tstqi_t): Fold into *tst_t. Convert to insn_and_split. + (*tstqi_t_zero): Delete. + (*tst_t_subregs): Add !sh_in_recog_treg_set_expr split condition. + (tstsi_t_and_not): Delete. + (tst_t_zero_extract_eq): Rename to *tst_t_zero_extract. + Convert to insn_and_split. + (unnamed split, tstsi_t_zero_extract_xor, + tstsi_t_zero_extract_subreg_xor_little, + tstsi_t_zero_extract_subreg_xor_big): Delete. 
+ (*tstsi_t_shift_mask): New insn_and_split. + (cmpeqsi_t, cmpgesi_t): Add new split for const_int 0 operands and try + to recombine with surrounding insns when splitting. + (*negtstsi): Add !sh_in_recog_treg_set_expr condition. + (cmp_div0s_0, cmp_div0s_1, *cmp_div0s_0, *cmp_div0s_1): Rewrite as ... + (cmp_div0s, *cmp_div0s_1, *cmp_div0s_2, *cmp_div0s_3, *cmp_div0s_4, + *cmp_div0s_5, *cmp_div0s_6): ... these new insn_and_split patterns. + (*cbranch_div0s: Delete. + (*addc): Convert to insn_and_split. Use treg_set_expr as 3rd operand. + Try to recombine with surrounding insns when splitting. Add operand + order variants. + (*addc_t_r, *addc_r_t): Use treg_set_expr_not_const01. + (*addc_r_r_1, *addc_r_lsb, *addc_r_r_lsb, *addc_r_lsb_r, *addc_r_msb, + *addc_r_r_msb, *addc_2r_msb): Delete. + (*addc_2r_lsb): Rename to *addc_2r_t. Use treg_set_expr. Add operand + order variant. + (*addc_negreg_t): New insn_and_split. + (*subc): Convert to insn_and_split. Use treg_set_expr as 3rd operand. + Try to recombine with surrounding insns when splitting. + Add operand order variants. + (*subc_negt_reg, *subc_negreg_t, *reg_lsb_t, *reg_msb_t): New + insn_and_split patterns. + (*rotcr): Use arith_reg_or_treg_set_expr. Try to recombine with + surrounding insns when splitting. + (unnamed rotcr split): Use arith_reg_or_treg_set_expr. + (*rotcl): Likewise. Add zero_extract variant. + (*ashrsi2_31): New insn_and_split. + (*negc): Convert to insn_and_split. Use treg_set_expr. + (*zero_extendsi2_disp_mem): Update comment. + (movrt_negc, *movrt_negc, nott): Add !sh_in_recog_treg_set_expr split + condition. + (*mov_t_msb_neg, mov_neg_si_t): Use treg_set_expr. Try to recombine + with surrounding insns when splitting. + (any_treg_expr_to_reg): New insn_and_split. + (*neg_zero_extract_0, *neg_zero_extract_1, *neg_zero_extract_2, + *neg_zero_extract_3, *neg_zero_extract_4, *neg_zero_extract_5, + *neg_zero_extract_6, *zero_extract_0, *zero_extract_1, + *zero_extract_2): New single bit zero extract patterns. + (bld_reg, *bld_regqi): Fold into bld_reg. + (*get_thread_pointersi, store_gbr, *mov_gbr_load, + *mov_gbr_load, *mov_gbr_load, *mov_gbr_load, + *movdi_gbr_load): Use arith_reg_dest instead of register_operand for + set destination. + (set_thread_pointersi, load_gbr): Use arith_reg_operand instead of + register_operand for set source. + 2015-01-23 Jan Hubicka * i386.opt (prefetch_sse): New targetsave. diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index db678b66abb..a18515a3a15 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -456,17 +456,17 @@ ;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn. (define_predicate "zero_extend_movu_operand" - (and (match_operand 0 "displacement_mem_operand") - (match_test "GET_MODE (op) == QImode || GET_MODE (op) == HImode"))) + (and (ior (match_operand 0 "displacement_mem_operand") + (match_operand 0 "simple_mem_operand")) + (ior (match_test "GET_MODE (op) == QImode") + (match_test "GET_MODE (op) == HImode")))) ;; Returns 1 if the operand can be used in a zero_extend. (define_predicate "zero_extend_operand" (ior (and (match_test "TARGET_SHMEDIA") (match_operand 0 "general_extend_operand")) (and (match_test "! TARGET_SHMEDIA") - (match_operand 0 "arith_reg_operand")) - (and (match_test "TARGET_SH2A") - (match_operand 0 "zero_extend_movu_operand")))) + (match_operand 0 "arith_reg_operand")))) ;; Returns 1 if OP can be source of a simple move operation. 
Same as ;; general_operand, but a LABEL_REF is valid, PRE_DEC is invalid as @@ -1152,6 +1152,18 @@ (match_test "mode != HImode") (match_test "TARGET_SH4A")))) +;; A predicate that matches any expression for which there is an +;; insn pattern that sets the T bit. +(define_predicate "treg_set_expr" + (match_test "sh_recog_treg_set_expr (op, mode)")) + +;; Same as treg_set_expr but disallow constants 0 and 1 which can be loaded +;; into the T bit. +(define_predicate "treg_set_expr_not_const01" + (and (match_test "op != const0_rtx") + (match_test "op != const1_rtx") + (match_operand 0 "treg_set_expr"))) + ;; A predicate describing the T bit register in any form. (define_predicate "t_reg_operand" (match_code "reg,subreg,sign_extend,zero_extend") @@ -1206,6 +1218,10 @@ (ior (match_operand 0 "arith_reg_operand") (match_operand 0 "t_reg_operand"))) +(define_predicate "arith_reg_or_treg_set_expr" + (ior (match_operand 0 "arith_reg_operand") + (match_operand 0 "treg_set_expr"))) + ;; A predicate describing the negated value of the T bit register shifted ;; left by 31. (define_predicate "negt_reg_shl31_operand" diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index bc2b3b30667..945a54f4f1b 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -264,6 +264,14 @@ struct sh_extending_set_of_reg : public set_of_reg ext_code = UNKNOWN; } + /* Returns true if it's possible to use the source reg of the sign + or zero extending set directly, bypassing the extension. */ + bool can_use_as_unextended_reg (void) const; + + /* Returns the reg rtx of the sign or zero extending set source, that can + be safely used at the specified insn in SImode. */ + rtx use_as_unextended_reg (rtx_insn* use_at_insn) const; + /* Returns the reg rtx of the sign or zero extending result, that can be safely used at the specified insn in SImode. If the set source is an implicitly sign extending mem load, the mem load is converted into an @@ -281,7 +289,66 @@ extern bool sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, extern void sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode, int subreg_offset, rtx operands[]); + +extern bool sh_is_nott_insn (const rtx_insn* i); +extern rtx sh_movt_set_dest (const rtx_insn* i); +extern rtx sh_movrt_set_dest (const rtx_insn* i); + +inline bool sh_is_movt_insn (const rtx_insn* i) +{ + return sh_movt_set_dest (i) != NULL; +} + +inline bool sh_is_movrt_insn (const rtx_insn* i) +{ + return sh_movrt_set_dest (i) != NULL; +} + +extern bool sh_insn_operands_modified_between_p (rtx_insn* operands_insn, + const rtx_insn* from, + const rtx_insn* to); + +extern bool sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno); extern void sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno); + +extern bool sh_in_recog_treg_set_expr (void); +extern bool sh_recog_treg_set_expr (rtx op, machine_mode mode); + +/* Result value of sh_split_treg_set_expr. Contains the first insn emitted + and the optional trailing nott insn. 
*/ +class sh_treg_insns +{ +public: + sh_treg_insns (void) : m_first_insn (NULL), m_trailing_nott_insn (NULL) { } + sh_treg_insns (rtx_insn* first_insn, rtx_insn* nott_insn) + : m_first_insn (first_insn), + m_trailing_nott_insn (nott_insn) + { } + + bool was_treg_operand (void) const { return m_first_insn == NULL; } + bool has_trailing_nott (void) const { return m_trailing_nott_insn != NULL; } + rtx_insn* trailing_nott (void) const { return m_trailing_nott_insn; } + rtx_insn* first_insn (void) const { return m_first_insn; } + + /* If there is a trailing nott, remove it from the emitted insns and + return true. Return false otherwise. */ + bool + remove_trailing_nott (void) + { + if (!has_trailing_nott ()) + return false; + + remove_insn (trailing_nott ()); + return true; + } + +private: + rtx_insn* m_first_insn; + rtx_insn* m_trailing_nott_insn; +}; + +extern sh_treg_insns sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn); + #endif /* RTX_CODE */ extern void sh_cpu_cpp_builtins (cpp_reader* pfile); diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index bde95f6a587..7d07634abe3 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -364,6 +364,8 @@ static bool sequence_insn_p (rtx_insn *); static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, machine_mode, bool); +static bool sh_legitimate_combined_insn (rtx_insn* insn); + static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2); static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED; @@ -672,6 +674,9 @@ static const struct attribute_spec sh_attribute_table[] = #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison +#undef TARGET_LEGITIMATE_COMBINED_INSN +#define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn + #undef TARGET_FIXED_CONDITION_CODE_REGS #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs @@ -2044,6 +2049,26 @@ sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1, *code = (int)tmp_code; } +/* This function implements the legitimate_combined_insn target hook, + which the combine pass uses to early reject combined insns, before + it tries to recog the insn and determine its cost. */ +static bool +sh_legitimate_combined_insn (rtx_insn* insn) +{ + /* Reject combinations of memory loads and zero extensions, as these + interfere with other combine patterns such as zero extracts and bit + tests. The SH2A movu.{b|w} insns are formed later in the + 'sh_optimize_extu_exts' pass after combine/split1. */ + rtx p = PATTERN (insn); + if (GET_CODE (p) == SET + && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode + && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND + && MEM_P (XEXP (XEXP (p, 1), 0))) + return false; + + return true; +} + bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2) { @@ -3330,6 +3355,12 @@ addsubcosts (rtx x) && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31) return 1; } + /* Let's assume that adding the result of an insns that stores into + the T bit is cheap. */ + if (treg_set_expr (op1, SImode)) + return 1; + if (treg_set_expr (op0, SImode)) + return 1; } /* On SH1-4 we have only max. SImode operations. @@ -3445,10 +3476,36 @@ sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, true); return true; + case IF_THEN_ELSE: + /* This case is required for the if_then_else negc pattern. 
*/ + if (treg_set_expr (XEXP (x, 0), SImode)) + { + *total = COSTS_N_INSNS (1); + return true; + } + else + return false; + + /* Zero extracts of single bits are usually combine patterns for the + tst insns. */ + case ZERO_EXTRACT: + if (GET_CODE (XEXP (x, 0)) == XOR + && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode) + && XEXP (x, 1) == const1_rtx + && CONST_INT_P (XEXP (x, 2)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + /* Check that the xor constaint overlaps with the extracted bit. */ + && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2))))) + { + *total = 1; //COSTS_N_INSNS (1); + return true; + } + return false; + /* The cost of a sign or zero extend depends on whether the source is a reg or a mem. In case of a mem take the address into acount. */ case SIGN_EXTEND: - if (REG_P (XEXP (x, 0))) + if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) { *total = COSTS_N_INSNS (1); return true; @@ -3463,7 +3520,7 @@ sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, return false; case ZERO_EXTEND: - if (REG_P (XEXP (x, 0))) + if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) { *total = COSTS_N_INSNS (1); return true; @@ -3555,8 +3612,21 @@ sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, most likely going to be a TST #imm, R0 instruction. Notice that this does not catch the zero_extract variants from the md file. */ - if (GET_CODE (XEXP (x, 0)) == AND - && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0) + if (XEXP (x, 1) == const0_rtx + && (GET_CODE (XEXP (x, 0)) == AND + || (SUBREG_P (XEXP (x, 0)) + && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))) + { + *total = 1; + return true; + } + + else if (XEXP (x, 1) == const0_rtx + && GET_CODE (XEXP (x, 0)) == AND + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT + && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode) + && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))) { *total = 1; return true; @@ -3622,6 +3692,14 @@ sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, return true; case AND: + /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */ + if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx) + { + *total = COSTS_N_INSNS (1); + return true; + } + /* Fall through. */ + case XOR: case IOR: *total = COSTS_N_INSNS (and_xor_ior_costs (x, code)); @@ -13751,7 +13829,7 @@ sh_unspec_insn_p (rtx x) /* Return true if the register operands of the specified insn are modified between the specified from and to insns (exclusive of those two). */ -static bool +bool sh_insn_operands_modified_between_p (rtx_insn* operands_insn, const rtx_insn* from, const rtx_insn* to) @@ -13770,6 +13848,57 @@ sh_insn_operands_modified_between_p (rtx_insn* operands_insn, return false; } +/* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that + negates the T bit and stores the result in the T bit. */ +bool +sh_is_nott_insn (const rtx_insn* i) +{ + return i != NULL && GET_CODE (PATTERN (i)) == SET + && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode) + && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode); +} + +rtx +sh_movt_set_dest (const rtx_insn* i) +{ + if (i == NULL) + return NULL; + + const_rtx p = PATTERN (i); + return GET_CODE (p) == SET + && arith_reg_dest (XEXP (p, 0), SImode) + && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL; +} + +/* Given an insn, check whether it's a 'movrt' kind of insn, i.e. 
an insn + that stores the negated T bit in a register, and return the destination + register rtx, or null. */ +rtx +sh_movrt_set_dest (const rtx_insn* i) +{ + if (i == NULL) + return NULL; + + const_rtx p = PATTERN (i); + + /* The negc movrt replacement is inside a parallel. */ + if (GET_CODE (p) == PARALLEL) + p = XVECEXP (p, 0, 0); + + return GET_CODE (p) == SET + && arith_reg_dest (XEXP (p, 0), SImode) + && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL; +} + +/* Given an insn and a reg number, tell whether the reg dies or is unused + after the insn. */ +bool +sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno) +{ + return find_regno_note (i, REG_DEAD, regno) != NULL + || find_regno_note (i, REG_UNUSED, regno) != NULL; +} + /* Given an insn and a reg number, remove reg dead or reg unused notes to mark it as being used after the insn. */ void @@ -14006,6 +14135,40 @@ sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const } } +bool +sh_extending_set_of_reg::can_use_as_unextended_reg (void) const +{ + if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND) + && (from_mode == QImode || from_mode == HImode) + && set_src != NULL) + return arith_reg_operand (XEXP (set_src, 0), from_mode); + else + return false; +} + +rtx +sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const +{ + gcc_assert (can_use_as_unextended_reg ()); + + rtx r = XEXP (set_src, 0); + rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0); + + if (modified_between_p (r, insn, use_at_insn)) + { + rtx r1 = gen_reg_rtx (SImode); + emit_insn_after (gen_move_insn (r1, r0), insn); + return r1; + } + else + { + sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r) + ? REGNO (SUBREG_REG (r)) + : REGNO (r)); + return r0; + } +} + /* Given the current insn, which is assumed to be the *tst_t_subregs insn, perform the necessary checks on the operands and split it accordingly. */ void @@ -14059,6 +14222,276 @@ sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode, emit_insn (gen_tstsi_t (tmp0, operands[1])); } +/* A helper class to increment/decrement a counter variable each time a + function is entered/left. */ +class scope_counter +{ +public: + scope_counter (int& counter) : m_counter (counter) { ++m_counter; } + + ~scope_counter (void) + { + --m_counter; + gcc_assert (m_counter >= 0); + } + + int count (void) const { return m_counter; } + +private: + int& m_counter; +}; + +/* Given an rtx x, determine whether the expression can be used to create + an insn that calulates x and stores the result in the T bit. + This is used by the 'treg_set_expr' predicate to construct insns sequences + where T bit results are fed into other insns, such as addc, subc, negc + insns. + + FIXME: The patterns that expand 'treg_set_expr' operands tend to + distinguish between 'positive' and 'negative' forms. For now this has to + be done in the preparation code. We could also introduce + 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write + two different patterns for the 'postive' and 'negative' forms. However, + the total amount of lines of code seems to be about the same and the + '{pos|neg}_treg_set_expr' predicates would be more expensive, because the + recog function would need to look inside the expression by temporarily + splitting it. 
*/ +static int sh_recog_treg_set_expr_reent_count = 0; + +bool +sh_recog_treg_set_expr (rtx op, machine_mode mode) +{ + scope_counter recursion (sh_recog_treg_set_expr_reent_count); + + /* Limit the recursion count to avoid nested expressions which we can't + resolve to a single treg set insn. */ + if (recursion.count () > 1) + return false; + + /* Early accept known possible operands before doing recog. */ + if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)) + return true; + + /* Early reject impossible operands before doing recog. + There are some (set ((t) (subreg ...))) patterns, but we must be careful + not to allow any invalid reg-reg or mem-reg moves, or else other passes + such as lower-subreg will bail out. Some insns such as SH4A movua are + done with UNSPEC, so must reject those, too, or else it would result + in an invalid reg -> treg move. */ + if (register_operand (op, mode) || memory_operand (op, mode) + || sh_unspec_insn_p (op)) + return false; + + if (!can_create_pseudo_p ()) + return false; + + /* We are going to invoke recog in a re-entrant way and thus + have to capture its current state and restore it afterwards. */ + recog_data_d prev_recog_data = recog_data; + + rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), op)); + SET_PREV_INSN (i) = NULL; + SET_NEXT_INSN (i) = NULL; + + int result = recog (PATTERN (i), i, 0); + + /* It seems there is no insn like that. Create a simple negated + version and try again. If we hit a negated form, we'll allow that + and append a nott sequence when splitting out the insns. Insns that + do the split can then remove the trailing nott if they know how to + deal with it. */ + if (result < 0 && GET_CODE (op) == EQ) + { + PUT_CODE (op, NE); + result = recog (PATTERN (i), i, 0); + PUT_CODE (op, EQ); + } + if (result < 0 && GET_CODE (op) == NE) + { + PUT_CODE (op, EQ); + result = recog (PATTERN (i), i, 0); + PUT_CODE (op, NE); + } + + recog_data = prev_recog_data; + return result >= 0; +} + +/* Returns true when recog of a 'treg_set_expr' is currently in progress. + This can be used as a condition for insn/split patterns to allow certain + T bit setting patters only to be matched as sub expressions of other + patterns. */ +bool +sh_in_recog_treg_set_expr (void) +{ + return sh_recog_treg_set_expr_reent_count > 0; +} + +/* Given an rtx x, which is assumed to be some expression that has been + matched by the 'treg_set_expr' predicate before, split and emit the + insns that are necessary to calculate the expression and store the result + in the T bit. + The splitting is done recursively similar to 'try_split' in emit-rt.c. + Unfortunately we can't use 'try_split' here directly, as it tries to invoke + 'delete_insn' which then causes the DF parts to bail out, because we + currently are inside another gen_split* function and would invoke + 'try_split' in a reentrant way. */ +static std::pair +sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0) +{ + if (dump_file) + { + fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n); + print_rtl_single (dump_file, i); + fprintf (dump_file, "\n"); + } + + rtx_insn* seq = safe_as_a (split_insns (PATTERN (i), curr_insn)); + + if (seq == NULL) + return std::make_pair (i, i); + + /* Avoid infinite splitter loops if any insn of the result matches + the original pattern. 
*/ + for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s)) + if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i))) + return std::make_pair (i, i); + + unshare_all_rtl_in_chain (seq); + + /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in + a linked list, replace the single insn with the new insns. */ + rtx_insn* seqlast = seq; + while (NEXT_INSN (seqlast) != NULL) + seqlast = NEXT_INSN (seqlast); + + if (rtx_insn* iprev = PREV_INSN (i)) + SET_NEXT_INSN (iprev) = seq; + if (rtx_insn* inext = NEXT_INSN (i)) + SET_PREV_INSN (inext) = seqlast; + + SET_PREV_INSN (seq) = PREV_INSN (i); + SET_NEXT_INSN (seqlast) = NEXT_INSN (i); + + SET_PREV_INSN (i) = NULL; + SET_NEXT_INSN (i) = NULL; + + /* Recursively split all insns. */ + for (i = seq; ; i = NEXT_INSN (i)) + { + std::pair ii = + sh_try_split_insn_simple (i, curr_insn, n + 1); + if (i == seq) + seq = ii.first; + if (i == seqlast) + { + seqlast = ii.second; + break; + } + i = ii.first; + } + + return std::make_pair (seq, seqlast); +} + +sh_treg_insns +sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn) +{ + if (t_reg_operand (x, VOIDmode)) + return sh_treg_insns (); + + scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count); + + rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), x)); + SET_PREV_INSN (i) = NULL; + SET_NEXT_INSN (i) = NULL; + + if (dump_file) + { + fprintf (dump_file, "split_treg_set_expr insn:\n"); + print_rtl (dump_file, i); + fprintf (dump_file, "\n"); + } + + /* We are going to invoke recog/split_insns in a re-entrant way and thus + have to capture its current state and restore it afterwards. */ + recog_data_d prev_recog_data = recog_data; + + int insn_code = recog (PATTERN (i), i, 0); + + /* If the insn was not found, see if we matched the negated form before + and append a nott. */ + bool append_nott = false; + + if (insn_code < 0 && GET_CODE (x) == EQ) + { + PUT_CODE (x, NE); + insn_code = recog (PATTERN (i), i, 0); + if (insn_code >= 0) + append_nott = true; + else + PUT_CODE (x, EQ); + } + if (insn_code < 0 && GET_CODE (x) == NE) + { + PUT_CODE (x, EQ); + insn_code = recog (PATTERN (i), i, 0); + if (insn_code >= 0) + append_nott = true; + else + PUT_CODE (x, NE); + } + + gcc_assert (insn_code >= 0); + + /* Try to recursively split the insn. Some insns might refuse to split + any further while we are in the treg_set_expr splitting phase. They + will be emitted as part of the outer insn and then split again. */ + std::pair insnlist = + sh_try_split_insn_simple (i, curr_insn); + + /* Restore recog state. */ + recog_data = prev_recog_data; + + rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second) + ? 
insnlist.second + : NULL; + if (dump_file) + { + fprintf (dump_file, "split_treg_set_expr insnlist:\n"); + print_rtl (dump_file, insnlist.first); + fprintf (dump_file, "\n"); + + if (nott_insn != NULL) + fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn)); + } + + if (nott_insn != NULL && append_nott) + { + if (dump_file) + fprintf (dump_file, "removing trailing nott\n"); + remove_insn (nott_insn); + nott_insn = NULL; + append_nott = false; + } + + emit_insn (insnlist.first); + + if (append_nott) + nott_insn = emit_insn (gen_nott (get_t_reg_rtx ())); + + rtx_insn* first_insn = get_insns (); + + if (dump_file) + { + fprintf (dump_file, "resulting insns:\n"); + print_rtl (dump_file, first_insn); + fprintf (dump_file, "\n"); + } + + return sh_treg_insns (first_insn, nott_insn); +} + /*------------------------------------------------------------------------------ Mode switching support code. */ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 50f374c9bf8..956d1ef672e 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -612,58 +612,199 @@ ;; SImode signed integer comparisons ;; ------------------------------------------------------------------------- -;; Various patterns to generate the TST #imm, R0 instruction. -;; Although this adds some pressure on the R0 register, it can potentially -;; result in faster code, even if the operand has to be moved to R0 first. -;; This is because on SH4 TST #imm, R0 and MOV Rm, Rn are both MT group -;; instructions and thus will be executed in parallel. On SH4A TST #imm, R0 -;; is an EX group instruction but still can be executed in parallel with the -;; MT group MOV Rm, Rn instruction. - -;; Usual TST #imm, R0 patterns for SI, HI and QI -;; This is usually used for bit patterns other than contiguous bits -;; and single bits. -(define_insn "tstsi_t" +;; Patterns to generate the tst instruction which are usually formed by +;; the combine pass. +;; The canonical form here being used is (eq (and (op) (op)) 0). +;; For some bit patterns, such as contiguous bits, we also must accept +;; zero_extract forms. Single bit tests are also handled via zero_extract +;; patterns in the 'bit field extract patterns' section. All variants +;; are eventually converted to the 'tstsi_t' insn. +;; As long as pseudos can be created (before RA), 'tstsi_t' will also accept +;; constants that won't fit into 8 bits. After having captured the constant +;; we can decide better whether/how to load it into a register and do other +;; post-combine optimizations such as bypassing sign/zero extensions. 
+(define_insn_and_split "tstsi_t" [(set (reg:SI T_REG) - (eq:SI (and:SI (match_operand:SI 0 "logical_operand" "%z,r") - (match_operand:SI 1 "logical_operand" "K08,r")) - (const_int 0)))] - "TARGET_SH1" - "tst %1,%0" - [(set_attr "type" "mt_group")]) - -(define_insn "tsthi_t" - [(set (reg:SI T_REG) - (eq:SI (subreg:SI (and:HI (match_operand:HI 0 "logical_operand" "%z") - (match_operand 1 "const_int_operand")) 0) + (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "%z,r") + (match_operand:SI 1 "arith_or_int_operand" "K08,r")) (const_int 0)))] "TARGET_SH1 - && CONST_OK_FOR_K08 (INTVAL (operands[1]))" + && (can_create_pseudo_p () || arith_reg_operand (operands[1], SImode) + || satisfies_constraint_K08 (operands[1]))" "tst %1,%0" - [(set_attr "type" "mt_group")]) - -(define_insn "tstqi_t" - [(set (reg:SI T_REG) - (eq:SI (subreg:SI (and:QI (match_operand:QI 0 "logical_operand" "%z") - (match_operand 1 "const_int_operand")) 0) - (const_int 0)))] - "TARGET_SH1 - && (CONST_OK_FOR_K08 (INTVAL (operands[1])) - || CONST_OK_FOR_I08 (INTVAL (operands[1])))" + "TARGET_SH1 && can_create_pseudo_p () && CONST_INT_P (operands[1]) + && !sh_in_recog_treg_set_expr ()" + [(const_int 0)] { - operands[1] = GEN_INT (INTVAL (operands[1]) & 255); - return "tst %1,%0"; + gcc_assert (CONST_INT_P (operands[1])); + + HOST_WIDE_INT op1val = INTVAL (operands[1]); + bool op0_dead_after_this = + sh_reg_dead_or_unused_after_insn (curr_insn, REGNO (operands[0])); + + if (optimize) + { + if (dump_file) + fprintf (dump_file, + "tstsi_t: trying to optimize const_int 0x%08x\n", + (uint32_t)op1val); + + /* See if we can convert a test with a reg and a constant into + something simpler, if the reg is known to be zero or sign + extended. */ + sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0], + curr_insn); + if (eop0.ext_code != UNKNOWN) + { + /* Adjust the constant, trying to eliminate bits that are not + contributing to the result. */ + if (eop0.from_mode == QImode) + op1val = (op1val + | (eop0.ext_code == SIGN_EXTEND && (op1val & 0xFFFFFF80) + ? 0x80 : 0)) & 0xFF; + else if (eop0.from_mode == HImode) + op1val = (op1val + | (eop0.ext_code == SIGN_EXTEND && (op1val & 0xFFFF8000) + ? 0x8000 : 0)) & 0xFFFF; + + if (dump_file) + fprintf (dump_file, "tstsi_t: using effective const_int: 0x%08x\n", + (uint32_t)op1val); + + /* Try to bypass the sign/zero extension first if op0 dies after + this insn. */ + if (op0_dead_after_this && eop0.can_use_as_unextended_reg ()) + { + if (dump_file) + fprintf (dump_file, "tstsi_t: bypassing sign/zero extension\n"); + + operands[0] = eop0.use_as_unextended_reg (curr_insn); + } + else if ((eop0.from_mode == QImode && op1val == 0xFF) + || (eop0.from_mode == HImode && op1val == 0xFFFF)) + { + if (dump_file) + fprintf (dump_file, "tstsi_t: converting to cmpeqsi_t\n"); + emit_insn (gen_cmpeqsi_t (eop0.use_as_extended_reg (curr_insn), + const0_rtx)); + DONE; + } + else if (eop0.ext_code == SIGN_EXTEND + && ((eop0.from_mode == QImode && op1val == 0x80) + || (eop0.from_mode == HImode && op1val == 0x8000))) + { + if (dump_file) + fprintf (dump_file, "tstsi_t: converting to cmpgesi_t\n"); + emit_insn (gen_cmpgesi_t (eop0.use_as_extended_reg (curr_insn), + const0_rtx)); + DONE; + } + else if (!CONST_OK_FOR_K08 (op1val)) + { + if (dump_file) + fprintf (dump_file, "tstsi_t: converting const_int to signed " + "value\n"); + + /* If here we haven't done anything yet. Convert the constant + to a signed value to reduce the constant pool size. 
*/ + operands[0] = eop0.use_as_extended_reg (curr_insn); + + if (eop0.from_mode == QImode) + op1val |= (op1val & 0x80) ? 0xFFFFFFFFFFFFFF00LL : 0; + else if (eop0.from_mode == HImode) + op1val |= (op1val & 0x8000) ? 0xFFFFFFFFFFFF0000LL : 0; + } + else + operands[0] = eop0.use_as_extended_reg (curr_insn); + } + } + + if (dump_file) + fprintf (dump_file, "tstsi_t: using const_int 0x%08x\n", + (uint32_t)op1val); + + /* Try to fit the constant into 8 bits by shuffling the value in the + register operand. + Doing that usually results in smaller code as the constants in the + pools are avoided (32 bit constant = load + constant = 6 bytes). + However, if the constant load (LS insn) can be hoisted insn dependencies + can be avoided and chances for parallel execution increase. The common + usage pattern is: + - load reg from mem + - test bits + - conditional branch + + FIXME: For now we do that only when optimizing for size until there is + a better heuristic. + + FIXME: If there are multiple tst insns in the block with the same + constant, avoid the #imm variant to avoid R0 loads. Use the 'tst Rn,Rm' + variant instead and load the constant into a reg. For that we'd need + to do some analysis. */ + + if ((op1val & 0xFFFF) == 0 + && CONST_OK_FOR_K08 (op1val >> 16) && optimize_size) + { + /* Use a swap.w insn to do a shift + reg copy (to R0) in one insn. */ + op1val = op1val >> 16; + rtx r = gen_reg_rtx (SImode); + emit_insn (gen_rotlsi3_16 (r, operands[0])); + operands[0] = r; + } + else if ((op1val & 0xFF) == 0 + && CONST_OK_FOR_K08 (op1val >> 8) && optimize_size) + { + /* Use a swap.b insn to do a shift + reg copy (to R0) in one insn. */ + op1val = op1val >> 8; + rtx r = gen_reg_rtx (SImode); + emit_insn (gen_swapbsi2 (r, operands[0])); + operands[0] = r; + } + else if ((op1val & 3) == 0 + && CONST_OK_FOR_K08 (op1val >> 2) && optimize_size) + { + op1val = op1val >> 2; + rtx r = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3_k (r, operands[0], GEN_INT (2))); + operands[0] = r; + } + else if ((op1val & 1) == 0 + && CONST_OK_FOR_K08 (op1val >> 1) && optimize_size) + { + op1val = op1val >> 1; + rtx r = gen_reg_rtx (SImode); + emit_insn (gen_shlr (r, operands[0])); + operands[0] = r; + } + + operands[1] = GEN_INT (op1val); + + if (!satisfies_constraint_K08 (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + + emit_insn (gen_tstsi_t (operands[0], operands[1])); + DONE; } [(set_attr "type" "mt_group")]) -;; Test low QI subreg against zero. -;; This avoids unnecessary zero extension before the test. -(define_insn "*tstqi_t_zero" +;; This pattern is used by combine when testing QI/HImode subregs with a +;; negative constant. Ignore high bits by masking them out in the constant. +(define_insn_and_split "*tst_t" [(set (reg:SI T_REG) - (eq:SI (match_operand:QI 0 "logical_operand" "z") (const_int 0)))] - "TARGET_SH1" - "tst #255,%0" - [(set_attr "type" "mt_group")]) + (eq:SI (subreg:SI + (and:QIHI (match_operand:QIHI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) 0) + (const_int 0)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:SI T_REG) + (eq:SI (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] +{ + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); + operands[1] = GEN_INT (INTVAL (operands[1]) + & (mode == HImode ? 0xFFFF : 0xFF)); +}) ;; This pattern might be risky because it also tests the upper bits and not ;; only the subreg. 
We have to check whether the operands have been sign @@ -678,7 +819,7 @@ (const_int 0)))] "TARGET_SH1 && TARGET_LITTLE_ENDIAN && can_create_pseudo_p ()" "#" - "&& 1" + "&& !sh_in_recog_treg_set_expr ()" [(const_int 0)] { sh_split_tst_subregs (curr_insn, mode, , operands); @@ -694,109 +835,50 @@ (const_int 0)))] "TARGET_SH1 && TARGET_BIG_ENDIAN && can_create_pseudo_p ()" "#" - "&& 1" + "&& !sh_in_recog_treg_set_expr ()" [(const_int 0)] { sh_split_tst_subregs (curr_insn, mode, , operands); DONE; }) -;; Extract LSB, negate and store in T bit. -(define_insn "tstsi_t_and_not" - [(set (reg:SI T_REG) - (and:SI (not:SI (match_operand:SI 0 "logical_operand" "z")) - (const_int 1)))] - "TARGET_SH1" - "tst #1,%0" - [(set_attr "type" "mt_group")]) - ;; Extract contiguous bits and compare them against zero. -(define_insn "tst_t_zero_extract_eq" +;; Notice that this will not be used for single bits. Special single bit +;; extraction patterns are in the 'bit field extract patterns' section. +(define_insn_and_split "*tst_t_zero_extract" [(set (reg:SI T_REG) - (eq:SI (zero_extract:SI (match_operand:QIHISIDI 0 "logical_operand" "z") - (match_operand:SI 1 "const_int_operand") - (match_operand:SI 2 "const_int_operand")) + (eq:SI (zero_extract:SI (match_operand:QIHISI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) (const_int 0)))] - "TARGET_SH1 - && CONST_OK_FOR_K08 (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]))" -{ - operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2])); - return "tst %1,%0"; -} - [(set_attr "type" "mt_group")]) - -;; This split is required when testing bits in a QI subreg. -(define_split + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" [(set (reg:SI T_REG) - (eq:SI - (if_then_else:SI - (zero_extract:SI (match_operand 0 "logical_operand") - (match_operand 1 "const_int_operand") - (match_operand 2 "const_int_operand")) - (match_operand 3 "const_int_operand") - (const_int 0)) - (const_int 0)))] - "TARGET_SH1 - && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3]) - && CONST_OK_FOR_K08 (INTVAL (operands[3]))" - [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 3)) - (const_int 0)))] + (eq:SI (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] { - if (GET_MODE (operands[0]) == QImode) - operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0); + operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2])); + if (GET_MODE (operands[0]) != SImode) + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); }) -;; Extract single bit, negate and store it in the T bit. -;; Not used for SH4A. -(define_insn "tstsi_t_zero_extract_xor" +;; Convert '(reg << shift) & mask' into 'reg & (mask >> shift)'. +;; The shifted-out bits in the mask will always be zero, since the +;; shifted-in bits in the reg will also be always zero. +(define_insn_and_split "*tstsi_t_shift_mask" [(set (reg:SI T_REG) - (zero_extract:SI (xor:SI (match_operand:SI 0 "logical_operand" "z") - (match_operand:SI 3 "const_int_operand")) - (match_operand:SI 1 "const_int_operand") - (match_operand:SI 2 "const_int_operand")))] - "TARGET_SH1 - && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3]) - && CONST_OK_FOR_K08 (INTVAL (operands[3]))" - "tst %3,%0" - [(set_attr "type" "mt_group")]) - -;; Extract single bit, negate and store it in the T bit. -;; Used for SH4A little endian. 
-(define_insn "tstsi_t_zero_extract_subreg_xor_little" - [(set (reg:SI T_REG) - (zero_extract:SI - (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z") - (match_operand:SI 3 "const_int_operand")) 0) - (match_operand:SI 1 "const_int_operand") - (match_operand:SI 2 "const_int_operand")))] - "TARGET_SH1 && TARGET_LITTLE_ENDIAN - && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) - == (INTVAL (operands[3]) & 255) - && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)" -{ - operands[3] = GEN_INT (INTVAL (operands[3]) & 255); - return "tst %3,%0"; -} - [(set_attr "type" "mt_group")]) - -;; Extract single bit, negate and store it in the T bit. -;; Used for SH4A big endian. -(define_insn "tstsi_t_zero_extract_subreg_xor_big" + (eq:SI (and:SI (ashift:SI (match_operand:SI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) + (match_operand 2 "const_int_operand")) + (const_int 0)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" [(set (reg:SI T_REG) - (zero_extract:SI - (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z") - (match_operand:SI 3 "const_int_operand")) 3) - (match_operand:SI 1 "const_int_operand") - (match_operand:SI 2 "const_int_operand")))] - "TARGET_SH1 && TARGET_BIG_ENDIAN - && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) - == (INTVAL (operands[3]) & 255) - && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)" -{ - operands[3] = GEN_INT (INTVAL (operands[3]) & 255); - return "tst %3,%0"; -} - [(set_attr "type" "mt_group")]) + (eq:SI (and:SI (match_dup 0) (match_dup 2)) (const_int 0)))] +{ + operands[2] = GEN_INT (INTVAL (operands[2]) >> INTVAL (operands[1])); +}) (define_insn "cmpeqsi_t" [(set (reg:SI T_REG) @@ -809,6 +891,85 @@ cmp/eq %1,%0" [(set_attr "type" "mt_group")]) +;; Sometimes combine fails to form the (eq (and (op) (op)) 0) tst insn. +;; Try to fix that in the split1 pass by looking for the previous set +;; of the tested op. Also see if there is a preceeding sign/zero +;; extension that can be avoided. +(define_split + [(set (reg:SI T_REG) + (eq:SI (match_operand:SI 0 "arith_reg_operand") (const_int 0)))] + "TARGET_SH1 && can_create_pseudo_p () && optimize + && !sh_in_recog_treg_set_expr ()" + [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))] +{ + if (dump_file) + fprintf (dump_file, "cmpeqsi_t: trying to optimize const_int 0\n"); + + /* If the tested reg is not dead after this insn, it's probably used by + something else after the comparison. It's probably better to leave + it as it is. */ + if (find_regno_note (curr_insn, REG_DEAD, REGNO (operands[0])) == NULL_RTX) + FAIL; + + /* FIXME: Maybe also search the predecessor basic blocks to catch + more cases. */ + set_of_reg op = sh_find_set_of_reg (operands[0], curr_insn, + prev_nonnote_insn_bb); + + if (op.set_src != NULL && GET_CODE (op.set_src) == AND + && !sh_insn_operands_modified_between_p (op.insn, op.insn, curr_insn)) + { + if (dump_file) + fprintf (dump_file, "cmpeqsi_t: found preceeding and in insn %d\n", + INSN_UID (op.insn)); + + if (!(arith_reg_operand (XEXP (op.set_src, 0), SImode) + && (arith_reg_operand (XEXP (op.set_src, 1), SImode) + || CONST_INT_P (XEXP (op.set_src, 1))))) + FAIL; + + /* Assume that the operands of the andsi insn are compatible with the + operands of the tstsi_t insn, which is generally the case. 
*/ + if (dump_file) + fprintf (dump_file, "cmpeqsi_t: replacing with tstsi_t\n"); + emit_insn (gen_tstsi_t (XEXP (op.set_src, 0), XEXP (op.set_src, 1))); + DONE; + } + + /* Converting HImode into tests against 0xFFFF tends to increase the code + size, as it will create constant pool entries. Disable it for now. */ + const bool enable_himode = false; + + /* FIXME: try to keep the (eq (reg) (const_int 0)). Even if the zero + extended reg is used after this insn, if we know that _before_ the zero + extension the value was loaded via sign extending mem load, we can just + use the value of the mem load directly. */ + sh_extending_set_of_reg eop = sh_find_extending_set_of_reg (operands[0], + curr_insn); + + if (eop.ext_code != UNKNOWN + && (eop.from_mode == QImode || (eop.from_mode == HImode && enable_himode)) + && eop.can_use_as_unextended_reg () + && !reg_used_between_p (operands[0], eop.insn, curr_insn)) + { + /* Bypass the sign/zero extension and test against the bit mask, but + only if it's the only use of the sign/zero extracted value. + Otherwise we'd be introducing new constants in the pool. */ + if (dump_file) + fprintf (dump_file, "cmpeqsi_t: bypassing sign/zero extension in " + "insn %d and using tstsi_t\n", INSN_UID (op.insn)); + + emit_insn (gen_tstsi_t ( + eop.use_as_unextended_reg (curr_insn), + GEN_INT (eop.from_mode == QImode ? 0xFF : 0xFFFF))); + DONE; + } + + if (dump_file) + fprintf (dump_file, "cmpeqsi_t: nothing optimized\n"); + FAIL; +}) + ;; FIXME: For some reason, on SH4A and SH2A combine fails to simplify this ;; pattern by itself. What this actually does is: ;; x == 0: (1 >> 0-0) & 1 = 1 @@ -845,6 +1006,61 @@ cmp/ge %1,%0" [(set_attr "type" "mt_group")]) +;; Recombine a cmp/pz followed by a nott into a shll. +;; On non-SH2A recombine a cmp/pz followed by a movrt into shll-movt. +;; On SH2A cmp/pz-movrt is slightly better, as it does not mutate the input. +(define_split + [(set (reg:SI T_REG) + (ge:SI (match_operand:SI 0 "arith_reg_operand") (const_int 0)))] + + "TARGET_SH1 && can_create_pseudo_p () && optimize + && !sh_in_recog_treg_set_expr ()" + [(const_int 0)] +{ + if (dump_file) + fprintf (dump_file, "cmpgesi_t: trying to optimize for const_int 0\n"); + + rtx_insn* i = next_nonnote_insn_bb (curr_insn); + + if (dump_file) + { + fprintf (dump_file, "cmpgesi_t: following insn is \n"); + print_rtl_single (dump_file, i); + fprintf (dump_file, "\n"); + } + + if (sh_is_nott_insn (i)) + { + if (dump_file) + fprintf (dump_file, + "cmpgesi_t: replacing (cmp/pz, nott) with (shll)\n"); + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[0])); + set_insn_deleted (i); + DONE; + } + + /* On non-SH2A negc is used as movrt replacement, which sets T = 1. + Thus we can remove it only if T is marked as dead afterwards. */ + if (rtx dest_reg = !TARGET_SH2A + && sh_reg_dead_or_unused_after_insn (i, T_REG) + ? sh_movrt_set_dest (i) : NULL) + { + if (dump_file) + fprintf (dump_file, + "cmpgesi_t: replacing (cmp/pz, movrt) with (shll, movt)\n"); + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[0])); + add_reg_note (emit_insn (gen_movt (dest_reg, get_t_reg_rtx ())), + REG_DEAD, get_t_reg_rtx ()); + set_insn_deleted (i); + DONE; + } + + if (dump_file) + fprintf (dump_file, "cmpgesi_t: nothing optimized\n"); + + FAIL; +}) + ;; FIXME: This is actually wrong. There is no way to literally move a ;; general reg to t reg. Luckily, it seems that this pattern will be only ;; used when the general reg is known be either '0' or '1' during combine. 
@@ -853,13 +1069,18 @@ ;; and invert the dependent logic. (define_insn "*negtstsi" [(set (reg:SI T_REG) (match_operand:SI 0 "arith_reg_operand" "r"))] - "TARGET_SH1" + "TARGET_SH1 && !sh_in_recog_treg_set_expr ()" "cmp/pl %0" [(set_attr "type" "mt_group")]) ;; Some integer sign comparison patterns can be realized with the div0s insn. ;; div0s Rm,Rn T = (Rm >> 31) ^ (Rn >> 31) -(define_insn "cmp_div0s_0" +;; +;; The 'cmp_div0s' pattern is our canonical form, into which all the other +;; variations are converted. The negative forms will split into a trailing +;; nott sequence, which will be eliminated either by the +;; 'any_treg_expr_to_reg' pattern, or by the 'sh_treg_combine' pass. +(define_insn "cmp_div0s" [(set (reg:SI T_REG) (lshiftrt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r") (match_operand:SI 1 "arith_reg_operand" "r")) @@ -868,79 +1089,79 @@ "div0s %0,%1" [(set_attr "type" "arith")]) -(define_insn "cmp_div0s_1" +(define_insn_and_split "*cmp_div0s_1" [(set (reg:SI T_REG) - (lt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r") - (match_operand:SI 1 "arith_reg_operand" "r")) - (const_int 0)))] - "TARGET_SH1" - "div0s %0,%1" - [(set_attr "type" "arith")]) - -(define_insn_and_split "*cmp_div0s_0" - [(set (match_operand:SI 0 "arith_reg_dest" "") - (lshiftrt:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "") - (match_operand:SI 2 "arith_reg_operand" "")) - (const_int 31))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" + (xor:SI (ge:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 0)) + (ge:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 0))))] + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" [(set (reg:SI T_REG) - (lshiftrt:SI (xor:SI (match_dup 1) (match_dup 2)) (const_int 31))) - (set (match_dup 0) (reg:SI T_REG))]) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31)))]) -(define_insn "*cmp_div0s_0" +(define_insn_and_split "*cmp_div0s_2" [(set (reg:SI T_REG) - (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") (const_int 31)) - (ge:SI (match_operand:SI 1 "arith_reg_operand" "r") + (ge:SI (match_operand:SI 1 "arith_reg_operand") (const_int 0))))] - "TARGET_SH1" - "div0s %0,%1" - [(set_attr "type" "arith")]) + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31)))]) -(define_insn_and_split "*cmp_div0s_1" - [(set (match_operand:SI 0 "arith_reg_dest" "") - (ge:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "") - (match_operand:SI 2 "arith_reg_operand" "")) - (const_int 0))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" +(define_insn_and_split "*cmp_div0s_3" + [(set (reg:SI T_REG) + (eq:SI (ge:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 0)) + (ge:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 0))))] + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(const_int 0)] -;; We have to go through the movnegt expander here which will handle the -;; SH2A vs non-SH2A cases. 
-{ - emit_insn (gen_cmp_div0s_1 (operands[1], operands[2])); - emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); - DONE; -}) + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31))) + (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) -(define_insn_and_split "*cmp_div0s_1" +(define_insn_and_split "*cmp_div0s_4" [(set (reg:SI T_REG) - (ge:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "") - (match_operand:SI 1 "arith_reg_operand" "")) + (ge:SI (xor:SI (match_operand:SI 0 "arith_reg_operand") + (match_operand:SI 1 "arith_reg_operand")) (const_int 0)))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1)) - (const_int 0))) + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31))) (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) -(define_insn_and_split "*cmp_div0s_1" +(define_insn_and_split "*cmp_div0s_5" + [(set (reg:SI T_REG) + (xor:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 31)) + (ge:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 0))))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31))) + (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) + +(define_insn_and_split "*cmp_div0s_6" [(set (reg:SI T_REG) (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") (const_int 31)) (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") (const_int 31))))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1)) - (const_int 0))) + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31))) (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) ;; ------------------------------------------------------------------------- @@ -1059,43 +1280,6 @@ (label_ref (match_dup 2)) (pc)))]) -;; Compare and branch combine patterns for div0s comparisons. -(define_insn_and_split "*cbranch_div0s" - [(set (pc) - (if_then_else (lt (xor:SI (match_operand:SI 0 "arith_reg_operand" "") - (match_operand:SI 1 "arith_reg_operand" "")) - (const_int 0)) - (label_ref (match_operand 2)) - (pc))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& 1" - [(set (reg:SI T_REG) - (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0))) - (set (pc) - (if_then_else (ne (reg:SI T_REG) (const_int 0)) - (label_ref (match_dup 2)) - (pc)))]) - -(define_insn_and_split "*cbranch_div0s" - [(set (pc) - (if_then_else (ge (xor:SI (match_operand:SI 0 "arith_reg_operand" "") - (match_operand:SI 1 "arith_reg_operand" "")) - (const_int 0)) - (label_ref (match_operand 2)) - (pc))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& 1" - [(set (reg:SI T_REG) - (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0))) - (set (pc) - (if_then_else (eq (reg:SI T_REG) (const_int 0)) - (label_ref (match_dup 2)) - (pc)))]) - ;; ------------------------------------------------------------------------- ;; SImode unsigned integer comparisons ;; ------------------------------------------------------------------------- @@ -1809,50 +1993,71 @@ ;; A simplified version of the addc insn, where the exact value of the ;; T bit doesn't matter. This is easier for combine to pick up. 
;; We allow a reg or 0 for one of the operands in order to be able to -;; do 'reg + T' sequences. Reload will load the constant 0 into the reg -;; as needed. -;; FIXME: The load of constant 0 should be split out before reload, or else -;; it will be difficult to hoist or combine the constant load. -(define_insn "*addc" - [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0") - (match_operand:SI 2 "arith_reg_or_0_operand" "r")) - (match_operand:SI 3 "t_reg_operand" ""))) +;; do 'reg + T' sequences. +(define_insn_and_split "*addc" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "arith_reg_or_0_operand")) + (match_operand 3 "treg_set_expr"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" - "addc %2,%0" - [(set_attr "type" "arith")]) + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[3], curr_insn); + if (ti.has_trailing_nott ()) + { + if (operands[2] == const0_rtx) + { + /* op1 + 0 + (1 - T) = op1 + 1 - T = op1 - (-1) - T */ + remove_insn (ti.trailing_nott ()); + emit_insn (gen_subc (operands[0], operands[1], + force_reg (SImode, GEN_INT (-1)))); + DONE; + } + else if (!TARGET_SH2A) + { + /* op1 + op2 + (1 - T) = op1 - (0 - op2 - 1) - T = op1 - ~op2 - T + On SH2A keep the nott insn, because nott-addc sequence doesn't + mutate the inputs. */ + remove_insn (ti.trailing_nott ()); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_one_cmplsi2 (tmp, operands[2])); + emit_insn (gen_subc (operands[0], operands[1], tmp)); + DONE; + } + } -;; Split 'reg + reg + 1' into a sett addc sequence, as it can be scheduled -;; better, if the sett insn can be done early. -(define_insn_and_split "*addc_r_r_1" - [(set (match_operand:SI 0 "arith_reg_dest" "") - (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "") - (match_operand:SI 2 "arith_reg_operand" "")) - (const_int 1))) + emit_insn (gen_addc (operands[0], operands[1], + force_reg (SImode, operands[2]))); + DONE; +}) + +(define_insn_and_split "*addc" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (plus:SI (match_operand 1 "treg_set_expr") + (match_operand:SI 2 "arith_reg_operand")) + (match_operand:SI 3 "arith_reg_operand"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(set (reg:SI T_REG) (const_int 1)) - (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 2)) - (reg:SI T_REG))) + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) + (match_dup 1))) (clobber (reg:SI T_REG))])]) -;; Left shifts by one are usually done with an add insn to avoid T_REG -;; clobbers. Thus addc can also be used to do something like '(x << 1) + 1'. 
-(define_insn_and_split "*addc_2r_1" +(define_insn_and_split "*addc" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 2)) - (const_int 1))) + (plus:SI (match_operand 1 "treg_set_expr") + (plus:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "arith_reg_operand")))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(set (reg:SI T_REG) (const_int 1)) - (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 1)) - (reg:SI T_REG))) + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) + (match_dup 1))) (clobber (reg:SI T_REG))])]) ;; Sometimes combine will try to do 'reg + (0-reg) + 1' if the *addc pattern @@ -1881,152 +2086,77 @@ ;; can be scheduled much better since the load of the constant can be ;; done earlier, before any comparison insns that store the result in ;; the T bit. +;; However, avoid things like 'reg + 1', which would expand into a +;; 3 insn sequence, instead of add #imm8. (define_insn_and_split "*addc_t_r" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (match_operand:SI 1 "t_reg_operand") + (plus:SI (match_operand 1 "treg_set_expr_not_const01") (match_operand:SI 2 "arith_reg_operand"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(parallel [(set (match_dup 0) - (plus:SI (plus:SI (match_dup 2) (const_int 0)) - (match_dup 1))) + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (const_int 0)) + (match_dup 1))) (clobber (reg:SI T_REG))])]) (define_insn_and_split "*addc_r_t" [(set (match_operand:SI 0 "arith_reg_dest") (plus:SI (match_operand:SI 1 "arith_reg_operand") - (match_operand:SI 2 "t_reg_operand"))) + (match_operand 2 "treg_set_expr_not_const01"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(parallel [(set (match_dup 0) - (plus:SI (plus:SI (match_dup 1) (const_int 0)) - (match_dup 2))) + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (const_int 0)) + (match_dup 2))) (clobber (reg:SI T_REG))])]) -;; Use shlr-addc to do 'reg + (reg & 1)'. -(define_insn_and_split "*addc_r_lsb" +;; Convert '2 * reg + T' into 'reg + reg + T'. +(define_insn_and_split "*addc_2r_t" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 1)) - (match_operand:SI 2 "arith_reg_operand"))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2))) - (clobber (reg:SI T_REG))])] -{ - emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1])); -}) - -;; Use shlr-addc to do 'reg + reg + (reg & 1)'. -(define_insn_and_split "*addc_r_r_lsb" - [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 1)) - (match_operand:SI 2 "arith_reg_operand")) - (match_operand:SI 3 "arith_reg_operand"))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) - (reg:SI T_REG))) - (clobber (reg:SI T_REG))])] -{ - emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1])); -}) - -;; Canonicalize 'reg + (reg & 1) + reg' into 'reg + reg + (reg & 1)'. 
-(define_insn_and_split "*addc_r_lsb_r" - [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 1)) - (plus:SI (match_operand:SI 2 "arith_reg_operand") - (match_operand:SI 3 "arith_reg_operand")))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) - (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1)) - (match_dup 2)) - (match_dup 3))) - (clobber (reg:SI T_REG))])]) - -;; Canonicalize '2 * reg + (reg & 1)' into 'reg + reg + (reg & 1)'. -(define_insn_and_split "*addc_2r_lsb" - [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 1)) + (plus:SI (match_operand 1 "treg_set_expr") (mult:SI (match_operand:SI 2 "arith_reg_operand") (const_int 2)))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) - (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1)) - (match_dup 2)) - (match_dup 2))) + "&& 1" + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 2)) + (match_dup 1))) (clobber (reg:SI T_REG))])]) -;; Use shll-addc to do 'reg + ((unsigned int)reg >> 31)'. -(define_insn_and_split "*addc_r_msb" +(define_insn_and_split "*addc_2r_t" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 31)) - (match_operand:SI 2 "arith_reg_operand"))) + (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 2)) + (match_operand 2 "treg_set_expr"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2))) - (clobber (reg:SI T_REG))])] -{ - emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1])); -}) + "&& 1" + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 1)) + (match_dup 2))) + (clobber (reg:SI T_REG))])]) -;; Use shll-addc to do 'reg + reg + ((unsigned int)reg >> 31)'. -(define_insn_and_split "*addc_r_r_msb" +;; Convert '(op2 + T) - op3' into 'op2 + (-op3) + T' +(define_insn_and_split "*addc_negreg_t" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 31)) - (match_operand:SI 2 "arith_reg_operand")) - (match_operand:SI 3 "arith_reg_operand"))) + (minus:SI (plus:SI (match_operand 1 "treg_set_expr") + (match_operand:SI 2 "arith_reg_operand")) + (match_operand:SI 3 "arith_reg_operand"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) - (reg:SI T_REG))) + "&& 1" + [(set (match_dup 4) (neg:SI (match_dup 3))) + (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 4)) + (match_dup 1))) (clobber (reg:SI T_REG))])] { - emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1])); + operands[4] = gen_reg_rtx (SImode); }) -;; Canonicalize '2 * reg + ((unsigned int)reg >> 31)' -;; into 'reg + reg + (reg & 1)'. 
-(define_insn_and_split "*addc_2r_msb" - [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand") - (const_int 2)) - (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") - (const_int 31)))) - (clobber (reg:SI T_REG))] - "TARGET_SH1" - "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) - (plus:SI (plus:SI (lshiftrt:SI (match_dup 2) (const_int 31)) - (match_dup 1)) - (match_dup 1))) - (clobber (reg:SI T_REG))])]) - (define_expand "addsi3" [(set (match_operand:SI 0 "arith_reg_operand" "") (plus:SI (match_operand:SI 1 "arith_operand" "") @@ -2169,15 +2299,60 @@ ;; We allow a reg or 0 for one of the operands in order to be able to ;; do 'reg - T' sequences. Reload will load the constant 0 into the reg ;; as needed. -(define_insn "*subc" - [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") - (match_operand:SI 2 "arith_reg_or_0_operand" "r")) - (match_operand:SI 3 "t_reg_operand" ""))) +(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "arith_reg_or_0_operand")) + (match_operand 3 "treg_set_expr"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" - "subc %2,%0" - [(set_attr "type" "arith")]) + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[3], curr_insn); + if (ti.has_trailing_nott ()) + { + if (operands[2] == const0_rtx) + { + /* op1 - (1 - T) = op1 - 1 + T = op1 + (-1) + T */ + remove_insn (ti.trailing_nott ()); + emit_insn (gen_addc (operands[0], operands[1], + force_reg (SImode, GEN_INT (-1)))); + DONE; + } + else if (!TARGET_SH2A) + { + /* op1 - op2 - (1 - T) = op1 + (0 - op2 - 1) + T = op1 + ~op2 + T + On SH2A keep the nott insn, because nott-subc sequence doesn't + mutate the inputs. */ + remove_insn (ti.trailing_nott ()); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_one_cmplsi2 (tmp, operands[2])); + emit_insn (gen_addc (operands[0], operands[1], tmp)); + DONE; + } + } + + emit_insn (gen_subc (operands[0], operands[1], + force_reg (SImode, operands[2]))); + DONE; +}) + +;; Convert reg - T - reg = reg - reg - T +(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "treg_set_expr")) + (match_operand:SI 3 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (minus:SI (minus:SI (match_dup 1) (match_dup 3)) + (match_dup 2))) + (clobber (reg:SI T_REG))])]) ;; Split reg - reg - 1 into a sett subc sequence, as it can be scheduled ;; better, if the sett insn can be done early. @@ -2187,13 +2362,12 @@ (plus:SI (not:SI (match_operand:SI 1 "arith_reg_operand" "")) (match_operand:SI 2 "arith_reg_operand" ""))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(set (reg:SI T_REG) (const_int 1)) - (parallel [(set (match_dup 0) + [(parallel [(set (match_dup 0) (minus:SI (minus:SI (match_dup 2) (match_dup 1)) - (reg:SI T_REG))) + (const_int 1))) (clobber (reg:SI T_REG))])]) ;; Split 'reg - T' into 'reg - 0 - T' to utilize the subc insn. @@ -2208,12 +2382,14 @@ ;; can be scheduled much better since the load of the constant can be ;; done earlier, before any comparison insns that store the result in ;; the T bit. 
+;; However, avoid things like 'reg - 1', which would expand into a +;; 3 insn sequence, instead of add #imm8. (define_insn_and_split "*subc" [(set (match_operand:SI 0 "arith_reg_dest" "") (minus:SI (match_operand:SI 1 "arith_reg_operand" "") - (match_operand:SI 2 "t_reg_operand" ""))) + (match_operand 2 "treg_set_expr_not_const01"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" [(parallel [(set (match_dup 0) @@ -2221,6 +2397,67 @@ (match_dup 2))) (clobber (reg:SI T_REG))])]) +;; Convert +;; (1 - T) - op2 = 1 - op2 - T +(define_insn_and_split "*subc_negt_reg" + [(set (match_operand:SI 0 "arith_reg_dest") + (minus:SI (match_operand 1 "treg_set_expr_not_const01") + (match_operand:SI 2 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + if (ti.remove_trailing_nott ()) + { + /* (1 - T) - op2 = 1 - op2 - T */ + emit_insn (gen_subc (operands[0], + force_reg (SImode, GEN_INT (1)), operands[2])); + } + else + { + /* T - op2: use movt,sub sequence. */ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_movt (tmp, get_t_reg_rtx ())); + emit_insn (gen_subsi3 (operands[0], tmp, operands[2])); + } + DONE; +}) + +;; Convert +;; op1 - (1 - T) + op3 = op1 - 1 + T + op3 +;; (op1 - T) + op3 = op1 - (-op3) - T +(define_insn_and_split "*subc_negreg_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "treg_set_expr")) + (match_operand:SI 3 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[2], curr_insn); + if (ti.remove_trailing_nott ()) + { + /* op1 - (1 - T) + op3 = (op1 - 1) + op3 + T */ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (tmp, operands[1], GEN_INT (-1))); + emit_insn (gen_addc (operands[0], tmp, operands[3])); + } + else + { + /* (op1 - T) + op3' = 'op1 - (-op3) - T */ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (tmp, operands[3])); + emit_insn (gen_subc (operands[0], operands[1], tmp)); + } + DONE; +}) + (define_insn "*subsi3_internal" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -3995,10 +4232,38 @@ label: offset += 8 - GET_MODE_SIZE (inmode); operands[5] = gen_rtx_SUBREG (inmode, operands[0], offset); }) - -;; ------------------------------------------------------------------------- -;; Shifts and rotates -;; ------------------------------------------------------------------------- + +;; ------------------------------------------------------------------------- +;; Shifts and rotates +;; ------------------------------------------------------------------------- + +;; Let combine see that we can get the MSB and LSB into the T bit +;; via shll and shlr. This allows it to plug it into insns that can have +;; the T bit as an input (e.g. addc). +;; FIXME: On SH2A use bld #0,Rn instead of shlr to avoid mutating the input. 
+(define_insn_and_split "*reg_lsb_t" + [(set (reg:SI T_REG) + (and:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 1)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[0])); +}) + +(define_insn_and_split "*reg_msb_t" + [(set (reg:SI T_REG) + (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 31)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[0])); +}) (define_expand "rotldi3" [(set (match_operand:DI 0 "arith_reg_dest" "") @@ -4236,20 +4501,46 @@ label: ;; directly. Otherwise we have to insert a shift in between. (define_insn_and_split "*rotcr" [(set (match_operand:SI 0 "arith_reg_dest") - (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_or_0_operand") (match_operand:SI 2 "const_int_operand")) - (ashift:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand") + (ashift:SI (match_operand 3 "arith_reg_or_treg_set_expr") (const_int 31)))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" + "&& 1" [(const_int 0)] { + rtx prev_set_t_insn = NULL_RTX; + + if (!arith_reg_operand (operands[3], SImode)) + { + sh_treg_insns ti = sh_split_treg_set_expr (operands[3], curr_insn); + if (!ti.was_treg_operand ()) + prev_set_t_insn = ti.first_insn (); + + operands[3] = get_t_reg_rtx (); + + if (TARGET_SH2A && ti.has_trailing_nott () && operands[1] == const0_rtx) + { + /* Convert to a movrt, rotr sequence. */ + remove_insn (ti.trailing_nott ()); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_movnegt (tmp, get_t_reg_rtx ())); + emit_insn (gen_rotrsi3_1 (operands[0], tmp)); + DONE; + } + } + + if (operands[1] == const0_rtx) + { + operands[1] = gen_reg_rtx (SImode); + emit_insn (gen_movt (operands[1], get_t_reg_rtx ())); + } + if (INTVAL (operands[2]) > 1) { const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1); - rtx prev_set_t_insn = NULL_RTX; rtx tmp_t_reg = NULL_RTX; /* If we're going to emit a shift sequence that clobbers the T_REG, @@ -4260,7 +4551,8 @@ label: if (sh_lshrsi_clobbers_t_reg_p (shift_count) && ! sh_dynamicalize_shift_p (shift_count)) { - prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); + if (prev_set_t_insn == NULL) + prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); /* Skip the nott insn, which was probably inserted by the splitter of *rotcr_neg_t. Don't use one of the recog functions @@ -4320,9 +4612,9 @@ label: ;; it so that it will try the pattern above. 
(define_split [(set (match_operand:SI 0 "arith_reg_dest") - (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand") + (ior:SI (ashift:SI (match_operand 1 "arith_reg_or_treg_set_expr") (const_int 31)) - (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (lshiftrt:SI (match_operand:SI 2 "arith_reg_or_0_operand") (match_operand:SI 3 "const_int_operand"))))] "TARGET_SH1 && can_create_pseudo_p ()" [(parallel [(set (match_dup 0) @@ -4421,7 +4713,7 @@ label: [(set (match_operand:SI 0 "arith_reg_dest") (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") (match_operand:SI 2 "const_int_operand")) - (match_operand:SI 3 "t_reg_operand"))) + (match_operand 3 "treg_set_expr"))) (clobber (reg:SI T_REG))] "TARGET_SH1" "#" @@ -4429,7 +4721,11 @@ label: [(parallel [(set (match_dup 0) (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) (and:SI (match_dup 3) (const_int 1)))) - (clobber (reg:SI T_REG))])]) + (clobber (reg:SI T_REG))])] +{ + sh_split_treg_set_expr (operands[3], curr_insn); + operands[3] = get_t_reg_rtx (); +}) (define_insn_and_split "*rotcl" [(set (match_operand:SI 0 "arith_reg_dest") @@ -4484,20 +4780,51 @@ label: emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3])); }) +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "const_int_operand")) + (zero_extract:SI (match_operand:SI 3 "arith_reg_operand") + (const_int 1) + (match_operand 4 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (match_dup 5) (const_int 1)))) + (clobber (reg:SI T_REG))])] +{ + if (TARGET_SH2A && satisfies_constraint_K03 (operands[4])) + { + /* On SH2A we can use the bld insn to zero extract a single bit + into the T bit. */ + operands[5] = get_t_reg_rtx (); + emit_insn (gen_bldsi_reg (operands[3], operands[4])); + } + else + { + /* If we can't use the bld insn we have to emit a tst + nott sequence + to get the extracted bit into the T bit. + This will probably be worse than pre-shifting the operand. */ + operands[5] = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (operands[5], operands[3], operands[4])); + } +}) + ;; rotcr combine bridge pattern which will make combine try out more ;; complex patterns. (define_insn_and_split "*rotcr" [(set (match_operand:SI 0 "arith_reg_dest") - (ashift:SI (match_operand:SI 1 "t_reg_operand") (const_int 31)))] - "TARGET_SH1" + (ashift:SI (match_operand 1 "treg_set_expr") (const_int 31)))] + "TARGET_SH1 && can_create_pseudo_p ()" "#" "&& 1" - [(set (match_dup 0) (match_dup 1)) - (parallel [(set (match_dup 0) - (ior:SI (lshiftrt:SI (match_dup 0) (const_int 1)) + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (const_int 0) (const_int 1)) (ashift:SI (match_dup 1) (const_int 31)))) - (set (reg:SI T_REG) - (and:SI (match_dup 0) (const_int 1)))])]) + (clobber (reg:SI T_REG))])]) (define_insn_and_split "*rotcr" [(set (match_operand:SI 0 "arith_reg_dest") @@ -4999,6 +5326,21 @@ label: DONE; }) +;; If the shift amount is changed by combine it will try to plug the +;; use on the symbol of the library function and the PR clobber. 
+(define_insn_and_split "*ashrsi2_31" + [(set (match_operand:SI 0 "arith_reg_dest") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 31))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (use (match_operand:SI 2 "symbol_ref_operand"))] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31))) + (clobber (reg:SI T_REG))])]) + (define_insn "ashrsi3_d" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -5618,14 +5960,20 @@ label: ;; T bit doesn't matter. This is easier for combine to pick up. ;; Notice that '0 - x - 1' is the same as '~x', thus we don't specify ;; extra patterns for this case. -(define_insn "*negc" +(define_insn_and_split "*negc" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")) - (match_operand:SI 2 "t_reg_operand" ""))) + (match_operand 2 "treg_set_expr"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" - "negc %1,%0" - [(set_attr "type" "arith")]) + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + sh_split_treg_set_expr (operands[2], curr_insn); + emit_insn (gen_negc (operands[0], operands[1])); + DONE; +}); (define_insn "*negdi_media" [(set (match_operand:DI 0 "arith_reg_dest" "=r") @@ -6101,11 +6449,14 @@ label: ;; the displacement value to zero. However, doing so too early results in ;; missed opportunities for other optimizations such as post-inc or index ;; addressing loads. -;; Although the 'zero_extend_movu_operand' predicate does not allow simple -;; register addresses (an address without a displacement, index, post-inc), -;; zero-displacement addresses might be generated during reload, wich are -;; simplified to simple register addresses in turn. Thus, we have to -;; provide the Sdd and Sra alternatives in the patterns. +;; We don't allow the zero extending loads to match during RTL expansion +;; (see zero_extend_operand predicate), as this would pessimize other +;; optimization opportunities such as bit extractions of unsigned mems, +;; where the zero extraction is irrelevant. If the zero extracting mem +;; loads are emitted early it will be more difficult to change them back +;; to sign extending loads (which are preferred). +;; The combine pass will also try to combine mem loads and zero extends, +;; which is prevented by 'sh_legitimate_combined_insn'. (define_insn "*zero_extendsi2_disp_mem" [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") (zero_extend:SI @@ -10672,25 +11023,25 @@ label: ;; On SH the thread pointer is kept in the GBR. ;; These patterns are usually expanded from the respective built-in functions. (define_expand "get_thread_pointersi" - [(set (match_operand:SI 0 "register_operand") (reg:SI GBR_REG))] + [(set (match_operand:SI 0 "arith_reg_dest") (reg:SI GBR_REG))] "TARGET_SH1") ;; The store_gbr insn can also be used on !TARGET_SH1 for doing TLS accesses. 
(define_insn "store_gbr" - [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GBR_REG))] + [(set (match_operand:SI 0 "arith_reg_dest" "=r") (reg:SI GBR_REG))] "" "stc gbr,%0" [(set_attr "type" "tls_load")]) (define_expand "set_thread_pointersi" [(set (reg:SI GBR_REG) - (unspec_volatile:SI [(match_operand:SI 0 "register_operand")] + (unspec_volatile:SI [(match_operand:SI 0 "arith_reg_operand")] UNSPECV_GBR))] "TARGET_SH1") (define_insn "load_gbr" [(set (reg:SI GBR_REG) - (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + (unspec_volatile:SI [(match_operand:SI 0 "arith_reg_operand" "r")] UNSPECV_GBR))] "TARGET_SH1" "ldc %0,gbr" @@ -10708,7 +11059,7 @@ label: ;; zero displacement for some strange reason. (define_insn "*mov_gbr_load" - [(set (match_operand:QIHISI 0 "register_operand" "=z") + [(set (match_operand:QIHISI 0 "arith_reg_dest" "=z") (mem:QIHISI (plus:SI (reg:SI GBR_REG) (match_operand:QIHISI 1 "gbr_displacement"))))] "TARGET_SH1" @@ -10716,14 +11067,14 @@ label: [(set_attr "type" "load")]) (define_insn "*mov_gbr_load" - [(set (match_operand:QIHISI 0 "register_operand" "=z") + [(set (match_operand:QIHISI 0 "arith_reg_dest" "=z") (mem:QIHISI (reg:SI GBR_REG)))] "TARGET_SH1" "mov. @(0,gbr),%0" [(set_attr "type" "load")]) (define_insn "*mov_gbr_load" - [(set (match_operand:SI 0 "register_operand" "=z") + [(set (match_operand:SI 0 "arith_reg_dest" "=z") (sign_extend:SI (mem:QIHI (plus:SI (reg:SI GBR_REG) (match_operand:QIHI 1 "gbr_displacement")))))] @@ -10732,7 +11083,7 @@ label: [(set_attr "type" "load")]) (define_insn "*mov_gbr_load" - [(set (match_operand:SI 0 "register_operand" "=z") + [(set (match_operand:SI 0 "arith_reg_dest" "=z") (sign_extend:SI (mem:QIHI (reg:SI GBR_REG))))] "TARGET_SH1" "mov. @(0,gbr),%0" @@ -10759,7 +11110,7 @@ label: ;; Do not match this insn during or after reload because it can't be split ;; afterwards. (define_insn_and_split "*movdi_gbr_load" - [(set (match_operand:DI 0 "register_operand") + [(set (match_operand:DI 0 "arith_reg_dest") (match_operand:DI 1 "gbr_address_mem"))] "TARGET_SH1 && can_create_pseudo_p ()" "#" @@ -11562,7 +11913,7 @@ label: (use (match_operand:SI 2 "arith_reg_operand" "r"))] "TARGET_SH1" "negc %2,%0" - "&& 1" + "&& !sh_in_recog_treg_set_expr ()" [(const_int 0)] { if (sh_split_movrt_negc_to_movt_xor (curr_insn, operands)) @@ -11583,7 +11934,7 @@ label: (clobber (reg:SI T_REG))] "TARGET_SH1 && ! TARGET_SH2A" "#" - "&& 1" + "&& !sh_in_recog_treg_set_expr ()" [(const_int 0)] { if (sh_split_movrt_negc_to_movt_xor (curr_insn, operands)) @@ -11617,61 +11968,107 @@ label: ;; T = 0: 0x80000000 -> reg ;; T = 1: 0x7FFFFFFF -> reg ;; This works because 0 - 0x80000000 = 0x80000000. -;; -;; This insn must not match again after it has been split into the constant -;; load and negc. This is accomplished by the special negc insn that -;; has a use on the operand. 
(define_insn_and_split "*mov_t_msb_neg" [(set (match_operand:SI 0 "arith_reg_dest") (minus:SI (const_int -2147483648) ;; 0x80000000 - (match_operand 1 "t_reg_operand"))) + (match_operand 1 "treg_set_expr"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p ()" "#" - "&& can_create_pseudo_p ()" - [(set (match_dup 2) (const_int -2147483648)) - (parallel [(set (match_dup 0) (minus:SI (neg:SI (match_dup 2)) - (reg:SI T_REG))) - (clobber (reg:SI T_REG)) - (use (match_dup 2))])] + "&& 1" + [(const_int 0)] { - operands[2] = gen_reg_rtx (SImode); -}) + if (negt_reg_operand (operands[1], VOIDmode)) + { + emit_insn (gen_addc (operands[0], + force_reg (SImode, const0_rtx), + force_reg (SImode, GEN_INT (2147483647)))); + DONE; + } -(define_insn "*mov_t_msb_neg_negc" - [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")) - (match_operand:SI 2 "t_reg_operand"))) - (clobber (reg:SI T_REG)) - (use (match_dup 1))] - "TARGET_SH1" - "negc %1,%0" - [(set_attr "type" "arith")]) + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + if (ti.remove_trailing_nott ()) + emit_insn (gen_addc (operands[0], + force_reg (SImode, const0_rtx), + force_reg (SImode, GEN_INT (2147483647)))); + else + emit_insn (gen_negc (operands[0], + force_reg (SImode, GEN_INT (-2147483648LL)))); + DONE; +}) +;; 0x7fffffff + T +;; 0x7fffffff + (1-T) = 0 - 0x80000000 - T (define_insn_and_split "*mov_t_msb_neg" [(set (match_operand:SI 0 "arith_reg_dest") - (plus:SI (match_operand 1 "negt_reg_operand") + (plus:SI (match_operand 1 "treg_set_expr") (const_int 2147483647))) ;; 0x7fffffff (clobber (reg:SI T_REG))] "TARGET_SH1" "#" "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) - (minus:SI (const_int -2147483648) (reg:SI T_REG))) - (clobber (reg:SI T_REG))])]) + [(const_int 0)] +{ + if (negt_reg_operand (operands[1], VOIDmode)) + { + emit_insn (gen_negc (operands[0], + force_reg (SImode, GEN_INT (-2147483648LL)))); + DONE; + } + + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + if (ti.remove_trailing_nott ()) + emit_insn (gen_negc (operands[0], + force_reg (SImode, GEN_INT (-2147483648LL)))); + else + emit_insn (gen_addc (operands[0], + force_reg (SImode, const0_rtx), + force_reg (SImode, GEN_INT (2147483647)))); + DONE; +}) (define_insn_and_split "*mov_t_msb_neg" [(set (match_operand:SI 0 "arith_reg_dest") - (if_then_else:SI (match_operand 1 "t_reg_operand") - (const_int 2147483647) ;; 0x7fffffff - (const_int -2147483648))) ;; 0x80000000 + (if_then_else:SI (match_operand 1 "treg_set_expr") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand"))) (clobber (reg:SI T_REG))] - "TARGET_SH1" + "TARGET_SH1 && can_create_pseudo_p () + && ((INTVAL (operands[2]) == -2147483648LL + && INTVAL (operands[3]) == 2147483647LL) + || (INTVAL (operands[2]) == 2147483647LL + && INTVAL (operands[3]) == -2147483648LL))" "#" - "&& can_create_pseudo_p ()" - [(parallel [(set (match_dup 0) - (minus:SI (const_int -2147483648) (reg:SI T_REG))) - (clobber (reg:SI T_REG))])]) + "&& 1" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + + if (INTVAL (operands[2]) == -2147483648LL) + { + if (ti.remove_trailing_nott ()) + emit_insn (gen_negc (operands[0], + force_reg (SImode, GEN_INT (-2147483648LL)))); + else + emit_insn (gen_addc (operands[0], + force_reg (SImode, const0_rtx), + force_reg (SImode, operands[3]))); + DONE; + } + else if (INTVAL (operands[2]) == 
2147483647LL) + { + if (ti.remove_trailing_nott ()) + emit_insn (gen_addc (operands[0], + force_reg (SImode, const0_rtx), + force_reg (SImode, GEN_INT (2147483647LL)))); + else + emit_insn (gen_negc (operands[0], + force_reg (SImode, GEN_INT (-2147483648LL)))); + DONE; + } + else + gcc_unreachable (); +}) ;; The *negnegt pattern helps the combine pass to figure out how to fold ;; an explicit double T bit negation. @@ -11683,24 +12080,29 @@ label: "" [(const_int 0)]) -;; Store T bit as all zeros or ones in a reg. -(define_insn "mov_neg_si_t" - [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (neg:SI (match_operand 1 "t_reg_operand" "")))] - "TARGET_SH1" - "subc %0,%0" - [(set_attr "type" "arith")]) - -;; Store negated T bit as all zeros or ones in a reg. -;; Use the following sequence: +;; Store (negated) T bit as all zeros or ones in a reg. ;; subc Rn,Rn ! Rn = Rn - Rn - T; T = T ;; not Rn,Rn ! Rn = 0 - Rn -(define_split - [(set (match_operand:SI 0 "arith_reg_dest" "") - (neg:SI (match_operand 1 "negt_reg_operand" "")))] +(define_insn_and_split "mov_neg_si_t" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (neg:SI (match_operand 1 "treg_set_expr")))] "TARGET_SH1" - [(set (match_dup 0) (neg:SI (reg:SI T_REG))) - (set (match_dup 0) (not:SI (match_dup 0)))]) +{ + gcc_assert (t_reg_operand (operands[1], VOIDmode)); + return "subc %0,%0"; +} + "&& can_create_pseudo_p () && !t_reg_operand (operands[1], VOIDmode)" + [(const_int 0)] +{ + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ())); + + if (ti.remove_trailing_nott ()) + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + + DONE; +} + [(set_attr "type" "arith")]) ;; The *movtt pattern eliminates redundant T bit to T bit moves / tests. (define_insn_and_split "*movtt" @@ -11719,6 +12121,10 @@ label: ;; This requires an additional pseudo. The SH specific sh_treg_combine RTL ;; pass will look for this insn. Disallow using it if pseudos can't be ;; created. +;; Don't split the nott inside the splitting of a treg_set_expr, or else +;; surrounding insns might not see and recombine it. Defer the splitting +;; of the nott until after the whole insn containing the treg_set_expr +;; has been split. (define_insn_and_split "nott" [(set (reg:SI T_REG) (xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))] @@ -11727,7 +12133,7 @@ label: gcc_assert (TARGET_SH2A); return "nott"; } - "! TARGET_SH2A && can_create_pseudo_p ()" + "!TARGET_SH2A && can_create_pseudo_p () && !sh_in_recog_treg_set_expr ()" [(set (match_dup 0) (reg:SI T_REG)) (set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))] { @@ -11868,6 +12274,66 @@ label: DONE; }) +;; Sometimes the T bit result of insns is needed in normal registers. +;; Instead of open coding all the pattern variations, use the treg_set_expr +;; predicate to match any T bit output insn and split it out after. +;; This pattern should be below all other related patterns so that it is +;; considered as a last resort option during matching. This allows +;; overriding it with special case patterns. 
+(define_insn_and_split "any_treg_expr_to_reg" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand 1 "treg_set_expr")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& !sh_in_recog_treg_set_expr ()" + [(const_int 0)] +{ + if (dump_file) + fprintf (dump_file, "splitting any_treg_expr_to_reg\n"); + + if (t_reg_operand (operands[1], VOIDmode)) + { + if (dump_file) + fprintf (dump_file, "t_reg_operand: emitting movt\n"); + emit_insn (gen_movt (operands[0], get_t_reg_rtx ())); + DONE; + } + if (negt_reg_operand (operands[1], VOIDmode)) + { + if (dump_file) + fprintf (dump_file, "negt_reg_operand: emitting movrt\n"); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); + DONE; + } + + /* If the split out insns ended with a nott, emit a movrt sequence, + otherwise a normal movt. */ + sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn); + rtx_insn* i = NULL; + if (ti.remove_trailing_nott ()) + { + /* Emit this same insn_and_split again. However, the next time it + is split, it will emit the actual negc/movrt insn. This gives + other surrounding insns the chance to see the trailing movrt. */ + if (dump_file) + fprintf (dump_file, + "any_treg_expr_to_reg: replacing trailing nott with movrt\n"); + i = emit_insn (gen_any_treg_expr_to_reg ( + operands[0], gen_rtx_XOR (SImode, get_t_reg_rtx (), + const1_rtx))); + } + else + { + i = emit_insn (gen_movt (operands[0], get_t_reg_rtx ())); + if (dump_file) + fprintf (dump_file, "any_treg_expr_to_reg: appending movt\n"); + } + + add_reg_note (i, REG_UNUSED, get_t_reg_rtx ()); + DONE; +}) + ;; ------------------------------------------------------------------------- ;; Instructions to cope with inline literal tables ;; ------------------------------------------------------------------------- @@ -13416,7 +13882,7 @@ label: else if (REG_P (operands[3]) && satisfies_constraint_M (operands[1])) { - emit_insn (gen_bld_reg (operands[3], const0_rtx)); + emit_insn (gen_bldsi_reg (operands[3], const0_rtx)); emit_insn (gen_bst_m2a (operands[0], operands[2])); DONE; } @@ -13558,6 +14024,193 @@ label: FAIL; }) +;; ------------------------------------------------------------------------- +;; Extract negated single bit and zero extend it. +;; Generally we don't care about the exact xor const_int value, as long +;; as it contains the extracted bit. For simplicity, the pattern variations +;; that convert everything into the primary '*neg_zero_extract_0' pattern use +;; a xor const_int -1 value. + +(define_insn_and_split "*neg_zero_extract_0" + [(set (reg:SI T_REG) + (zero_extract:SI (xor:QIHISI (match_operand:QIHISI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) + (const_int 1) + (match_operand 2 "const_int_operand")))] + "TARGET_SH1 && can_create_pseudo_p () + && INTVAL (operands[1]) & (1LL << INTVAL (operands[2]))" + "#" + "&& 1" + [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)))] +{ + if (INTVAL (operands[2]) == 31 && mode == SImode) + { + /* Use cmp/pz to extract bit 31 into the T bit. 
*/ + emit_insn (gen_cmpgesi_t (operands[0], const0_rtx)); + DONE; + } + + operands[2] = GEN_INT ((1 << INTVAL (operands[2]))); + if (GET_MODE (operands[0]) != SImode) + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); +}) + +(define_insn_and_split "*neg_zero_extract_1" + [(set (reg:SI T_REG) + (and:SI (not:SI (match_operand:SI 0 "arith_reg_operand")) + (const_int 1)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (const_int -1)) + (const_int 1) (const_int 0)))]) + +;; x & (1 << n) == 0: 0x00000000 + 1 = 1 +;; x & (1 << n) != 0: 0xFFFFFFFF + 1 = 0 +(define_insn_and_split "*neg_zero_extract_2" + [(set (reg:SI T_REG) + (plus:SI (sign_extract:SI (match_operand:QIHISI 0 "arith_reg_operand") + (const_int 1) + (match_operand 1 "const_int_operand")) + (const_int 1)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (const_int -1)) + (const_int 1) (match_dup 1)))]) + +;; (signed)x >> 31 + 1 = (x >= 0) ^ 1 +(define_insn_and_split "*neg_zero_extract_3" + [(set (reg:SI T_REG) + (plus:SI (ashiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 31)) + (const_int 1)))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (const_int -1)) + (const_int 1) (const_int 31)))]) + +;; This is required for some bit patterns of DImode subregs. +;; It looks like combine gets confused by the DImode right shift and fails +;; to simplify things. +(define_insn_and_split "*neg_zero_extract_4" + [(set (reg:SI T_REG) + (and:SI (and:SI + (lshiftrt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) + (match_operand 2 "const_int_operand")) + (not:SI (ashift:SI (match_operand:SI 3 "arith_reg_operand") + (match_operand 4 "const_int_operand")))) + (const_int 1)))] + "TARGET_SH1 && can_create_pseudo_p () + && INTVAL (operands[4]) > 0 + && INTVAL (operands[1]) & (1LL << INTVAL (operands[2]))" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 1) (match_dup 2)))]) + +(define_insn_and_split "*neg_zero_extract_5" + [(set (reg:SI T_REG) + (and:SI (not:SI (subreg:SI + (lshiftrt:DI (match_operand:DI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) + 0)) + (const_int 1)))] + "TARGET_SH1 && TARGET_LITTLE_ENDIAN && can_create_pseudo_p () + && INTVAL (operands[1]) < 32" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (const_int -1)) + (const_int 1) (match_dup 1)))] +{ + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +(define_insn_and_split "*neg_zero_extract_6" + [(set (reg:SI T_REG) + (and:SI (not:SI (subreg:SI + (lshiftrt:DI (match_operand:DI 0 "arith_reg_operand") + (match_operand 1 "const_int_operand")) + 4)) + (const_int 1)))] + "TARGET_SH1 && TARGET_BIG_ENDIAN && can_create_pseudo_p () + && INTVAL (operands[1]) < 32" + "#" + "&& 1" + [(set (reg:SI T_REG) (zero_extract:SI (xor:SI (match_dup 0) (const_int -1)) + (const_int 1) (match_dup 1)))] +{ + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +;; ------------------------------------------------------------------------- +;; Extract single bit and zero extend it. +;; All patterns store the result bit in the T bit, although that is not +;; always possible to do with a single insn and a nott must be appended. +;; The trailing nott will be optimized away in most cases. E.g. 
if the +;; extracted bit is fed into a branch condition, the condition can be +;; inverted and the nott will be eliminated. +;; FIXME: In cases where the trailing nott can't be eliminated, try to +;; convert it into a (not, tst) sequence, which could be better on non-SH2A. + +;; On SH2A the 'bld_reg' insn will be used if the bit position fits. +(define_insn_and_split "*zero_extract_0" + [(set (reg:SI T_REG) + (zero_extract:SI (match_operand:QIHISI 0 "arith_reg_operand") + (const_int 1) + (match_operand 1 "const_int_operand")))] + "TARGET_SH1 && can_create_pseudo_p () + && !(TARGET_SH2A && satisfies_constraint_K03 (operands[1]))" + "#" + "&& 1" + [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 1)) + (const_int 0))) + (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))] +{ + if (INTVAL (operands[1]) == 31 && mode == SImode) + { + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[0])); + DONE; + } + + operands[1] = GEN_INT (1 << INTVAL (operands[1])); + if (GET_MODE (operands[0]) != SImode) + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); +}) + +;; This is required for some bit patterns of DImode subregs. +;; It looks like combine gets confused by the DImode right shift and fails +;; to simplify things. +(define_insn_and_split "*zero_extract_1" + [(set (reg:SI T_REG) + (subreg:SI (zero_extract:DI (match_operand:SI 0 "arith_reg_operand") + (const_int 1) + (match_operand 1 "const_int_operand")) + 0))] + "TARGET_SH1 && TARGET_LITTLE_ENDIAN && can_create_pseudo_p () + && INTVAL (operands[1]) < 32" + "#" + "&& 1" + [(set (reg:SI T_REG) + (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1)))]) + +(define_insn_and_split "*zero_extract_2" + [(set (reg:SI T_REG) + (subreg:SI (zero_extract:DI (match_operand:SI 0 "arith_reg_operand") + (const_int 1) + (match_operand 1 "const_int_operand")) + 4))] + "TARGET_SH1 && TARGET_BIG_ENDIAN && can_create_pseudo_p () + && INTVAL (operands[1]) < 32" + "#" + "&& 1" + [(set (reg:SI T_REG) + (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1)))]) + +;; ------------------------------------------------------------------------- ;; SH2A instructions for bitwise operations. ;; FIXME: Convert multiple instruction insns to insn_and_split. ;; FIXME: Use iterators to fold at least and,xor,or insn variations. @@ -13652,17 +14305,9 @@ label: [(set_attr "length" "4,4")]) ;; Store a specified bit of the LSB 8 bits of a register in the T bit. -(define_insn "bld_reg" - [(set (reg:SI T_REG) - (zero_extract:SI (match_operand:SI 0 "arith_reg_operand" "r") - (const_int 1) - (match_operand 1 "const_int_operand" "K03")))] - "TARGET_SH2A && satisfies_constraint_K03 (operands[1])" - "bld %1,%0") - -(define_insn "*bld_regqi" +(define_insn "bld_reg" [(set (reg:SI T_REG) - (zero_extract:SI (match_operand:QI 0 "arith_reg_operand" "r") + (zero_extract:SI (match_operand:QIHISI 0 "arith_reg_operand" "r") (const_int 1) (match_operand 1 "const_int_operand" "K03")))] "TARGET_SH2A && satisfies_constraint_K03 (operands[1])" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c0bc34711ea..8b3dfc41b64 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,34 @@ +2015-01-24 Oleg Endo + + PR target/49263 + PR target/53987 + PR target/64345 + PR target/59533 + PR target/52933 + PR target/54236 + PR target/51244 + * gcc.target/sh/pr64345-1.c: New. + * gcc.target/sh/pr64345-2.c: New. + * gcc.target/sh/pr59533-1.c: New. + * gcc.target/sh/pr49263.c: Adjust matching of expected insns. 
+ * gcc.target/sh/pr52933-2.c: Likewise. + * gcc.target/sh/pr54089-1.c: Likewise. + * gcc.target/sh/pr54236-1.c: Likewise. + * gcc.target/sh/pr51244-20-sh2a.c: Likewise. + * gcc.target/sh/pr49263-1.c: Remove xfails. + * gcc.target/sh/pr49263-2.c: Likewise. + * gcc.target/sh/pr49263-3.c: Likewise. + * gcc.target/sh/pr53987-1.c: Likewise. + * gcc.target/sh/pr52933-1.c: Adjust matching of expected insns. + (test_24, test_25, test_26, test_27, test_28, test_29, test_30): New. + * gcc.target/sh/pr51244-12.c: Adjust matching of expected insns. + (test05, test06, test07, test08, test09, test10, test11, test12): New. + * gcc.target/sh/pr54236-3.c: Adjust matching of expected insns. + (test_002, test_003, test_004, test_005, test_006, test_007, test_008, + test_009): New. + * gcc.target/sh/pr51244-4.c: Adjust matching of expected insns. + (test_02): New. + 2015-01-24 Tom de Vries * gcc.target/i386/funcspec-5.c: Replace avx512 with avx512f. diff --git a/gcc/testsuite/gcc.target/sh/pr49263-1.c b/gcc/testsuite/gcc.target/sh/pr49263-1.c index fd3cb4bbd28..92a0cb0e038 100644 --- a/gcc/testsuite/gcc.target/sh/pr49263-1.c +++ b/gcc/testsuite/gcc.target/sh/pr49263-1.c @@ -3,9 +3,9 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ /* { dg-final { scan-assembler-not "and" } } */ -/* { dg-final { scan-assembler-not "bclr" { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "extu" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "exts" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "bclr" } } */ +/* { dg-final { scan-assembler-times "extu" 1 } } */ +/* { dg-final { scan-assembler-times "exts" 1 } } */ #define make_func(__valtype__, __valget__, __tstval__, __suff__)\ int test_imm_##__tstval__##__suff__ (__valtype__ val) \ diff --git a/gcc/testsuite/gcc.target/sh/pr49263-2.c b/gcc/testsuite/gcc.target/sh/pr49263-2.c index 3385161e390..a6318fcd432 100644 --- a/gcc/testsuite/gcc.target/sh/pr49263-2.c +++ b/gcc/testsuite/gcc.target/sh/pr49263-2.c @@ -3,13 +3,12 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ /* { dg-final { scan-assembler-not "and" } } */ -/* { dg-final { scan-assembler-not "exts" { xfail *-*-* } } } */ - +/* { dg-final { scan-assembler-not "exts" } } */ /* { dg-final { scan-assembler-times "tst\t#127,r0" 2 } } */ -/* { dg-final { scan-assembler-times "tst\t#255,r0" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "65407" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "-129" 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "extu" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "tst\t#255,r0" 1 } } */ +/* { dg-final { scan-assembler-times "65407" 1 } } */ +/* { dg-final { scan-assembler-times "-129" 2 } } */ +/* { dg-final { scan-assembler-times "extu" 1 } } */ int test_00 (unsigned char x) diff --git a/gcc/testsuite/gcc.target/sh/pr49263-3.c b/gcc/testsuite/gcc.target/sh/pr49263-3.c index 1edfe876715..0fbed940dde 100644 --- a/gcc/testsuite/gcc.target/sh/pr49263-3.c +++ b/gcc/testsuite/gcc.target/sh/pr49263-3.c @@ -2,18 +2,18 @@ is shifted by a constant amount. 
*/ /* { dg-do compile } */ /* { dg-options "-O2" } */ -/* { dg-final { scan-assembler-not "and|shl|sha|exts" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "and|shl|sha|exts" } } */ -/* { dg-final { scan-assembler-times "tst\t#7,r0" 3 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "tst\t#12,r0" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "tst\t#24,r0" 6 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "tst\t#13,r0" 3 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "tst\t#242,r0" 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "tst\t#7,r0" 3 } } */ +/* { dg-final { scan-assembler-times "tst\t#12,r0" 1 } } */ +/* { dg-final { scan-assembler-times "tst\t#24,r0" 6 } } */ +/* { dg-final { scan-assembler-times "tst\t#13,r0" 3 } } */ +/* { dg-final { scan-assembler-times "tst\t#242,r0" 3 } } */ /* { dg-final { scan-assembler-times "tst\t#252,r0" 1 } } */ -/* { dg-final { scan-assembler-times "tst\t#64,r0" 6 { target { ! sh2a } xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "tst\t#64,r0" 4 { target { sh2a } xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "bld\t#6" 2 { target { sh2a } xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "tst\t#64,r0" 6 { target { ! sh2a } } } } */ +/* { dg-final { scan-assembler-times "tst\t#64,r0" 4 { target { sh2a } } } } */ +/* { dg-final { scan-assembler-times "bld\t#6" 2 { target { sh2a } } } } */ int test_00 (unsigned char* x, int y, int z) diff --git a/gcc/testsuite/gcc.target/sh/pr49263.c b/gcc/testsuite/gcc.target/sh/pr49263.c index 783d865592c..2393e531fe3 100644 --- a/gcc/testsuite/gcc.target/sh/pr49263.c +++ b/gcc/testsuite/gcc.target/sh/pr49263.c @@ -5,6 +5,8 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ /* { dg-final { scan-assembler-not "and" } } */ +/* { dg-final { scan-assembler-not "extu" } } */ +/* { dg-final { scan-assembler-not "exts" } } */ #define make_func(__valtype__, __valget__, __tstval__, __suff__)\ int test_imm_##__tstval__##__suff__ (__valtype__ val) \ diff --git a/gcc/testsuite/gcc.target/sh/pr51244-12.c b/gcc/testsuite/gcc.target/sh/pr51244-12.c index da941015cc8..ae856f8c0ff 100644 --- a/gcc/testsuite/gcc.target/sh/pr51244-12.c +++ b/gcc/testsuite/gcc.target/sh/pr51244-12.c @@ -4,8 +4,9 @@ /* { dg-do compile } */ /* { dg-options "-O1" } */ /* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ -/* { dg-final { scan-assembler-times "negc" 10 } } */ -/* { dg-final { scan-assembler-not "movrt|#-1|add|sub" } } */ +/* { dg-final { scan-assembler-times "negc" 15 } } */ +/* { dg-final { scan-assembler-times "addc" 3 } } */ +/* { dg-final { scan-assembler-not "movrt|#-1|add\t|sub\t|movt" } } */ int test00 (int a, int b, int* x) @@ -66,3 +67,56 @@ test04_inv (int a) { return ((a & 0x55) != 0) ? 0x80000000 : 0x7FFFFFFF; } + +int +test05 (int a, int b) +{ + /* 1x addc */ + return a != b ? 0x7FFFFFFF : 0x80000000; +} + +int +test06 (char a) +{ + return ((a & 0x03) == 0) ? 0x7FFFFFFF : 0x80000000; +} + +int +test07 (char a) +{ + return ((a & 0x80) == 0) ? 0x7FFFFFFF : 0x80000000; +} + +int +test08 (char a) +{ + return ((a & 1) == 0) ? 0x7FFFFFFF : 0x80000000; +} + +int +test09 (int a) +{ + /* 1x cmp/pz, 1x addc */ + return a < 0 ? 0x7FFFFFFF : 0x80000000; +} + +int +test10 (int a) +{ + /* 1x cmp/pz, 1x negc */ + return a >= 0 ? 0x7FFFFFFF : 0x80000000; +} + +int +test11 (int a) +{ + /* 1x cmp/pl, 1x negc */ + return a > 0 ? 0x7FFFFFFF : 0x80000000; +} + +int +test12 (int a) +{ + /* 1x cmp/pl, 1x addc */ + return a <= 0 ? 
0x7FFFFFFF : 0x80000000;
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c b/gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
index 3208f932add..82ed9e0218b 100644
--- a/gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
+++ b/gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
@@ -4,8 +4,8 @@
 /* { dg-options "-O2" } */
 /* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */
 /* { dg-final { scan-assembler-times "tst" 6 } } */
-/* { dg-final { scan-assembler-times "movt" 1 } } */
-/* { dg-final { scan-assembler-times "nott" 1 } } */
+/* { dg-final { scan-assembler-not "movt" } } */
+/* { dg-final { scan-assembler-times "nott" 2 } } */
 /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
 /* { dg-final { scan-assembler-times "cmp/hi" 4 } } */
 /* { dg-final { scan-assembler-times "cmp/gt" 3 } } */
diff --git a/gcc/testsuite/gcc.target/sh/pr51244-4.c b/gcc/testsuite/gcc.target/sh/pr51244-4.c
index a11429b15e4..ac967bffd8c 100644
--- a/gcc/testsuite/gcc.target/sh/pr51244-4.c
+++ b/gcc/testsuite/gcc.target/sh/pr51244-4.c
@@ -1,19 +1,31 @@
 /* Check that storing the (negated) T bit as all ones or zeros in a reg
-   uses the subc instruction. On SH2A a sequence with the movrt instruction
-   is also OK instead of subc. */
+   uses the subc instruction. */
 /* { dg-do compile } */
 /* { dg-options "-O1 -mbranch-cost=2" } */
 /* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */
-/* { dg-final { scan-assembler-not "movt|tst|negc" } } */
-/* { dg-final { scan-assembler "subc|movrt|neg|not" } } */
+/* { dg-final { scan-assembler-not "movt|tst|negc|movrt" } } */
+/* { dg-final { scan-assembler-times "subc" 3 } } */
+/* { dg-final { scan-assembler-times "not\t" 1 } } */
+/* { dg-final { scan-assembler-times "shll" 1 } } */
+/* { dg-final { scan-assembler-not "cmp/gt" } } */
 
-int test_00 (int x, int y)
+int
+test_00 (int x, int y)
 {
+  /* 1x subc, 1x not */
   return x != y ? -1 : 0;
 }
 
-int test_01 (int x, int y)
+int
+test_01 (int x, int y)
 {
+  /* 1x subc */
   return x == y ? -1 : 0;
 }
+
+int
+test_02 (int x)
+{
+  /* 1x shll, 1x subc */
+  return 0 <= x ? 0 : -1;
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr52933-1.c b/gcc/testsuite/gcc.target/sh/pr52933-1.c
index b65707ee40b..0c69027281d 100644
--- a/gcc/testsuite/gcc.target/sh/pr52933-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr52933-1.c
@@ -5,8 +5,13 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
 /* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
-/* { dg-final { scan-assembler-times "div0s" 25 } } */
+/* { dg-final { scan-assembler-times "div0s" 32 } } */
 /* { dg-final { scan-assembler-not "tst" } } */
+/* { dg-final { scan-assembler-not "not\t" } } */
+/* { dg-final { scan-assembler-not "nott" } } */
+
+/* { dg-final { scan-assembler-times "negc" 9 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "movrt" 9 { target { sh2a } } } } */
 
 typedef unsigned char bool;
 
@@ -166,3 +171,45 @@ test_23 (int a, int b, int c, int d)
   /* Should emit 2x div0s. */
   return ((a < 0) == (b < 0)) | ((c < 0) == (d < 0));
 }
+
+bool
+test_24 (int a, int b)
+{
+  return a >= 0 != b >= 0;
+}
+
+bool
+test_25 (int a, int b)
+{
+  return !(a < 0 != b < 0);
+}
+
+int
+test_26 (int a, int b, int c, int d)
+{
+  return a >= 0 != b >= 0 ? c : d;
+}
+
+int
+test_27 (int a, int b)
+{
+  return a >= 0 == b >= 0;
+}
+
+int
+test_28 (int a, int b, int c, int d)
+{
+  return a >= 0 == b >= 0 ? c : d;
+}
+
+int
+test_29 (int a, int b)
+{
+  return ((a >> 31) ^ (b >= 0)) & 1;
+}
+
+int
+test_30 (int a, int b)
+{
+  return ((a >> 31) ^ (b >> 31)) & 1;
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr52933-2.c b/gcc/testsuite/gcc.target/sh/pr52933-2.c
index 865cb370913..67e2753b217 100644
--- a/gcc/testsuite/gcc.target/sh/pr52933-2.c
+++ b/gcc/testsuite/gcc.target/sh/pr52933-2.c
@@ -6,7 +6,12 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mpretend-cmove" } */
 /* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
-/* { dg-final { scan-assembler-times "div0s" 25 } } */
+/* { dg-final { scan-assembler-times "div0s" 32 } } */
 /* { dg-final { scan-assembler-not "tst" } } */
+/* { dg-final { scan-assembler-not "not\t" } } */
+/* { dg-final { scan-assembler-not "nott" } } */
+
+/* { dg-final { scan-assembler-times "negc" 9 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "movrt" 9 { target { sh2a } } } } */
 
 #include "pr52933-1.c"
diff --git a/gcc/testsuite/gcc.target/sh/pr53987-1.c b/gcc/testsuite/gcc.target/sh/pr53987-1.c
index a0394ac32d2..003dd4f49d2 100644
--- a/gcc/testsuite/gcc.target/sh/pr53987-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr53987-1.c
@@ -6,7 +6,7 @@
 /* { dg-final { scan-assembler-not "exts.b" } } */
 /* { dg-final { scan-assembler-not "exts.w" } } */
 /* { dg-final { scan-assembler-not "movu" } } */
-/* { dg-final { scan-assembler-not "tst\t#255" { xfail *-*-*} } } */
+/* { dg-final { scan-assembler-not "tst\t#255" } } */
 
 int
 test_00 (unsigned char* x, char* xx, int y, int z)
diff --git a/gcc/testsuite/gcc.target/sh/pr54089-1.c b/gcc/testsuite/gcc.target/sh/pr54089-1.c
index 4dbd1b8e558..418ba694392 100644
--- a/gcc/testsuite/gcc.target/sh/pr54089-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr54089-1.c
@@ -4,6 +4,8 @@
 /* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */
 /* { dg-final { scan-assembler-times "rotcr" 24 } } */
 /* { dg-final { scan-assembler-times "shll\t" 1 } } */
+/* { dg-final { scan-assembler-not "and\t#1" } } */
+/* { dg-final { scan-assembler-not "cmp/pl" } } */
 
 typedef char bool;
 
diff --git a/gcc/testsuite/gcc.target/sh/pr54236-1.c b/gcc/testsuite/gcc.target/sh/pr54236-1.c
index f7568a92c59..b5c35464a6a 100644
--- a/gcc/testsuite/gcc.target/sh/pr54236-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr54236-1.c
@@ -7,8 +7,12 @@
 /* { dg-final { scan-assembler-times "addc" 4 } } */
 /* { dg-final { scan-assembler-times "subc" 3 } } */
 /* { dg-final { scan-assembler-times "sett" 5 } } */
-/* { dg-final { scan-assembler-times "negc" 1 } } */
-/* { dg-final { scan-assembler-not "movt" } } */
+
+/* { dg-final { scan-assembler-times "negc" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-not "movt" { target { ! sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "bld" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "movt" 1 { target { sh2a } } } } */
 
 int
 test_00 (int a, int b, int c, int d)
@@ -64,7 +68,8 @@ test_07 (int *vec)
 {
   /* Must not see a 'sett' or 'addc' here. This is a case where combine
      tries to produce
-     'a + (0 - b) + 1' out of 'a - b + 1'. */
+     'a + (0 - b) + 1' out of 'a - b + 1'.
+     On non-SH2A there is a 'tst + negc', on SH2A a 'bld + movt'. */
   int z = vec[0];
   int vi = vec[1];
   int zi = vec[2];
diff --git a/gcc/testsuite/gcc.target/sh/pr54236-3.c b/gcc/testsuite/gcc.target/sh/pr54236-3.c
index fc0d111a9e9..933ece3639b 100644
--- a/gcc/testsuite/gcc.target/sh/pr54236-3.c
+++ b/gcc/testsuite/gcc.target/sh/pr54236-3.c
@@ -2,11 +2,13 @@
    If everything works as expected we won't see any movt instructions
    in these cases. */
 /* { dg-do compile } */
-/* { dg-options "-O1" } */
+/* { dg-options "-O2" } */
 /* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */
-/* { dg-final { scan-assembler-times "addc" 1 } } */
-/* { dg-final { scan-assembler-times "subc" 1 } } */
-/* { dg-final { scan-assembler-not "movt" } } */
+/* { dg-final { scan-assembler-times "addc" 4 } } */
+/* { dg-final { scan-assembler-times "subc" 5 } } */
+/* { dg-final { scan-assembler-times "movt" 1 } } */
+/* { dg-final { scan-assembler-times "sub\t" 1 } } */
+/* { dg-final { scan-assembler-times "neg\t" 2 } } */
 
 int
 test_000 (int* x, unsigned int c)
@@ -29,3 +31,66 @@ test_001 (int* x, unsigned int c)
     s -= ! (x[i] & 0x3000);
   return s;
 }
+
+int
+test_002 (int a, int b, int c)
+{
+  /* 1x tst, 1x subc */
+  return ((a & b) != 0) - c;
+}
+
+int
+test_003 (int a, int b, int c)
+{
+  /* 1x tst, 1x movt, 1x sub */
+  return ((a & b) == 0) - c;
+}
+
+int
+test_004 (int a, int b, int c)
+{
+  /* 1x tst, 1x addc */
+  return c - ((a & b) != 0);
+}
+
+int
+test_005 (int a, int b, int c)
+{
+  /* 1x shll, 1x subc */
+  int x = a < 0;
+  return c - (b + x);
+}
+
+int
+test_006 (int a, int b, int c)
+{
+  /* 1x neg, 1x cmp/pl, 1x addc */
+  int x = a > 0;
+  int y = b + x;
+  return y - c;
+}
+
+int
+test_007 (int a, int b, int c)
+{
+  /* 1x add #-1, 1x cmp/eq, 1x addc */
+  int x = a != 1;
+  int y = b - x;
+  return c + y;
+}
+
+int
+test_008 (int a, int b, int c)
+{
+  /* 1x neg, 1x cmp/gt, 1x subc */
+  int x = a > 1;
+  int y = b - x;
+  return c + y;
+}
+
+int
+test_009 (int a, int b, int c, int d)
+{
+  /* 1x div0s, 1x subc */
+  return c - d - (a < 0 != b < 0);
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr59533-1.c b/gcc/testsuite/gcc.target/sh/pr59533-1.c
new file mode 100644
index 00000000000..f45e4b83d71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr59533-1.c
@@ -0,0 +1,185 @@
+/* Check that the cmp/pz instruction is generated as expected. */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */
+
+/* { dg-final { scan-assembler-times "shll" 1 } } */
+/* { dg-final { scan-assembler-times "movt" 5 } } */
+/* { dg-final { scan-assembler-times "rotcl" 1 } } */
+/* { dg-final { scan-assembler-times "and" 3 } } */
+/* { dg-final { scan-assembler-times "extu.b" 5 } } */
+
+/* { dg-final { scan-assembler-times "cmp/pz" 22 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "addc" 3 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "subc" 12 { target { ! sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "cmp/pz" 20 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "addc" 5 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "subc" 10 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld" 2 { target { sh2a } } } } */
+
+int
+test_00 (unsigned char* a)
+{
+  /* 1x cmp/pz, 1x movt */
+  return a[0] < 128;
+}
+
+int
+test_01 (unsigned char* a)
+{
+  /* 1x cmp/pz, 1x addc */
+  return a[0] + (a[0] < 128);
+}
+
+int
+test_02 (unsigned char* a)
+{
+  /* 1x cmp/pz, 1x addc */
+  return a[0] + ((a[0] & 0x80) == 0);
+}
+
+int
+test_03 (unsigned char* a)
+{
+  /* 1x cmp/pz, 1x subc
+     SH2A: 1x bld, 1x addc */
+  return a[0] + (a[0] > 127);
+}
+
+int
+test_04 (unsigned char* a)
+{
+  /* 1x cmp/pz, 1x subc
+     SH2A: 1x bld, 1x addc */
+  return a[0] + ((a[0] & 0x80) != 0);
+}
+
+int
+test_05 (unsigned char* a, int b, int c)
+{
+  /* 1x cmp/pz */
+  if (a[0] < 128)
+    return c;
+  else
+    return b + 50;
+}
+
+unsigned int
+test_06 (unsigned int a)
+{
+  /* 1x cmp/pz, 1x movt */
+  return ~a >> 31;
+}
+
+int
+test_07 (unsigned short* a)
+{
+  /* 1x cmp/pz */
+  return a[0] < 32768;
+}
+
+int
+test_08 (unsigned short* a)
+{
+  /* 1x cmp/pz, 1x addc */
+  return a[0] + (a[0] < 32768);
+}
+
+unsigned int
+test_09 (unsigned int a)
+{
+  /* 1x cmp/pz, 1x movt */
+  return (a >> 31) ^ 1;
+}
+
+unsigned int
+test_10 (unsigned int a, unsigned int b)
+{
+  /* 1x cmp/pz, 1x rotcl */
+  return (a << 1) | ((a >> 31) ^ 1);
+}
+
+unsigned int
+test_11 (int x)
+{
+  /* 1x cmp/pz, 1x subc */
+  return ~(x >> 31);
+}
+
+unsigned int
+test_12 (int x)
+{
+  /* 1x cmp/pz, 1x subc */
+  return 0xFFFFFFFF - (x >> 31);
+}
+
+unsigned int
+test_13 (int x)
+{
+  /* 1x cmp/pz, 1x subc, 1x add */
+  return ~(x >> 31) << 1;
+}
+
+unsigned int
+test_14 (int x)
+{
+  /* 1x cmp/pz, 1x subc */
+  return ~(x >> 31) >> 1;
+}
+
+unsigned int
+test_15 (int x)
+{
+  /* 1x cmp/pz, 1x subc */
+  return ~(x >> 31) >> 31;
+}
+
+unsigned int
+test_16 (int x)
+{
+  /* 1x cmp/pz, 1x subc, 1x and */
+  return ~(x >> 31) & 0xFF000000;
+}
+
+unsigned int
+test_17 (int x)
+{
+  /* 1x cmp/pz, 1x subc, 1x and */
+  return ~(x >> 31) & 0x00FF0000;
+}
+
+unsigned int
+test_18 (int x)
+{
+  /* 1x cmp/pz, 1x subc, 1x and */
+  return ~(x >> 31) & 0x0000FF00;
+}
+
+unsigned int
+test_19 (int x)
+{
+  /* 1x cmp/pz, 1x subc, 1x extu.b */
+  return ~(x >> 31) & 0x000000FF;
+}
+
+unsigned int
+test_20 (int x, unsigned int y, unsigned int z)
+{
+  /* 1x shll */
+  return ~(x >> 31) ? y : z;
+}
+
+int
+test_21 (int x)
+{
+  /* 1x cmp/pz, 1x subc */
+  return x >= 0 ? 0xFFFFFFFF : 0;
+}
+
+int
+test_22 (int x)
+{
+  /* 1x cmp/pz, 1x movt */
+  return (x >> 31) + 1;
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr64345-1.c b/gcc/testsuite/gcc.target/sh/pr64345-1.c
new file mode 100644
index 00000000000..16c37d79d88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr64345-1.c
@@ -0,0 +1,97 @@
+/* Verify that single bit zero extractions emit the expected
+   insns sequences. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "exts|extu|sha|shld|subc|xor" } } */
+
+/* { dg-final { scan-assembler-times "tst" 716 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "cmp/pz" 6 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "shll\t" 6 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "shlr\t" 8 { target { ! sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "tst" 442 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld" 276 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "cmp/pz" 6 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "shll\t" 4 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "shlr\t" 8 { target { sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "and\t#1" 32 } } */
+
+#define make_func(type,shift)\
+  int test_##type##_##_shift_##shift##_0 (type x)\
+  {\
+    return ((x >> shift) ^ 1) & 1 ? -40 : -10;\
+  }\
+  int test_##type##_##_shift_##shift##_1 (type x)\
+  {\
+    return ((x >> shift) ^ 0) & 1 ? -40 : -10;\
+  }\
+  int test_##type##_##_shift_##shift##_2 (type x)\
+  {\
+    return ((x >> shift) ^ 1) & 1;\
+  }\
+  int test_##type##_##_shift_##shift##_3 (type x)\
+  {\
+    return ((x >> shift) ^ 0) & 1;\
+  }\
+  int test_##type##_##_shift_##shift##_4 (type x)\
+  {\
+    return (x & (1 << shift)) == 0;\
+  }\
+  int test_##type##_##_shift_##shift##_5 (type x)\
+  {\
+    return (x & (1 << shift)) != 0;\
+  }\
+\
+  int test_##type##_##_shift_##shift##_6 (type* x)\
+  {\
+    return ((*x >> shift) ^ 1) & 1 ? -40 : -10;\
+  }\
+  int test_##type##_##_shift_##shift##_7 (type* x)\
+  {\
+    return ((*x >> shift) ^ 0) & 1 ? -40 : -10;\
+  }\
+  int test_##type##_##_shift_##shift##_8 (type* x)\
+  {\
+    return ((*x >> shift) ^ 1) & 1;\
+  }\
+  int test_##type##_##_shift_##shift##_9 (type* x)\
+  {\
+    return ((*x >> shift) ^ 0) & 1;\
+  }\
+  int test_##type##_##_shift_##shift##_10 (type* x)\
+  {\
+    return (*x & (1 << shift)) == 0;\
+  }\
+  int test_##type##_##_shift_##shift##_11 (type* x)\
+  {\
+    return (*x & (1 << shift)) != 0;\
+  }
+
+#define make_funcs(type)\
+  make_func (type, 0)\
+  make_func (type, 1)\
+  make_func (type, 2)\
+  make_func (type, 3)\
+  make_func (type, 4)\
+  make_func (type, 5)\
+  make_func (type, 6)\
+  make_func (type, 7)
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+
+make_funcs (int8_t)
+make_funcs (uint8_t)
+make_funcs (int16_t)
+make_funcs (uint16_t)
+make_funcs (int32_t)
+make_funcs (uint32_t)
+make_funcs (int64_t)
+make_funcs (uint64_t)
diff --git a/gcc/testsuite/gcc.target/sh/pr64345-2.c b/gcc/testsuite/gcc.target/sh/pr64345-2.c
new file mode 100644
index 00000000000..dba1552cb57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr64345-2.c
@@ -0,0 +1,116 @@
+/* Verify that the TST insn is used to extract a zero extended
+   single bit into the T bit (for a following conditional branch) and into
+   a GP register. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "exts|extu|sha|shld|subc|xor" } } */
+
+/* { dg-final { scan-assembler-times "tst" 61 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#1," 1 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#2" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#4" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#8" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#16" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#32" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#64" 2 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#128" 2 { target { ! sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "tst" 54 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#1," 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#2" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#4" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#8" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#16" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#32" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#64" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "tst\t#128" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#1," 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#2" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#3" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#4" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#5" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#6" 1 { target { sh2a } } } } */
+/* { dg-final { scan-assembler-times "bld\t#7" 1 { target { sh2a } } } } */
+
+/* { dg-final { scan-assembler-times "and\t#1" 1 } } */
+/* { dg-final { scan-assembler-times "cmp/pz" 1 } } */
+/* { dg-final { scan-assembler-times "shll" 1 } } */
+
+typedef struct
+{
+  unsigned int b31 : 1;
+  unsigned int b30 : 1;
+  unsigned int b29 : 1;
+  unsigned int b28 : 1;
+  unsigned int b27 : 1;
+  unsigned int b26 : 1;
+  unsigned int b25 : 1;
+  unsigned int b24 : 1;
+  unsigned int b23 : 1;
+  unsigned int b22 : 1;
+  unsigned int b21 : 1;
+  unsigned int b20 : 1;
+  unsigned int b19 : 1;
+  unsigned int b18 : 1;
+  unsigned int b17 : 1;
+  unsigned int b16 : 1;
+  unsigned int b15 : 1;
+  unsigned int b14 : 1;
+  unsigned int b13 : 1;
+  unsigned int b12 : 1;
+  unsigned int b11 : 1;
+  unsigned int b10 : 1;
+  unsigned int b9 : 1;
+  unsigned int b8 : 1;
+  unsigned int b7 : 1;
+  unsigned int b6 : 1;
+  unsigned int b5 : 1;
+  unsigned int b4 : 1;
+  unsigned int b3 : 1;
+  unsigned int b2 : 1;
+  unsigned int b1 : 1;
+  unsigned int b0 : 1;
+} S;
+
+#define make_funcs(bitpos)\
+  unsigned int test_b##bitpos##_0 (S s)\
+  {\
+    return s.b##bitpos;\
+  }\
+  unsigned int test_b##bitpos##_1 (S s)\
+  {\
+    return !s.b##bitpos;\
+  }\
+
+make_funcs (0)
+make_funcs (1)
+make_funcs (2)
+make_funcs (3)
+make_funcs (4)
+make_funcs (5)
+make_funcs (6)
+make_funcs (7)
+make_funcs (8)
+make_funcs (9)
+make_funcs (10)
+make_funcs (11)
+make_funcs (12)
+make_funcs (13)
+make_funcs (14)
+make_funcs (15)
+make_funcs (16)
+make_funcs (17)
+make_funcs (18)
+make_funcs (19)
+make_funcs (20)
+make_funcs (21)
+make_funcs (22)
+make_funcs (23)
+make_funcs (24)
+make_funcs (25)
+make_funcs (26)
+make_funcs (27)
+make_funcs (28)
+make_funcs (29)
+make_funcs (30)
+make_funcs (31)
-- 
2.30.2