From 078134e5a412ee137f14f6f7d21a4aeae0d7e49c Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay Date: Tue, 25 Oct 2016 08:35:17 +0000 Subject: [PATCH] New avt target pass to work around performance loss by PR fix. gcc/ New avt target pass to work around performance loss by PR fix. PR target/71676 PR target/71678 * config/avr/avr.md (casesi__sequence) [qi,hi]: New insn. (*cmp) [qi,qq,uqq,hi,hq,uhq,ha,uha]: Rename to cmp3. * config/avr/predicates.md (extend_operator): New. * config/avr/avr-passes.def (avr_pass_casesi): Register new pass. * config/avr/avr-protos.h (avr_casei_sequence_check_operands) (make_avr_pass_casesi): New prototypes. * config/avr/avr.c (print-rtl.h): Include it. (pass_data avr_pass_data_casesi): Data for new pass. (avr_pass_casesi): New class implementing rtl_opt_pass .avr-casesi. (make_avr_pass_casesi, avr_parallel_insn_from_insns) (avr_is_casesi_sequence, avr_casei_sequence_check_operands) (avr_optimize_casesi): New functions. gcc/testsuite/ PR target/71676 PR target/71678 * gcc.target/avr/pr71676-2.c: New test. From-SVN: r241504 --- gcc/ChangeLog | 19 ++ gcc/config/avr/avr-passes.def | 11 + gcc/config/avr/avr-protos.h | 2 + gcc/config/avr/avr.c | 350 +++++++++++++++++++++++ gcc/config/avr/avr.md | 80 +++++- gcc/config/avr/predicates.md | 4 + gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.target/avr/pr71676-2.c | 46 +++ 8 files changed, 510 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/avr/pr71676-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 183efac1744..43859363309 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-10-25 Georg-Johann Lay + + New avt target pass to work around performance loss by PR fix. + + PR target/71676 + PR target/71678 + * config/avr/avr.md (casesi__sequence) [qi,hi]: New insn. + (*cmp) [qi,qq,uqq,hi,hq,uhq,ha,uha]: Rename to cmp3. + * config/avr/predicates.md (extend_operator): New. + * config/avr/avr-passes.def (avr_pass_casesi): Register new pass. + * config/avr/avr-protos.h (avr_casei_sequence_check_operands) + (make_avr_pass_casesi): New prototypes. + * config/avr/avr.c (print-rtl.h): Include it. + (pass_data avr_pass_data_casesi): Data for new pass. + (avr_pass_casesi): New class implementing rtl_opt_pass .avr-casesi. + (make_avr_pass_casesi, avr_parallel_insn_from_insns) + (avr_is_casesi_sequence, avr_casei_sequence_check_operands) + (avr_optimize_casesi): New functions. + 2016-10-25 Georg-Johann Lay Pitchumani Sivanupandi diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index 64d9ce4f183..b536a22ddd6 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -17,6 +17,17 @@ along with GCC; see the file COPYING3. If not see . */ +/* casesi uses a SImode switch index which is quite costly as most code will + work on HImode or QImode. The following pass runs right after .expand and + tries to fix such situations by operating on the original mode. This + reduces code size and register pressure. + + The assertion is that the code generated by casesi is unaltered and a + a sign-extend or zero-extend from QImode or HImode precedes the casesi + insns withaout any insns in between. */ + +INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi); + /* This avr-specific pass (re)computes insn notes, in particular REG_DEAD notes which are used by `avr.c::reg_unused_after' and branch offset computations. These notes must be correct, i.e. there must be no diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 1a5efa8fb31..9238f5163ab 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -129,6 +129,7 @@ extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); extern bool avr_xload_libgcc_p (machine_mode); extern rtx avr_eval_addr_attrib (rtx x); +extern bool avr_casei_sequence_check_operands (rtx *xop); static inline unsigned regmask (machine_mode mode, unsigned regno) @@ -158,6 +159,7 @@ namespace gcc { class context; } class rtl_opt_pass; extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *); +extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *); /* From avr-log.c */ diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 1a49fdc0d96..7ad2b6422b0 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -48,6 +48,7 @@ #include "builtins.h" #include "context.h" #include "tree-pass.h" +#include "print-rtl.h" /* This file should be included last. */ #include "target-def.h" @@ -334,6 +335,41 @@ public: } }; // avr_pass_recompute_notes +static const pass_data avr_pass_data_casesi = +{ + RTL_PASS, // type + "", // name (will be patched) + OPTGROUP_NONE, // optinfo_flags + TV_DF_SCAN, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + 0 // todo_flags_finish +}; + + +class avr_pass_casesi : public rtl_opt_pass +{ +public: + avr_pass_casesi (gcc::context *ctxt, const char *name) + : rtl_opt_pass (avr_pass_data_casesi, ctxt) + { + this->name = name; + } + + void avr_rest_of_handle_casesi (function*); + + virtual bool gate (function*) { return optimize > 0; } + + virtual unsigned int execute (function *func) + { + avr_rest_of_handle_casesi (func); + + return 0; + } +}; // avr_pass_casesi + } // anon namespace rtl_opt_pass* @@ -342,6 +378,320 @@ make_avr_pass_recompute_notes (gcc::context *ctxt) return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg"); } +rtl_opt_pass* +make_avr_pass_casesi (gcc::context *ctxt) +{ + return new avr_pass_casesi (ctxt, "avr-casesi"); +} + + +/* Make one parallel insn with all the patterns from insns i[0]..i[5]. */ + +static rtx_insn* +avr_parallel_insn_from_insns (rtx_insn *i[6]) +{ + rtvec vec = gen_rtvec (6, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]), + PATTERN (i[3]), PATTERN (i[4]), PATTERN (i[5])); + start_sequence(); + emit (gen_rtx_PARALLEL (VOIDmode, vec)); + rtx_insn *insn = get_insns(); + end_sequence(); + + return insn; +} + + +/* Return true if we see an insn stream generated by casesi expander together + with an extension to SImode of the switch value. + + If this is the case, fill in the insns from casesi to INSNS[1..5] and + the SImode extension to INSNS[0]. Moreover, extract the operands of + pattern casesi__sequence forged from the sequence to recog_data. */ + +static bool +avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[6]) +{ + rtx set_5, set_0; + + /* A first and quick test for a casesi sequences. As a side effect of + the test, harvest respective insns to INSNS[0..5]. */ + + if (!(JUMP_P (insns[5] = insn) + // casesi is the only insn that comes up with UNSPEC_INDEX_JMP, + // hence the following test ensures that we are actually dealing + // with code from casesi. + && (set_5 = single_set (insns[5])) + && UNSPEC == GET_CODE (SET_SRC (set_5)) + && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_5), 1) + + && (insns[4] = prev_real_insn (insns[5])) + && (insns[3] = prev_real_insn (insns[4])) + && (insns[2] = prev_real_insn (insns[3])) + && (insns[1] = prev_real_insn (insns[2])) + + // Insn prior to casesi. + && (insns[0] = prev_real_insn (insns[1])) + && (set_0 = single_set (insns[0])) + && extend_operator (SET_SRC (set_0), SImode))) + { + return false; + } + + if (dump_file) + { + fprintf (dump_file, ";; Sequence from casesi in " + "[bb %d]:\n\n", bb->index); + for (int i = 0; i < 6; i++) + print_rtl_single (dump_file, insns[i]); + } + + /* We have to deal with quite some operands. Extracting them by hand + would be tedious, therefore wrap the insn patterns into a parallel, + run recog against it and then use insn extract to get the operands. */ + + rtx_insn *xinsn = avr_parallel_insn_from_insns (insns); + + INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */); + + /* Failing to recognize means that someone changed the casesi expander or + that some passes prior to this one performed some unexpected changes. + Gracefully drop such situations instead of aborting. */ + + if (INSN_CODE (xinsn) < 0) + { + if (dump_file) + fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n"); + + return false; + } + + gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn) + || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn)); + + extract_insn (xinsn); + + // Assert on the anatomy of xinsn's operands we are going to work with. + + gcc_assert (11 == recog_data.n_operands); + gcc_assert (4 == recog_data.n_dups); + + if (dump_file) + { + fprintf (dump_file, ";; Operands extracted:\n"); + for (int i = 0; i < recog_data.n_operands; i++) + avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]); + fprintf (dump_file, "\n"); + } + + return true; +} + + +/* Perform some extra checks on operands of casesi__sequence. + Not all operand dependencies can be described by means of predicates. + This function performs left over checks and should always return true. + Returning false means that someone changed the casesi expander but did + not adjust casesi__sequence. */ + +bool +avr_casei_sequence_check_operands (rtx *xop) +{ + rtx sub_5 = NULL_RTX; + + if (AVR_HAVE_EIJMP_EICALL + // The last clobber op of the tablejump. + && xop[8] == all_regs_rtx[24]) + { + // $6 is: (subreg:SI ($5) 0) + sub_5 = xop[6]; + } + + if (!AVR_HAVE_EIJMP_EICALL + // $6 is: (plus:HI (subreg:SI ($5) 0) + // (label_ref ($3))) + && PLUS == GET_CODE (xop[6]) + && LABEL_REF == GET_CODE (XEXP (xop[6], 1)) + && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0)) + // The last clobber op of the tablejump. + && xop[8] == const0_rtx) + { + sub_5 = XEXP (xop[6], 0); + } + + if (sub_5 + && SUBREG_P (sub_5) + && 0 == SUBREG_BYTE (sub_5) + && rtx_equal_p (xop[5], SUBREG_REG (sub_5))) + return true; + + if (dump_file) + fprintf (dump_file, "\n;; Failed condition for casesi__sequence\n\n"); + + return false; +} + + +/* INSNS[1..5] is a sequence as generated by casesi and INSNS[0] is an + extension of an 8-bit or 16-bit integer to SImode. XOP contains the + operands of INSNS as extracted by insn_extract from pattern + casesi__sequence: + + $0: SImode reg switch value as result of $9. + $1: Negative of smallest index in switch. + $2: Number of entries in switch. + $3: Label to table. + $4: Label if out-of-bounds. + $5: $0 + $1. + $6: 3-byte PC: subreg:HI ($5) + label_ref ($3) + 2-byte PC: subreg:HI ($5) + $7: HI reg index into table (Z or pseudo) + $8: R24 or const0_rtx (to be clobbered) + $9: Extension to SImode of an 8-bit or 16-bit integer register $10. + $10: QImode or HImode register input of $9. + + Try to optimize this sequence, i.e. use the original HImode / QImode + switch value instead of SImode. */ + +static void +avr_optimize_casesi (rtx_insn *insns[6], rtx *xop) +{ + // Original mode of the switch value; this is QImode or HImode. + machine_mode mode = GET_MODE (xop[10]); + + // How the original switch value was extended to SImode; this is + // SIGN_EXTEND or ZERO_EXTEND. + enum rtx_code code = GET_CODE (xop[9]); + + // Lower index, upper index (plus one) and range of case calues. + HOST_WIDE_INT low_idx = -INTVAL (xop[1]); + HOST_WIDE_INT num_idx = INTVAL (xop[2]); + HOST_WIDE_INT hig_idx = low_idx + num_idx; + + // Maximum ranges of (un)signed QImode resp. HImode. + int imin = QImode == mode ? INT8_MIN : INT16_MIN; + int imax = QImode == mode ? INT8_MAX : INT16_MAX; + unsigned umax = QImode == mode ? UINT8_MAX : UINT16_MAX; + + // Testing the case range and whether it fits into the range of the + // (un)signed mode. This test should actually always pass because it + // makes no sense to have case values outside the mode range. Notice + // that case labels which are unreachable because they are outside the + // mode of the switch value (e.g. "case -1" for uint8_t) have already + // been thrown away by the middle-end. + + if (SIGN_EXTEND == code + && low_idx >= imin + && hig_idx <= imax) + { + // ok + } + else if (ZERO_EXTEND == code + && low_idx >= 0 + && (unsigned) hig_idx <= umax) + { + // ok + } + else + { + if (dump_file) + fprintf (dump_file, ";; Case ranges too big, giving up.\n\n"); + return; + } + + // Do normalization of switch value $10 and out-of-bound check in its + // original mode instead of in SImode. Use a newly created pseudo. + // This will replace insns[1..2]. + + start_sequence(); + + rtx_insn *seq1, *seq2, *last1, *last2; + + rtx reg = copy_to_mode_reg (mode, xop[10]); + + rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3; + rtx (*gen_cmp)(rtx,rtx) = QImode == mode ? gen_cmpqi3 : gen_cmphi3; + + emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode))); + emit_insn (gen_cmp (reg, gen_int_mode (num_idx, mode))); + + seq1 = get_insns(); + last1 = get_last_insn(); + end_sequence(); + + emit_insn_before (seq1, insns[1]); + + // After the out-of-bounds test and corresponding branch, use a + // 16-bit index. If QImode is used, extend it to HImode first. + // This will replace insns[4]. + + start_sequence(); + + if (QImode == mode) + reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg)); + + rtx pat_4 = AVR_3_BYTE_PC + ? gen_movhi (xop[7], reg) + : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3])); + + emit_insn (pat_4); + + seq2 = get_insns(); + last2 = get_last_insn(); + end_sequence(); + + emit_insn_after (seq2, insns[4]); + + if (dump_file) + { + fprintf (dump_file, ";; New insns: "); + + for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn)) + { + fprintf (dump_file, "%d, ", INSN_UID (insn)); + if (insn == last1) + break; + } + for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn)) + { + fprintf (dump_file, "%d%s", INSN_UID (insn), + insn == last2 ? ".\n\n" : ", "); + if (insn == last2) + break; + } + + fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n", + INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[4])); + } + + // Pseudodelete the SImode and subreg of SImode insns. We don't care + // about the extension insns[0]: Its result is now unused and other + // passes will clean it up. + + SET_INSN_DELETED (insns[1]); + SET_INSN_DELETED (insns[2]); + SET_INSN_DELETED (insns[4]); +} + + +void +avr_pass_casesi::avr_rest_of_handle_casesi (function *func) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, func) + { + rtx_insn *insn, *insns[6]; + + FOR_BB_INSNS (bb, insn) + { + if (avr_is_casesi_sequence (bb, insn, insns)) + { + avr_optimize_casesi (insns, recog_data.operand); + } + } + } +} + /* Set `avr_arch' as specified by `-mmcu='. Return true on success. */ diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 446ee402d79..cfc6b8d0dc7 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -4598,9 +4598,9 @@ (set_attr "length" "4")]) -;; "*cmpqi" -;; "*cmpqq" "*cmpuqq" -(define_insn "*cmp" +;; "cmpqi3" +;; "cmpqq3" "cmpuqq3" +(define_insn "cmp3" [(set (cc0) (compare (match_operand:ALL1 0 "register_operand" "r ,r,d") (match_operand:ALL1 1 "nonmemory_operand" "Y00,r,i")))] @@ -4640,10 +4640,10 @@ [(set_attr "cc" "compare") (set_attr "length" "2")]) -;; "*cmphi" -;; "*cmphq" "*cmpuhq" -;; "*cmpha" "*cmpuha" -(define_insn "*cmp" +;; "cmphi3" +;; "cmphq3" "cmpuhq3" +;; "cmpha3" "cmpuha3" +(define_insn "cmp3" [(set (cc0) (compare (match_operand:ALL2 0 "register_operand" "!w ,r ,r,d ,r ,d,r") (match_operand:ALL2 1 "nonmemory_operand" "Y00,Y00,r,s ,s ,M,n Ynn"))) @@ -5153,6 +5153,27 @@ (set_attr "cc" "clobber")]) +;; FIXME: casesi comes up with an SImode switch value $0 which +;; is quite some overhead because most code would use HI or +;; even QI. We add an AVR specific pass .avr-casesi which +;; tries to recover from the superfluous extension to SImode. +;; +;; Using "tablejump" could be a way out, but this also does +;; not perform in a satisfying manner as the middle end will +;; already multiply the table index by 2. Note that this +;; multiplication is performed by libgcc's __tablejump2__. +;; The multiplication there, however, runs *after* the table +;; start (a byte address) has been added, not before it like +;; "tablejump" will do. +;; +;; The preferred solution would be to let the middle ends pass +;; down information on the index as an additional casesi operand. +;; +;; If this expander is changed, you'll likely have to go through +;; "casesi__sequence" (used to recog + extract casesi +;; sequences in pass .avr-casesi) and propagate all adjustments +;; also to that pattern and the code of the extra pass. + (define_expand "casesi" [(parallel [(set (match_dup 5) (plus:SI (match_operand:SI 0 "register_operand") @@ -5198,6 +5219,49 @@ }) +;; This insn is used only for easy operand extraction. +;; The elements must match an extension to SImode plus +;; a sequence generated by casesi above. + +;; "casesi_qi_sequence" +;; "casesi_hi_sequence" +(define_insn "casesi__sequence" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 9 "extend_operator" + [(match_operand:QIHI 10 "register_operand")])) + + ;; What follows is a matcher for code from casesi. + ;; We keep the same operand numbering (except for $9 and $10 + ;; which don't appear in casesi). + (parallel [(set (match_operand:SI 5 "register_operand") + (plus:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand"))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 5) + (match_operand:SI 2 "const_int_operand"))) + (clobber (scratch:QI))]) + + (set (pc) + (if_then_else (gtu (cc0) + (const_int 0)) + (label_ref (match_operand 4)) + (pc))) + + (set (match_operand:HI 7 "register_operand") + (match_operand:HI 6)) + + (parallel [(set (pc) + (unspec:HI [(match_dup 7)] UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 3))) + (clobber (match_dup 7)) + (clobber (match_operand:QI 8))])] + "optimize + && avr_casei_sequence_check_operands (operands)" + { gcc_unreachable(); } + ) + + ;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ;; This instruction sets Z flag @@ -5550,7 +5614,7 @@ ;; This peephole avoids code like ;; -;; TST Rn ; *cmpqi +;; TST Rn ; cmpqi3 ;; BREQ .+2 ; branch ;; RJMP .Lm ;; diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md index 9114c52ba48..6a7a1b98d25 100644 --- a/gcc/config/avr/predicates.md +++ b/gcc/config/avr/predicates.md @@ -178,6 +178,10 @@ (and (match_operand 0 "comparison_operator") (not (match_code "gt,gtu,le,leu")))) +;; True for SIGN_EXTEND, ZERO_EXTEND. +(define_predicate "extend_operator" + (match_code "sign_extend,zero_extend")) + ;; Return true if OP is a valid call operand. (define_predicate "call_insn_operand" (and (match_code "mem") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 07cc777723f..4fa3b4f9b64 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-10-25 Georg-Johann Lay + + PR target/71676 + PR target/71678 + * gcc.target/avr/pr71676-2.c: New test. + 2016-10-25 Georg-Johann Lay Pitchumani Sivanupandi diff --git a/gcc/testsuite/gcc.target/avr/pr71676-2.c b/gcc/testsuite/gcc.target/avr/pr71676-2.c new file mode 100644 index 00000000000..a3a7a08032d --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr71676-2.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-dp -w -Os -fno-tree-switch-conversion" } */ + +#define MK_FUN(NAME, TYP, V) \ + unsigned char __attribute__((noinline,noclone)) \ + select_## NAME (TYP x, unsigned char y) \ + { \ + switch (x) \ + { \ + case V + 0: return 0 + y; \ + case V + 1: return 1; \ + case V + 2: return 2 + y; \ + case V + 3: return 3; \ + case V + 4: return 4 + y; \ + case V + 5: return 5; \ + case V + 6: return 6 + y; \ + case V + 7: return 7; \ + case V + 8: return 8 + y; \ + case V + 9: return 9; \ + case V + 10: return 10 + y; \ + case V + 11: return 11; \ + case V + 12: return 12 + y; \ + case V + 13: return 13; \ + case V + 14: return 14 + y; \ + case V + 15: return 15; \ + } \ + return x; \ + } + +MK_FUN (0_s8, signed char, 0) +MK_FUN (0_u8, unsigned char, 0) +MK_FUN (0_s16, signed int, 0) +MK_FUN (0_u16, unsigned int, 0) + +MK_FUN (m4_s8, signed char, -4) +MK_FUN (m4_u8, unsigned char, -4) +MK_FUN (m4_s16, signed int, -4) +MK_FUN (m4_u16, unsigned int, -4) + +MK_FUN (4_s8, signed char, 4) +MK_FUN (4_u8, unsigned char, 4) +MK_FUN (4_s16, signed int, 4) +MK_FUN (4_u16, unsigned int, 4) + +/* { dg-final { scan-assembler-not "extendqisi" } } */ +/* { dg-final { scan-assembler-not "extendhisi" } } */ -- 2.30.2