#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
+#include "print-rtl.h"
/* This file should be included last. */
#include "target-def.h"
}
}; // avr_pass_recompute_notes
+static const pass_data avr_pass_data_casesi =
+{
+ RTL_PASS, // type: this descriptor is used by an rtl_opt_pass
+ "", // name: placeholder, the pass constructor stores the real name
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required: none
+ 0, // properties_provided: none
+ 0, // properties_destroyed: none
+ 0, // todo_flags_start: none
+ 0 // todo_flags_finish: none
+};
+
+
+/* RTL pass that runs avr_rest_of_handle_casesi on each function
+   in order to improve code that was generated by the casesi expander.
+   The pass is only executed when optimizing.  */
+
+class avr_pass_casesi : public rtl_opt_pass
+{
+public:
+  // CTXT is handed on to rtl_opt_pass together with the descriptor
+  // avr_pass_data_casesi.  NAME replaces the empty name of that descriptor.
+  avr_pass_casesi (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_casesi, ctxt)
+  {
+    this->name = name;
+  }
+
+  // Run the pass only if optimization is turned on.
+  virtual bool gate (function*)
+  {
+    return 0 < optimize;
+  }
+
+  // Entry point of the pass:  Process FUNC, no TODO flags are returned.
+  virtual unsigned int execute (function *func)
+  {
+    avr_rest_of_handle_casesi (func);
+    return 0;
+  }
+
+  // Worker for execute; defined out of line.
+  void avr_rest_of_handle_casesi (function*);
+}; // avr_pass_casesi
+
} // anon namespace
rtl_opt_pass*
return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
}
+/* Return a newly allocated instance of the casesi pass for context CTXT.
+   The pass gets the name "avr-casesi".  */
+
+rtl_opt_pass*
+make_avr_pass_casesi (gcc::context *ctxt)
+{
+  rtl_opt_pass *pass = new avr_pass_casesi (ctxt, "avr-casesi");
+
+  return pass;
+}
+
+
+/* Build one insn whose pattern is a PARALLEL consisting of the patterns
+   of the six insns I[0]..I[5].  The new insn is emitted into a temporary
+   sequence only; the head of that sequence is returned.  */
+
+static rtx_insn*
+avr_parallel_insn_from_insns (rtx_insn *i[6])
+{
+  // Collect all six patterns in one PARALLEL.
+  rtx par = gen_rtx_PARALLEL (VOIDmode,
+                              gen_rtvec (6,
+                                         PATTERN (i[0]), PATTERN (i[1]),
+                                         PATTERN (i[2]), PATTERN (i[3]),
+                                         PATTERN (i[4]), PATTERN (i[5])));
+
+  // Wrap the PARALLEL into an insn that lives in its own sequence.
+  start_sequence();
+  emit (par);
+  rtx_insn *head = get_insns();
+  end_sequence();
+
+  return head;
+}
+
+
+/* Test whether INSN from basic block BB is the final jump of an insn
+   sequence as generated by the casesi expander, and whether that sequence
+   is directly preceded by an extension of the switch value to SImode.
+
+   If so, return true with the insns from casesi stored in INSNS[1..5],
+   the SImode extension stored in INSNS[0], and the operands of pattern
+   casesi_<mode>_sequence, forged from the sequence, extracted to
+   recog_data.  Otherwise return false; INSNS[] may be partly filled then.  */
+
+static bool
+avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[6])
+{
+  /* A first and quick test for a casesi sequence.  As a side effect of
+     the test, the respective insns are harvested to INSNS[0..5].  */
+
+  insns[5] = insn;
+
+  if (!JUMP_P (insn))
+    return false;
+
+  // casesi is the only insn that comes up with UNSPEC_INDEX_JMP, hence
+  // finding it ensures that we are actually dealing with code from casesi.
+  rtx jump_set = single_set (insn);
+
+  if (!jump_set
+      || UNSPEC != GET_CODE (SET_SRC (jump_set))
+      || UNSPEC_INDEX_JMP != XINT (SET_SRC (jump_set), 1))
+    return false;
+
+  // Walk backwards over the four remaining insns from casesi (4..1)
+  // and the insn prior to casesi (0).
+  for (int k = 4; k >= 0; k--)
+    {
+      insns[k] = prev_real_insn (insns[k + 1]);
+
+      if (!insns[k])
+        return false;
+    }
+
+  // The insn prior to casesi must be an extension to SImode.
+  rtx ext_set = single_set (insns[0]);
+
+  if (!ext_set
+      || !extend_operator (SET_SRC (ext_set), SImode))
+    return false;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; Sequence from casesi in [bb %d]:\n\n",
+               bb->index);
+
+      for (int k = 0; k < 6; k++)
+        print_rtl_single (dump_file, insns[k]);
+    }
+
+  /* There are quite some operands to deal with.  Instead of picking them
+     by hand, wrap the six patterns into one parallel, run recog against
+     it and let insn extract deliver the operands.  */
+
+  rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
+  int icode = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
+
+  INSN_CODE (xinsn) = icode;
+
+  /* No match means that someone changed the casesi expander or that some
+     pass prior to this one performed unexpected changes.  Gracefully
+     drop such situations instead of aborting.  */
+
+  if (icode < 0)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
+
+      return false;
+    }
+
+  gcc_assert (icode == CODE_FOR_casesi_qi_sequence
+              || icode == CODE_FOR_casesi_hi_sequence);
+
+  extract_insn (xinsn);
+
+  // Assert on the anatomy of xinsn's operands we are going to work with.
+
+  gcc_assert (11 == recog_data.n_operands);
+  gcc_assert (4 == recog_data.n_dups);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; Operands extracted:\n");
+
+      for (int k = 0; k < recog_data.n_operands; k++)
+        avr_fdump (dump_file, ";; $%d = %r\n", k, recog_data.operand[k]);
+
+      fprintf (dump_file, "\n");
+    }
+
+  return true;
+}
+
+
+/* Extra checks on the operands XOP[] of casesi_<mode>_sequence; used in
+   the insn condition of that pattern.  Not all dependencies between the
+   operands can be expressed by predicates, the left-over ones are:
+
+   With EIJMP / EICALL:  $8, the last clobber of the tablejump, is R24
+   and $6 is a subreg of $5 at byte offset 0.
+
+   Without EIJMP / EICALL:  $8 is const0_rtx and $6 is the sum of such
+   a subreg and a label_ref to the table label $3.
+
+   This function should always return true.  Returning false means that
+   someone changed the casesi expander but did not adjust
+   casesi_<mode>_sequence.  */
+
+bool
+avr_casei_sequence_check_operands (rtx *xop)
+{
+  // The part of $6 that is supposed to be a subreg of $5, if any.
+  rtx inner = NULL_RTX;
+
+  if (AVR_HAVE_EIJMP_EICALL)
+    {
+      // $6 is just the subreg provided the tablejump clobbers R24.
+      if (xop[8] == all_regs_rtx[24])
+        inner = xop[6];
+    }
+  else if (PLUS == GET_CODE (xop[6])
+           && LABEL_REF == GET_CODE (XEXP (xop[6], 1))
+           && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
+           && xop[8] == const0_rtx)
+    {
+      // $6 is subreg + label_ref ($3) and the tablejump's last clobber
+      // operand is const0_rtx:  The subreg is the first summand.
+      inner = XEXP (xop[6], 0);
+    }
+
+  const bool ok = (inner
+                   && SUBREG_P (inner)
+                   && 0 == SUBREG_BYTE (inner)
+                   && rtx_equal_p (xop[5], SUBREG_REG (inner)));
+
+  if (!ok && dump_file)
+    fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
+
+  return ok;
+}
+
+
+/* INSNS[1..5] is a sequence as generated by casesi and INSNS[0] is an
+   extension of an 8-bit or 16-bit integer to SImode.  XOP contains the
+   operands of INSNS as extracted by insn_extract from pattern
+   casesi_<mode>_sequence:
+
+      $0: SImode reg switch value as result of $9.
+      $1: Negative of smallest index in switch.
+      $2: Number of entries in switch.
+      $3: Label to table.
+      $4: Label if out-of-bounds.
+      $5: $0 + $1.
+      $6: 3-byte PC:  subreg:HI ($5)
+          2-byte PC:  subreg:HI ($5) + label_ref ($3)
+      $7: HI reg index into table (Z or pseudo)
+      $8: R24 or const0_rtx (to be clobbered)
+      $9: Extension to SImode of an 8-bit or 16-bit integer register $10.
+      $10: QImode or HImode register input of $9.
+
+   Try to optimize this sequence, i.e. use the original HImode / QImode
+   switch value instead of SImode.  If the range of the case values does
+   not fit into the original mode, nothing is changed.  */
+
+static void
+avr_optimize_casesi (rtx_insn *insns[6], rtx *xop)
+{
+  // Original mode of the switch value; this is QImode or HImode.
+  machine_mode mode = GET_MODE (xop[10]);
+
+  // How the original switch value was extended to SImode; this is
+  // SIGN_EXTEND or ZERO_EXTEND.
+  enum rtx_code code = GET_CODE (xop[9]);
+
+  // Lower index, upper index (plus one) and range of case values.
+  HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
+  HOST_WIDE_INT num_idx = INTVAL (xop[2]);
+  HOST_WIDE_INT hig_idx = low_idx + num_idx;
+
+  // Maximum ranges of (un)signed QImode resp. HImode.  The bounds are
+  // spelled out as HOST_WIDE_INT values:  This neither relies on the host
+  // exposing the <stdint.h> limit macros to C++, nor does it require to
+  // narrow the indices before comparing them.
+  const HOST_WIDE_INT imax = QImode == mode ? 0x7f : 0x7fff;
+  const HOST_WIDE_INT imin = -imax - 1;
+  const HOST_WIDE_INT umax = QImode == mode ? 0xff : 0xffff;
+
+  // Testing the case range and whether it fits into the range of the
+  // (un)signed mode.  This test should actually always pass because it
+  // makes no sense to have case values outside the mode range.  Notice
+  // that case labels which are unreachable because they are outside the
+  // mode of the switch value (e.g. "case -1" for uint8_t) have already
+  // been thrown away by the middle-end.
+
+  const bool fits_signed = (low_idx >= imin
+                            && hig_idx <= imax);
+
+  const bool fits_unsigned = (low_idx >= 0
+                              && hig_idx >= 0
+                              && hig_idx <= umax);
+
+  if (!((SIGN_EXTEND == code && fits_signed)
+        || (ZERO_EXTEND == code && fits_unsigned)))
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
+      return;
+    }
+
+  // Do normalization of switch value $10 and out-of-bound check in its
+  // original mode instead of in SImode.  Use a newly created pseudo.
+  // This will replace insns[1..2].
+
+  start_sequence();
+
+  rtx_insn *seq1, *seq2, *last1, *last2;
+
+  rtx reg = copy_to_mode_reg (mode, xop[10]);
+
+  rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
+  rtx (*gen_cmp)(rtx,rtx) = QImode == mode ? gen_cmpqi3 : gen_cmphi3;
+
+  emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
+  emit_insn (gen_cmp (reg, gen_int_mode (num_idx, mode)));
+
+  seq1 = get_insns();
+  last1 = get_last_insn();
+  end_sequence();
+
+  emit_insn_before (seq1, insns[1]);
+
+  // After the out-of-bounds test and corresponding branch, use a
+  // 16-bit index.  If QImode is used, extend it to HImode first.
+  // This will replace insns[4].
+
+  start_sequence();
+
+  if (QImode == mode)
+    reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
+
+  // With a 3-byte PC the index is used as is; otherwise the table label
+  // is added to it, just like in the respective shapes of $6.
+  rtx pat_4 = AVR_3_BYTE_PC
+    ? gen_movhi (xop[7], reg)
+    : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
+
+  emit_insn (pat_4);
+
+  seq2 = get_insns();
+  last2 = get_last_insn();
+  end_sequence();
+
+  emit_insn_after (seq2, insns[4]);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; New insns: ");
+
+      for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
+        {
+          fprintf (dump_file, "%d, ", INSN_UID (insn));
+          if (insn == last1)
+            break;
+        }
+      for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
+        {
+          fprintf (dump_file, "%d%s", INSN_UID (insn),
+                   insn == last2 ? ".\n\n" : ", ");
+          if (insn == last2)
+            break;
+        }
+
+      fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
+               INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[4]));
+    }
+
+  // Pseudodelete the SImode and subreg of SImode insns.  We don't care
+  // about the extension insns[0]: Its result is now unused and other
+  // passes will clean it up.
+
+  SET_INSN_DELETED (insns[1]);
+  SET_INSN_DELETED (insns[2]);
+  SET_INSN_DELETED (insns[4]);
+}
+
+
+/* Worker of the casesi pass:  Visit each insn of each basic block of
+   FUNC.  If avr_is_casesi_sequence recognizes the insn as the end of a
+   casesi sequence, try to optimize that sequence using the operands
+   which the recognizer left in recog_data.  */
+
+void
+avr_pass_casesi::avr_rest_of_handle_casesi (function *func)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, func)
+    {
+      rtx_insn *insn;
+      // Filled by avr_is_casesi_sequence.
+      rtx_insn *seq[6];
+
+      FOR_BB_INSNS (bb, insn)
+        {
+          if (!avr_is_casesi_sequence (bb, insn, seq))
+            continue;
+
+          avr_optimize_casesi (seq, recog_data.operand);
+        }
+    }
+}
+
/* Set `avr_arch' as specified by `-mmcu='.
Return true on success. */
(set_attr "length" "4")])
-;; "*cmpqi"
-;; "*cmpqq" "*cmpuqq"
-(define_insn "*cmp<mode>"
+;; "cmpqi3"
+;; "cmpqq3" "cmpuqq3"
+(define_insn "cmp<mode>3"
[(set (cc0)
(compare (match_operand:ALL1 0 "register_operand" "r ,r,d")
(match_operand:ALL1 1 "nonmemory_operand" "Y00,r,i")))]
[(set_attr "cc" "compare")
(set_attr "length" "2")])
-;; "*cmphi"
-;; "*cmphq" "*cmpuhq"
-;; "*cmpha" "*cmpuha"
-(define_insn "*cmp<mode>"
+;; "cmphi3"
+;; "cmphq3" "cmpuhq3"
+;; "cmpha3" "cmpuha3"
+(define_insn "cmp<mode>3"
[(set (cc0)
(compare (match_operand:ALL2 0 "register_operand" "!w ,r ,r,d ,r ,d,r")
(match_operand:ALL2 1 "nonmemory_operand" "Y00,Y00,r,s ,s ,M,n Ynn")))
(set_attr "cc" "clobber")])
+;; FIXME: casesi comes up with an SImode switch value $0 which
+;; is quite some overhead because most code would use HI or
+;; even QI. We add an AVR specific pass .avr-casesi which
+;; tries to recover from the superfluous extension to SImode.
+;;
+;; Using "tablejump" could be a way out, but this also does
+;; not perform in a satisfying manner as the middle end will
+;; already multiply the table index by 2. Note that this
+;; multiplication is performed by libgcc's __tablejump2__.
+;; The multiplication there, however, runs *after* the table
+;; start (a byte address) has been added, not before it like
+;; "tablejump" will do.
+;;
+;; The preferred solution would be to let the middle-end pass
+;; down information on the index as an additional casesi operand.
+;;
+;; If this expander is changed, you'll likely have to go through
+;; "casesi_<mode>_sequence" (used to recog + extract casesi
+;; sequences in pass .avr-casesi) and propagate all adjustments
+;; also to that pattern and the code of the extra pass.
+
(define_expand "casesi"
[(parallel [(set (match_dup 5)
(plus:SI (match_operand:SI 0 "register_operand")
})
+;; This insn is used only for easy operand extraction:  Pass .avr-casesi
+;; runs recog + extract against it.  The elements must match an extension
+;; to SImode plus a sequence generated by casesi above.
+
+;; "casesi_qi_sequence"
+;; "casesi_hi_sequence"
+(define_insn "casesi_<mode>_sequence"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 9 "extend_operator"
+ [(match_operand:QIHI 10 "register_operand")]))
+
+ ;; What follows is a matcher for code from casesi.
+ ;; We keep the same operand numbering (except for $9 and $10
+ ;; which belong to the extension above and don't appear in casesi).
+ (parallel [(set (match_operand:SI 5 "register_operand")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand")))
+ (clobber (scratch:QI))])
+ (parallel [(set (cc0)
+ (compare (match_dup 5)
+ (match_operand:SI 2 "const_int_operand")))
+ (clobber (scratch:QI))])
+
+ (set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 4))
+ (pc)))
+
+ (set (match_operand:HI 7 "register_operand")
+ (match_operand:HI 6))
+
+ (parallel [(set (pc)
+ (unspec:HI [(match_dup 7)] UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 3)))
+ (clobber (match_dup 7))
+ (clobber (match_operand:QI 8))])]
+ "optimize
+ && avr_casei_sequence_check_operands (operands)"
+ { gcc_unreachable(); }
+ )
+
+
;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;; This instruction sets Z flag
;; This peephole avoids code like
;;
-;; TST Rn ; *cmpqi
+;; TST Rn ; cmpqi3
;; BREQ .+2 ; branch
;; RJMP .Lm
;;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-dp -w -Os -fno-tree-switch-conversion" } */
+/* MK_FUN (NAME, TYP, V) defines a function select_NAME that switches on
+   its first argument X of type TYP.  The switch has the 16 consecutive
+   case labels V + 0 ... V + 15, where every other case also uses the
+   second argument Y.  X is returned if no case label matches.  */
+#define MK_FUN(NAME, TYP, V) \
+ unsigned char __attribute__((noinline,noclone)) \
+ select_## NAME (TYP x, unsigned char y) \
+ { \
+ switch (x) \
+ { \
+ case V + 0: return 0 + y; \
+ case V + 1: return 1; \
+ case V + 2: return 2 + y; \
+ case V + 3: return 3; \
+ case V + 4: return 4 + y; \
+ case V + 5: return 5; \
+ case V + 6: return 6 + y; \
+ case V + 7: return 7; \
+ case V + 8: return 8 + y; \
+ case V + 9: return 9; \
+ case V + 10: return 10 + y; \
+ case V + 11: return 11; \
+ case V + 12: return 12 + y; \
+ case V + 13: return 13; \
+ case V + 14: return 14 + y; \
+ case V + 15: return 15; \
+ } \
+ return x; \
+ }
+/* Case labels 0 ... 15 for 8-bit and 16-bit, signed and unsigned X.  */
+MK_FUN (0_s8, signed char, 0)
+MK_FUN (0_u8, unsigned char, 0)
+MK_FUN (0_s16, signed int, 0)
+MK_FUN (0_u16, unsigned int, 0)
+/* Case labels -4 ... 11, i.e. some of the labels are negative.  */
+MK_FUN (m4_s8, signed char, -4)
+MK_FUN (m4_u8, unsigned char, -4)
+MK_FUN (m4_s16, signed int, -4)
+MK_FUN (m4_u16, unsigned int, -4)
+/* Case labels 4 ... 19, i.e. the smallest label is greater than 0.  */
+MK_FUN (4_s8, signed char, 4)
+MK_FUN (4_u8, unsigned char, 4)
+MK_FUN (4_s16, signed int, 4)
+MK_FUN (4_u16, unsigned int, 4)
+/* The assembler output must not mention "extendqisi" or "extendhisi";
+   presumably these are the insns that extend the switch value to SImode
+   (the output is annotated due to -dp) -- TODO confirm the insn names.  */
+/* { dg-final { scan-assembler-not "extendqisi" } } */
+/* { dg-final { scan-assembler-not "extendhisi" } } */