From 296799ba06ab9ff273038e9b4dd93ca9ba65bf1c Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay Date: Thu, 28 Jul 2011 08:03:07 +0000 Subject: [PATCH] re PR target/49687 ([avr] Missed optimization for widening MUL) PR target/49687 * config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit. Add _muluhisi3, _mulshisi3, _usmulhisi3. * config/avr/libgcc.S (__mulsi3): Rewrite. (__mulhisi3): Rewrite. (__umulhisi3): Rewrite. (__usmulhisi3): New. (__muluhisi3): New. (__mulshisi3): New. (__mulohisi3): New. (__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to declare. * config/avr/predicates.md (pseudo_register_operand): Rewrite. (pseudo_register_or_const_int_operand): New. (combine_pseudo_register_operand): New. (u16_operand): New. (s16_operand): New. (o16_operand): New. * config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI. * config/avr/avr.md (QIHI, QIHI2): New mode iterators. (any_extend, any_extend2): New code iterators. (extend_prefix): New code attribute. (mulsi3): Rewrite. Turn insn to expander. (mulhisi3): Ditto. (umulhisi3): Ditto. (usmulhisi3): New expander. (*mulsi3): New insn-and-split. (mulusi3): New insn-and-split. (mulssi3): New insn-and-split. (mulohisi3): New insn-and-split. (*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3, *usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3, *sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3, *ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New insn-and-split. (*mulsi3_call): Rewrite. (*mulhisi3_call): Rewrite. (*umulhisi3_call): Rewrite. (*usmulhisi3_call): New insn. (*muluhisi3_call): New insn. (*mulshisi3_call): New insn. (*mulohisi3_call): New insn. (extendqihi2): Use combine_pseudo_register_operand as predicate for operand 1. (extendqisi2): Ditto. (zero_extendqihi2): Ditto. (zero_extendqisi2): Ditto. (zero_extendhisi2): Ditto. (extendhisi2): Ditto. Don't early-clobber operand 0. 
From-SVN: r176862 --- gcc/ChangeLog | 52 +++++ gcc/config/avr/avr.c | 28 +++ gcc/config/avr/avr.md | 349 ++++++++++++++++++++++++++---- gcc/config/avr/libgcc.S | 403 ++++++++++++++++++----------------- gcc/config/avr/predicates.md | 43 +++- gcc/config/avr/t-avr | 4 +- 6 files changed, 637 insertions(+), 242 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2245872ca9c..3ac229fdc8a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,55 @@ +2011-07-28 Georg-Johann Lay + + PR target/49687 + * config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit. + Add _muluhisi3, _mulshisi3, _usmulhisi3. + * config/avr/libgcc.S (__mulsi3): Rewrite. + (__mulhisi3): Rewrite. + (__umulhisi3): Rewrite. + (__usmulhisi3): New. + (__muluhisi3): New. + (__mulshisi3): New. + (__mulohisi3): New. + (__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to + declare. + * config/avr/predicates.md (pseudo_register_operand): Rewrite. + (pseudo_register_or_const_int_operand): New. + (combine_pseudo_register_operand): New. + (u16_operand): New. + (s16_operand): New. + (o16_operand): New. + * config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI. + * config/avr/avr.md (QIHI, QIHI2): New mode iterators. + (any_extend, any_extend2): New code iterators. + (extend_prefix): New code attribute. + (mulsi3): Rewrite. Turn insn to expander. + (mulhisi3): Ditto. + (umulhisi3): Ditto. + (usmulhisi3): New expander. + (*mulsi3): New insn-and-split. + (mulusi3): New insn-and-split. + (mulssi3): New insn-and-split. + (mulohisi3): New insn-and-split. + (*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3, + *usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3, + *sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3, + *ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New + insn-and-split. + (*mulsi3_call): Rewrite. + (*mulhisi3_call): Rewrite. + (*umulhisi3_call): Rewrite. + (*usmulhisi3_call): New insn. + (*muluhisi3_call): New insn. 
+ (*mulshisi3_call): New insn. + (*mulohisi3_call): New insn. + (extendqihi2): Use combine_pseudo_register_operand as predicate + for operand 1. + (extendqisi2): Ditto. + (zero_extendqihi2): Ditto. + (zero_extendqisi2): Ditto. + (zero_extendhisi2): Ditto. + (extendhisi2): Ditto. Don't early-clobber operand 0. + 2011-07-28 Uros Bizjak * config/i386/i386.c (add->lea splitter): Add SWI mode to PLUS RTX. diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index eb1707e922b..d9ed2248947 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -5515,6 +5515,34 @@ avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total, return false; break; + case SImode: + if (AVR_HAVE_MUL) + { + if (!speed) + { + /* Add some additional costs besides CALL like moves etc. */ + + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + else + { + /* Just a rough estimate. Even with -O2 we don't want bulky + code expanded inline. */ + + *total = COSTS_N_INSNS (25); + } + } + else + { + if (speed) + *total = COSTS_N_INSNS (300); + else + /* Add some additional costs besides CALL like moves etc. */ + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + + return true; + default: return false; } diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 66c3db279c5..3f3bb6a2b5b 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -127,12 +127,25 @@ (const_int 2))] (const_int 2))) -;; Define mode iterator +;; Define mode iterators +(define_mode_iterator QIHI [(QI "") (HI "")]) +(define_mode_iterator QIHI2 [(QI "") (HI "")]) (define_mode_iterator QISI [(QI "") (HI "") (SI "")]) (define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")]) (define_mode_iterator HIDI [(HI "") (SI "") (DI "")]) (define_mode_iterator HISI [(HI "") (SI "")]) +;; Define code iterators +;; Define two incarnations so that we can build the cross product. 
+(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_iterator any_extend2 [sign_extend zero_extend]) + +;; Define code attributes +(define_code_attr extend_prefix + [(sign_extend "s") + (zero_extend "u")]) + + ;;======================================================================== ;; The following is used by nonlocal_goto and setjmp. ;; The receiver pattern will create no instructions since internally @@ -1350,69 +1363,310 @@ ;; Operand 2 (reg:SI 18) not clobbered on the enhanced core. ;; All call-used registers clobbered otherwise - normal library call. +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; its final register footprint is worked out. + (define_expand "mulsi3" - [(set (reg:SI 22) (match_operand:SI 1 "register_operand" "")) - (set (reg:SI 18) (match_operand:SI 2 "register_operand" "")) - (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18))) - (clobber (reg:HI 26)) - (clobber (reg:HI 30))]) - (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))] + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:DI 18))])] "AVR_HAVE_MUL" - "") + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } -(define_insn "*mulsi3_call" - [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18))) - (clobber (reg:HI 26)) - (clobber (reg:HI 30))] - "AVR_HAVE_MUL" - "%~call __mulsi3" - [(set_attr "type" "xcall") - (set_attr "cc" "clobber")]) + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), 
HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) -(define_expand "mulhisi3" - [(set (reg:HI 18) - (match_operand:HI 1 "register_operand" "")) - (set (reg:HI 20) - (match_operand:HI 2 "register_operand" "")) +(define_insn_and_split "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:SI 18) + (match_dup 1)) (set (reg:SI 22) - (mult:SI (sign_extend:SI (reg:HI 18)) - (sign_extend:SI (reg:HI 20)))) - (set (match_operand:SI 0 "register_operand" "") + (match_dup 2)) + (parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))]) + (set (match_dup 0) + (reg:SI 22))] + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } + + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) + +;; "muluqisi3" +;; "muluhisi3" +(define_insn_and_split "mulusi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. 
*/ + /* Do the HI -> SI extension implicitely and after the multiplication. */ + + if (QImode == mode) + operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + { + operands[1] = force_reg (HImode, operands[1]); + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2])); + DONE; + } + }) + +;; "mulsqisi3" +;; "mulshisi3" +(define_insn_and_split "mulssi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (sign_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode) + || s16_operand (operands[2], SImode)) + { + rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + + operands[1] = force_reg (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1])); + else + emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2)); + + DONE; + } + }) + +;; One-extend operand 1 + +(define_insn_and_split "mulohisi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + "") + +(define_expand "mulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:DI 18))])] "AVR_HAVE_MUL" "") (define_expand "umulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + "") + +(define_expand "usmulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + "") + +;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" +;; 
"*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" +;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3" +;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3" +(define_insn_and_split + "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r")))) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" [(set (reg:HI 18) - (match_operand:HI 1 "register_operand" "")) - (set (reg:HI 20) - (match_operand:HI 2 "register_operand" "")) - (set (reg:SI 22) - (mult:SI (zero_extend:SI (reg:HI 18)) - (zero_extend:SI (reg:HI 20)))) - (set (match_operand:SI 0 "register_operand" "") + (match_dup 1)) + (set (reg:HI 26) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (match_dup 3) + (match_dup 4))) + (set (match_dup 0) (reg:SI 22))] + { + rtx xop1 = operands[1]; + rtx xop2 = operands[2]; + + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + xop1 = gen_rtx_fmt_e (, HImode, xop1); + + if (QImode == mode) + xop2 = gen_rtx_fmt_e (, HImode, xop2); + + if ( == + || == ZERO_EXTEND) + { + operands[1] = xop1; + operands[2] = xop2; + operands[3] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 26)); + } + else + { + /* = SIGN_EXTEND */ + /* = ZERO_EXTEND */ + + operands[1] = xop2; + operands[2] = xop1; + operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26)); + } + }) + +(define_insn "*mulsi3_call" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))] "AVR_HAVE_MUL" - "") + "%~call __mulsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) (define_insn "*mulhisi3_call" - [(set (reg:SI 22) + [(set (reg:SI 22) (mult:SI (sign_extend:SI (reg:HI 18)) - (sign_extend:SI (reg:HI 20))))] + (sign_extend:SI (reg:HI 26))))] "AVR_HAVE_MUL" "%~call __mulhisi3" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) (define_insn "*umulhisi3_call" - [(set (reg:SI 22) + [(set (reg:SI 22) (mult:SI (zero_extend:SI (reg:HI 18)) - (zero_extend:SI (reg:HI 20))))] + (zero_extend:SI (reg:HI 26))))] "AVR_HAVE_MUL" "%~call __umulhisi3" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) +(define_insn "*usmulhisi3_call" + [(set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 18)) + (sign_extend:SI (reg:HI 26))))] + "AVR_HAVE_MUL" + "%~call __usmulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*muluhisi3_call" + [(set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 26)) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __muluhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulshisi3_call" + [(set (reg:SI 22) + (mult:SI (sign_extend:SI (reg:HI 26)) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulshisi3" + [(set_attr "type" "xcall") + (set_attr "cc" 
"clobber")]) + +(define_insn "*mulohisi3_call" + [(set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulohisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + ; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % ; divmod @@ -2400,9 +2654,16 @@ ;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x ;; sign extend +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't be combined to a widening +;; multiplication. There is no need for combine to propagate hard registers, +;; register allocation can do it just as well. + (define_insn "extendqihi2" [(set (match_operand:HI 0 "register_operand" "=r,r") - (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))] + (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] "" "@ clr %B0\;sbrc %0,7\;com %B0 @@ -2412,7 +2673,7 @@ (define_insn "extendqisi2" [(set (match_operand:SI 0 "register_operand" "=r,r") - (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))] + (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] "" "@ clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0 @@ -2421,8 +2682,8 @@ (set_attr "cc" "set_n,set_n")]) (define_insn "extendhisi2" - [(set (match_operand:SI 0 "register_operand" "=r,&r") - (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))] + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))] "" "@ clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 @@ -2439,7 +2700,7 @@ (define_insn_and_split "zero_extendqihi2" [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extend:HI 
(match_operand:QI 1 "register_operand" "r")))] + (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] "" "#" "reload_completed" @@ -2455,7 +2716,7 @@ (define_insn_and_split "zero_extendqisi2" [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))] + (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] "" "#" "reload_completed" @@ -2470,8 +2731,8 @@ }) (define_insn_and_split "zero_extendhisi2" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))] + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))] "" "#" "reload_completed" diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S index 7f3feeb23a4..f6084a70518 100644 --- a/gcc/config/avr/libgcc.S +++ b/gcc/config/avr/libgcc.S @@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see .endm +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ #if !defined (__AVR_HAVE_MUL__) /******************************************************* - Multiplication 8 x 8 + Multiplication 8 x 8 without MUL *******************************************************/ #if defined (L_mulqi3) @@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define r_arg1 r24 /* multiplier */ #define r_res __tmp_reg__ /* result */ - .global __mulqi3 - .func __mulqi3 -__mulqi3: +DEFUN __mulqi3 clr r_res ; clear result __mulqi3_loop: sbrc r_arg1,0 @@ -97,18 +96,16 @@ __mulqi3_loop: __mulqi3_exit: mov r_arg1,r_res ; result to return register ret +ENDF __mulqi3 #undef r_arg2 #undef r_arg1 #undef r_res -.endfunc #endif /* defined (L_mulqi3) */ #if defined (L_mulqihi3) - .global __mulqihi3 - .func __mulqihi3 -__mulqihi3: +DEFUN __mulqihi3 clr r25 sbrc r24, 7 dec r25 @@ -116,21 +113,19 @@ __mulqihi3: sbrc r22, 7 dec r22 rjmp __mulhi3 - .endfunc +ENDF __mulqihi3: #endif /* defined (L_mulqihi3) */ #if defined (L_umulqihi3) - .global __umulqihi3 - .func __umulqihi3 -__umulqihi3: +DEFUN __umulqihi3 clr r25 clr r23 rjmp __mulhi3 - .endfunc +ENDF __umulqihi3 #endif /* defined (L_umulqihi3) */ /******************************************************* - Multiplication 16 x 16 + Multiplication 16 x 16 without MUL *******************************************************/ #if defined (L_mulhi3) #define r_arg1L r24 /* multiplier Low */ @@ -140,9 +135,7 @@ __umulqihi3: #define r_resL __tmp_reg__ /* result Low */ #define r_resH r21 /* result High */ - .global __mulhi3 - .func __mulhi3 -__mulhi3: +DEFUN __mulhi3 clr r_resH ; clear result clr r_resL ; clear result __mulhi3_loop: @@ -166,6 +159,7 @@ __mulhi3_exit: mov r_arg1H,r_resH ; result to return register mov r_arg1L,r_resL ret +ENDF __mulhi3 #undef r_arg1L #undef r_arg1H @@ -174,168 +168,51 @@ __mulhi3_exit: #undef r_resL #undef r_resH -.endfunc #endif /* defined (L_mulhi3) */ -#endif /* !defined (__AVR_HAVE_MUL__) */ /******************************************************* - Widening Multiplication 32 = 16 x 16 + Widening Multiplication 32 = 16 x 16 without MUL *******************************************************/ - + #if defined (L_mulhisi3) DEFUN __mulhisi3 -#if defined (__AVR_HAVE_MUL__) - -;; r25:r22 = r19:r18 * r21:r20 - -#define A0 18 -#define B0 20 -#define C0 22 - 
-#define A1 A0+1 -#define B1 B0+1 -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - - ; C = (signed)A1 * (signed)B1 - muls A1, B1 - movw C2, R0 - - ; C += A0 * B0 - mul A0, B0 - movw C0, R0 - - ; C += (signed)A1 * B0 - mulsu A1, B0 - sbci C3, 0 - add C1, R0 - adc C2, R1 - clr __zero_reg__ - adc C3, __zero_reg__ - - ; C += (signed)B1 * A0 - mulsu B1, A0 - sbci C3, 0 - XJMP __xmulhisi3_exit - -#undef A0 -#undef A1 -#undef B0 -#undef B1 -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#else /* !__AVR_HAVE_MUL__ */ ;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - sbrc r23, 7 - dec r24 - mov r25, r24 - clr r20 - sbrc r19, 7 - dec r20 - mov r21, r20 - XJMP __mulsi3 -#endif /* __AVR_HAVE_MUL__ */ + mov_l r18, r24 + mov_h r19, r25 + clr r24 + sbrc r23, 7 + dec r24 + mov r25, r24 + clr r20 + sbrc r19, 7 + dec r20 + mov r21, r20 + XJMP __mulsi3 ENDF __mulhisi3 #endif /* defined (L_mulhisi3) */ #if defined (L_umulhisi3) DEFUN __umulhisi3 -#if defined (__AVR_HAVE_MUL__) - -;; r25:r22 = r19:r18 * r21:r20 - -#define A0 18 -#define B0 20 -#define C0 22 - -#define A1 A0+1 -#define B1 B0+1 -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - - ; C = A1 * B1 - mul A1, B1 - movw C2, R0 - - ; C += A0 * B0 - mul A0, B0 - movw C0, R0 - - ; C += A1 * B0 - mul A1, B0 - add C1, R0 - adc C2, R1 - clr __zero_reg__ - adc C3, __zero_reg__ - - ; C += B1 * A0 - mul B1, A0 - XJMP __xmulhisi3_exit - -#undef A0 -#undef A1 -#undef B0 -#undef B1 -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#else /* !__AVR_HAVE_MUL__ */ ;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - clr r25 - clr r20 - clr r21 - XJMP __mulsi3 -#endif /* __AVR_HAVE_MUL__ */ + mov_l r18, r24 + mov_h r19, r25 + clr r24 + clr r25 + mov_l r20, r24 + mov_h r21, r25 + XJMP __mulsi3 ENDF __umulhisi3 #endif /* defined (L_umulhisi3) */ -#if defined (L_xmulhisi3_exit) - -;;; Helper for __mulhisi3 resp. __umulhisi3. 
- -#define C0 22 -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - -DEFUN __xmulhisi3_exit - add C1, R0 - adc C2, R1 - clr __zero_reg__ - adc C3, __zero_reg__ - ret -ENDF __xmulhisi3_exit - -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#endif /* defined (L_xmulhisi3_exit) */ - #if defined (L_mulsi3) /******************************************************* - Multiplication 32 x 32 + Multiplication 32 x 32 without MUL *******************************************************/ #define r_arg1L r22 /* multiplier Low */ #define r_arg1H r23 #define r_arg1HL r24 #define r_arg1HH r25 /* multiplier High */ - #define r_arg2L r18 /* multiplicand Low */ #define r_arg2H r19 #define r_arg2HL r20 @@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit #define r_resHL r30 #define r_resHH r31 /* result High */ - - .global __mulsi3 - .func __mulsi3 -__mulsi3: -#if defined (__AVR_HAVE_MUL__) - mul r_arg1L, r_arg2L - movw r_resL, r0 - mul r_arg1H, r_arg2H - movw r_resHL, r0 - mul r_arg1HL, r_arg2L - add r_resHL, r0 - adc r_resHH, r1 - mul r_arg1L, r_arg2HL - add r_resHL, r0 - adc r_resHH, r1 - mul r_arg1HH, r_arg2L - add r_resHH, r0 - mul r_arg1HL, r_arg2H - add r_resHH, r0 - mul r_arg1H, r_arg2HL - add r_resHH, r0 - mul r_arg1L, r_arg2HH - add r_resHH, r0 - clr r_arg1HH ; use instead of __zero_reg__ to add carry - mul r_arg1H, r_arg2L - add r_resH, r0 - adc r_resHL, r1 - adc r_resHH, r_arg1HH ; add carry - mul r_arg1L, r_arg2H - add r_resH, r0 - adc r_resHL, r1 - adc r_resHH, r_arg1HH ; add carry - movw r_arg1L, r_resL - movw r_arg1HL, r_resHL - clr r1 ; __zero_reg__ clobbered by "mul" - ret -#else +DEFUN __mulsi3 clr r_resHH ; clear result clr r_resHL ; clear result clr r_resH ; clear result @@ -414,13 +255,13 @@ __mulsi3_exit: mov_h r_arg1H,r_resH mov_l r_arg1L,r_resL ret -#endif /* defined (__AVR_HAVE_MUL__) */ +ENDF __mulsi3 + #undef r_arg1L #undef r_arg1H #undef r_arg1HL #undef r_arg1HH - #undef r_arg2L #undef r_arg2H #undef r_arg2HL @@ -431,9 +272,181 @@ __mulsi3_exit: #undef r_resHL 
#undef r_resHH -.endfunc #endif /* defined (L_mulsi3) */ + +#endif /* !defined (__AVR_HAVE_MUL__) */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#if defined (__AVR_HAVE_MUL__) +#define A0 26 +#define B0 18 +#define C0 22 + +#define A1 A0+1 + +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +/******************************************************* + Widening Multiplication 32 = 16 x 16 +*******************************************************/ + +#if defined (L_mulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulhisi3 + XCALL __umulhisi3 + ;; Sign-extend B + tst B1 + brpl 1f + sub C2, A0 + sbc C3, A1 +1: ;; Sign-extend A + XJMP __usmulhisi3_tail +ENDF __mulhisi3 +#endif /* L_mulhisi3 */ + +#if defined (L_usmulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __usmulhisi3 + XCALL __umulhisi3 + ;; FALLTHRU +ENDF __usmulhisi3 + +DEFUN __usmulhisi3_tail + ;; Sign-extend A + sbrs A1, 7 + ret + sub C2, B0 + sbc C3, B1 + ret +ENDF __usmulhisi3_tail +#endif /* L_usmulhisi3 */ + +#if defined (L_umulhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __umulhisi3 + mul A0, B0 + movw C0, r0 + mul A1, B1 + movw C2, r0 + mul A0, B1 + rcall 1f + mul A1, B0 +1: add C1, r0 + adc C2, r1 + clr __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __umulhisi3 +#endif /* L_umulhisi3 */ + +/******************************************************* + Widening Multiplication 32 = 16 x 32 +*******************************************************/ + +#if defined (L_mulshisi3) +;;; R25:R22 = (signed long) R27:R26 * R21:R18 +;;; (C3:C0) = (signed 
long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulshisi3 +#ifdef __AVR_HAVE_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A1 + brmi __mulohisi3 +#else + sbrs A1, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __muluhisi3 + ;; FALLTHRU +ENDF __mulshisi3 + +;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 +;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulohisi3 + XCALL __muluhisi3 + ;; One-extend R27:R26 (A1:A0) + sub C2, B0 + sbc C3, B1 + ret +ENDF __mulohisi3 +#endif /* L_mulshisi3 */ + +#if defined (L_muluhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 +;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __muluhisi3 + XCALL __umulhisi3 + mul A0, B3 + add C3, r0 + mul A1, B2 + add C3, r0 + mul A0, B2 + add C2, r0 + adc C3, r1 + clr __zero_reg__ + ret +ENDF __muluhisi3 +#endif /* L_muluhisi3 */ + +/******************************************************* + Multiplication 32 x 32 +*******************************************************/ + +#if defined (L_mulsi3) +;;; R25:R22 = R25:R22 * R21:R18 +;;; (C3:C0) = C3:C0 * B3:B0 +;;; Clobbers: R26, R27, __tmp_reg__ +DEFUN __mulsi3 + movw A0, C0 + push C2 + push C3 + XCALL __muluhisi3 + pop A1 + pop A0 + ;; A1:A0 now contains the high word of A + mul A0, B0 + add C2, r0 + adc C3, r1 + mul A0, B1 + add C3, r0 + mul A1, B0 + add C3, r0 + clr __zero_reg__ + ret +ENDF __mulsi3 +#endif /* L_mulsi3 */ + +#undef A0 +#undef A1 + +#undef B0 +#undef B1 +#undef B2 +#undef B3 + +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#endif /* __AVR_HAVE_MUL__ */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + /******************************************************* Division 8 / 8 => (result + remainder) *******************************************************/ diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md index 6646cb54610..98262d54f55 100755 --- a/gcc/config/avr/predicates.md +++ 
b/gcc/config/avr/predicates.md @@ -155,10 +155,34 @@ (ior (match_test "register_operand (XEXP (op, 0), mode)") (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))")))) +;; For some insns we must ensure that no hard register is inserted +;; into their operands because the insns are split and the split +;; involves hard registers. Examples are the divmod insns, which are +;; split to insns that represent implicit library calls. + ;; True for register that is pseudo register. (define_predicate "pseudo_register_operand" - (and (match_code "reg") - (match_test "!HARD_REGISTER_P (op)"))) + (and (match_operand 0 "register_operand") + (not (and (match_code "reg") + (match_test "HARD_REGISTER_P (op)"))))) + +;; True for operand that is pseudo register or CONST_INT. +(define_predicate "pseudo_register_or_const_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "pseudo_register_operand"))) + +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't combine to a widening +;; multiplication. There is no need for combine to propagate or insert +;; hard registers, register allocation can do it just as well. + +;; True for operand that is pseudo register at combine time. +(define_predicate "combine_pseudo_register_operand" + (ior (match_operand 0 "pseudo_register_operand") + (and (match_operand 0 "register_operand") + (match_test "reload_completed || reload_in_progress")))) ;; Return true if OP is a constant integer that is either ;; 8 or 16 or 24. @@ -189,3 +213,18 @@ (define_predicate "register_or_s9_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "s9_operand"))) + +;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535. 
+(define_predicate "u16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)"))) + +;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767. +(define_predicate "s16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)"))) + +;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1. +(define_predicate "o16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)"))) diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr index a5357f0ecf6..d79dd5a47b6 100644 --- a/gcc/config/avr/t-avr +++ b/gcc/config/avr/t-avr @@ -41,7 +41,9 @@ LIB1ASMFUNCS = \ _mulhi3 \ _mulhisi3 \ _umulhisi3 \ - _xmulhisi3_exit \ + _usmulhisi3 \ + _muluhisi3 \ + _mulshisi3 \ _mulsi3 \ _udivmodqi4 \ _divmodqi4 \ -- 2.30.2