From 6130646b8811ec5af4d58bd08fee7ebd8ffbc973 Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay Date: Fri, 29 Jul 2011 09:33:54 +0000 Subject: [PATCH] re PR target/49313 (Inefficient libgcc implementations for avr) PR target/49313 * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction. (__ctzsi2): Result for 0 may be undefined. (__ctzhi2): Result for 0 may be undefined. (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail. (__popcountsi2): Ditto. And don't clobber r26. (__popcountdi2): Ditto. And don't clobber r27. * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum. (parityhi2): New expand. (paritysi2): New expand. (popcounthi2): New expand. (popcountsi2): New expand. (clzhi2): New expand. (clzsi2): New expand. (ctzhi2): New expand. (ctzsi2): New expand. (ffshi2): New expand. (ffssi2): New expand. (copysignsf3): New insn. (bswapsi2): New expand. (*parityhi2.libgcc): New insn. (*parityqihi2.libgcc): New insn. (*paritysihi2.libgcc): New insn. (*popcounthi2.libgcc): New insn. (*popcountsi2.libgcc): New insn. (*popcountqi2.libgcc): New insn. (*popcountqihi2.libgcc): New insn-and-split. (*clzhi2.libgcc): New insn. (*clzsihi2.libgcc): New insn. (*ctzhi2.libgcc): New insn. (*ctzsihi2.libgcc): New insn. (*ffshi2.libgcc): New insn. (*ffssihi2.libgcc): New insn. (*bswapsi2.libgcc): New insn. From-SVN: r176920 --- gcc/ChangeLog | 37 +++++ gcc/config/avr/avr.md | 290 ++++++++++++++++++++++++++++++++++++++++ gcc/config/avr/libgcc.S | 47 ++++--- 3 files changed, 355 insertions(+), 19 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a184cc27cc9..2d12436df0c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,40 @@ +2011-07-29 Georg-Johann Lay + + PR target/49313 + * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction. + (__ctzsi2): Result for 0 may be undefined. + (__ctzhi2): Result for 0 may be undefined. + (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail. + (__popcountsi2): Ditto. And don't clobber r26. 
+ (__popcountdi2): Ditto. And don't clobber r27. + * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum. + (parityhi2): New expand. + (paritysi2): New expand. + (popcounthi2): New expand. + (popcountsi2): New expand. + (clzhi2): New expand. + (clzsi2): New expand. + (ctzhi2): New expand. + (ctzsi2): New expand. + (ffshi2): New expand. + (ffssi2): New expand. + (copysignsf3): New insn. + (bswapsi2): New expand. + (*parityhi2.libgcc): New insn. + (*parityqihi2.libgcc): New insn. + (*paritysihi2.libgcc): New insn. + (*popcounthi2.libgcc): New insn. + (*popcountsi2.libgcc): New insn. + (*popcountqi2.libgcc): New insn. + (*popcountqihi2.libgcc): New insn-and-split. + (*clzhi2.libgcc): New insn. + (*clzsihi2.libgcc): New insn. + (*ctzhi2.libgcc): New insn. + (*ctzsihi2.libgcc): New insn. + (*ffshi2.libgcc): New insn. + (*ffssihi2.libgcc): New insn. + (*bswapsi2.libgcc): New insn. + 2011-07-29 Richard Guenther * tree-vrp.c (get_value_range): Only set parameter default diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index a14d96d42de..2c215fdecc5 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -55,6 +55,7 @@ UNSPEC_FMUL UNSPEC_FMULS UNSPEC_FMULSU + UNSPEC_COPYSIGN ]) (define_c_enum "unspecv" @@ -3941,6 +3942,295 @@ [(set_attr "length" "9") (set_attr "cc" "clobber")]) + +;; Parity + +(define_expand "parityhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "paritysi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:SI 22))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + 
(set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:QI 24)))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*paritysihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:SI 22)))] + "" + "%~call __paritysi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Popcount + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24)))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:SI 22)))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:QI 24)))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))] + "") + +;; Count Leading Zeros + +(define_expand "clzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "clzsi2" + [(set (reg:SI 22) + 
(match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:SI 22))) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*clzhi2.libgcc" + [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __clzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*clzsihi2.libgcc" + [(set (reg:HI 24) + (clz:HI (reg:SI 22))) + (clobber (reg:QI 26))] + "" + "%~call __clzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Count Trailing Zeros + +(define_expand "ctzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "ctzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ctzhi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ctzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ctzsihi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ctzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Find First Set + +(define_expand "ffshi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + 
+(define_expand "ffssi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ffshi2.libgcc" + [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ffshi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ffssihi2.libgcc" + [(set (reg:HI 24) + (ffs:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ffssi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Copysign + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "r")] + UNSPEC_COPYSIGN))] + "" + "bst %D2,7\;bld %D0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Swap Bytes (change byte-endianness) + +(define_expand "bswapsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:SI 22) + (bswap:SI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (reg:SI 22))] + "" + "") + +(define_insn "*bswapsi2.libgcc" + [(set (reg:SI 22) + (bswap:SI (reg:SI 22)))] + "" + "%~call __bswapsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + ;; CPU instructions ;; NOP taking 1 or 2 Ticks diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S index d3d6e4fd74f..f6084a70518 100644 --- a/gcc/config/avr/libgcc.S +++ b/gcc/config/avr/libgcc.S @@ -1074,9 +1074,15 @@ ENDF __ffssi2 ;; clobbers: r26 DEFUN __ffshi2 clr r26 +#ifdef __AVR_HAVE_JMP_CALL__ + ;; Some cores have problems skipping 2-word instructions + tst r24 + breq 2f +#else cpse r24, __zero_reg__ +#endif /* __AVR_HAVE_JMP_CALL__ */ 1: XJMP __loop_ffsqi2 - ldi r26, 8
+2: ldi r26, 8 or r24, r25 brne 1b ret @@ -1106,12 +1112,12 @@ ENDF __loop_ffsqi2 #if defined (L_ctzsi2) ;; count trailing zeros ;; r25:r24 = ctz32 (r25:r22) -;; ctz(0) = 32 +;; clobbers: r26, r22 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC DEFUN __ctzsi2 XCALL __ffssi2 dec r24 - sbrc r24, 7 - ldi r24, 32 ret ENDF __ctzsi2 #endif /* defined (L_ctzsi2) */ @@ -1119,12 +1125,12 @@ ENDF __ctzsi2 #if defined (L_ctzhi2) ;; count trailing zeros ;; r25:r24 = ctz16 (r25:r24) -;; ctz(0) = 16 +;; clobbers: r26 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC DEFUN __ctzhi2 XCALL __ffshi2 dec r24 - sbrc r24, 7 - ldi r24, 16 ret ENDF __ctzhi2 #endif /* defined (L_ctzhi2) */ @@ -1258,47 +1264,50 @@ ENDF __parityqi2 #if defined (L_popcounthi2) ;; population count ;; r25:r24 = popcount16 (r25:r24) -;; clobbers: r30, __tmp_reg__ +;; clobbers: __tmp_reg__ DEFUN __popcounthi2 XCALL __popcountqi2 - mov r30, r24 + push r24 mov r24, r25 XCALL __popcountqi2 - add r24, r30 clr r25 - ret + ;; FALLTHRU ENDF __popcounthi2 + +DEFUN __popcounthi2_tail + pop __tmp_reg__ + add r24, __tmp_reg__ + ret +ENDF __popcounthi2_tail #endif /* defined (L_popcounthi2) */ #if defined (L_popcountsi2) ;; population count ;; r25:r24 = popcount32 (r25:r22) -;; clobbers: r26, r30, __tmp_reg__ +;; clobbers: __tmp_reg__ DEFUN __popcountsi2 XCALL __popcounthi2 - mov r26, r24 + push r24 mov_l r24, r22 mov_h r25, r23 XCALL __popcounthi2 - add r24, r26 - ret + XJMP __popcounthi2_tail ENDF __popcountsi2 #endif /* defined (L_popcountsi2) */ #if defined (L_popcountdi2) ;; population count ;; r25:r24 = popcount64 (r25:r18) -;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__ +;; clobbers: r22, r23, __tmp_reg__ DEFUN __popcountdi2 XCALL __popcountsi2 - mov r27, r24 + push r24 mov_l r22, r18 mov_h r23, r19 mov_l r24, r20 mov_h r25, r21 XCALL __popcountsi2 - add r24, r27 - ret + XJMP __popcounthi2_tail ENDF __popcountdi2 #endif /* defined (L_popcountdi2) */ -- 2.30.2