;; Contributed by Steve Chamberlain <sac@cygnus.com>
;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
-/* Copyright (C) 1994, 2000, 2001 Free Software Foundation, Inc.
+/* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
;; "supporting routines".
; general purpose normalize routine
-;
+;
; divisor in A0
; dividend in A1
; turns both into +ve numbers, and leaves what the answer sign
; divnorm: make the values in A0 and A1 non-negative (presumably A0H/A0L and
; A1H/A1L are the high/low bytes of the 16-bit A0 and A1).
; A2L is cleared first; bit 0 of A2L is toggled when A0 had to be negated.
; Each negate is a two's complement: invert both bytes, then add 1.
; NOTE(review): this is a diff excerpt, so the end of the routine (anything
; after the negation of A1) is not visible here.
; NOTE(review): the comments call A0 the divisor and A1 the dividend, while
; the register map in ___udivhi3 labels A0 "Nn" and A1 "Dd" - confirm which
; naming is right.
divnorm:
mov.b #0x0,A2L
; or-ing a register with itself changes nothing but sets the flags; bge
; then skips the negate when the sign bit is clear (value >= 0).
or A0H,A0H ; is divisor > 0
- bge _lab1
+ bge _lab1
not A0H ; no - then make it +ve
not A0L
- adds #1,A0
+ adds #1,A0
xor #0x1,A2L ; and remember that in A2L
_lab1: or A1H,A1H ; look at dividend
- bge _lab2
+ bge _lab2
not A1H ; it is -ve, make it positive
not A1L
adds #1,A1
; modnorm: make the values in A0 and A1 non-negative; A2L is cleared first
; and bit 0 of A2L is toggled when A0 had to be negated.
; The instructions visible here match the visible part of divnorm, using
; labels _lab7/_lab8 instead of _lab1/_lab2.
; NOTE(review): diff excerpt - the _lab8 target and the end of the routine
; are not visible here.
modnorm:
mov.b #0x0,A2L
; or sets the flags from A0H; bge skips the negate when the sign bit is clear.
or A0H,A0H ; is divisor > 0
- bge _lab7
+ bge _lab7
not A0H ; no - then make it +ve
not A0L
- adds #1,A0
+ adds #1,A0
xor #0x1,A2L ; and remember that in A2L
_lab7: or A1H,A1H ; look at dividend
- bge _lab8
+ bge _lab8
not A1H ; it is -ve, make it positive
not A1L
adds #1,A1
not A0H ; yes, so make it so
not A0L
adds #1,A0
-_lab4: rts
+_lab4: rts
; A0=A0%A1 signed
; ___udivhi3: exported entry point; going by the name, presumably the
; unsigned HImode (16-bit) divide.
; Register map from the column comments below: A0 = Nn, A1 = Dd, A3 = work
; register (cleared by sub.w A3,A3).  Upper-case letters apparently stand
; for high bytes and lower-case letters for low bytes.
; NOTE(review): diff excerpt - the targets divlongway and _lab6, and part of
; the short path before the first rts, are not visible here.
.global ___udivhi3
___udivhi3:
- ; A0 A1 A2 A3
+ ; A0 A1 A2 A3
; Nn Dd P
- sub.w A3,A3 ; Nn Dd xP 00
- or A1H,A1H
+ sub.w A3,A3 ; Nn Dd xP 00
+ or A1H,A1H
bne divlongway
- or A0H,A0H
- beq _lab6
+ or A0H,A0H
+ beq _lab6
; Reaching this point means A1H == 0 (bne above not taken) and A0H != 0
; (beq above not taken).
; we know that D == 0 and N is != 0
mov.b A0H,A3L ; Nn Dd xP 0N
mov.b A3L,A0L ; Qq
mov.b A3H,A3L ; m
mov.b #0x0,A3H ; Qq 0m
- rts
+ rts
; D != 0 - which means the denominator is
; loop around to get the result.
mov.b #0x8,A2H ; 8
; Shift-and-subtract loop, 8 passes counted down in A2H.
; Each pass: shift A0L left one bit, rotate the bit shifted out into the
; 16-bit A3, then trial-subtract A1 from A3.  bhs (no borrow) keeps the
; subtraction and goes to setbit, which sets bit 0 of A0L; otherwise A1 is
; added back.  Both paths decrement A2H and loop until it reaches zero.
div8: add.b A0L,A0L ; n*=2
rotxl A3L ; Make remainder bigger
- rotxl A3H
+ rotxl A3H
sub.w A1,A3 ; Q-=N
bhs setbit ; set a bit ?
add.w A1,A3 ; no : too far , Q+=N
- dec A2H
- bne div8 ; next bit
- rts
+ dec A2H
+ bne div8 ; next bit
+ rts
setbit: inc A0L ; do insert bit
- dec A2H
- bne div8 ; next bit
- rts
+ dec A2H
+ bne div8 ; next bit
+ rts
#endif /* __H8300__ */
#endif /* L_divhi3 */
;; 4 byte integer divides for the H8/300.
;;
-;; We have one routine which does all the work and lots of
+;; We have one routine which does all the work and lots of
;; little ones which prepare the args and massage the sign.
;; We bunch all of this into one object file since there are several
;; "supporting routines".
; postive: if A2H has its sign bit set, negate the 32-bit value held in
; A2/A3 (two's complement: invert all four bytes, then add 1 with the carry
; rippling A3L -> A3H -> A2L -> A2H); otherwise branch to postive2.
; NOTE(review): diff excerpt - postive2 and whatever follows the negate are
; not visible here.
postive:
; mov.b of a register onto itself only sets the flags from A2H.
mov.b A2H,A2H ; is the denominator -ve
bge postive2
- not A2L
+ not A2L
not A2H
not A3L
not A3H
- add.b #1,A3L
+ add.b #1,A3L
addx #0,A3H
addx #0,A2L
addx #0,A2H
; mpostive: if A2H has its sign bit set, negate the 32-bit value held in
; A2/A3 (invert all four bytes, then add 1 with the carry rippling
; A3L -> A3H -> A2L -> A2H); otherwise branch to mpostive2.
; The visible instructions match postive apart from the branch target.
; NOTE(review): diff excerpt - mpostive2 and whatever follows the negate are
; not visible here.
mpostive:
; mov.b of a register onto itself only sets the flags from A2H.
mov.b A2H,A2H ; is the denominator -ve
bge mpostive2
- not A2L
+ not A2L
not A2H
not A3L
not A3H
- add.b #1,A3L
+ add.b #1,A3L
addx #0,A3H
addx #0,A2L
addx #0,A2H
; denominator in A2/A3
; ___modsi3: exported entry point; going by the name, presumably the signed
; SImode (32-bit) remainder.
; Visible here: S2P, S0P and S1P are saved with the PUSHP macro, S0P is
; copied into A0P, and control leaves through exitdiv.
; NOTE(review): diff excerpt - the #if that pairs with the #endif below and
; the code between the pushes and the mov.l are not visible here.
.global ___modsi3
___modsi3:
- PUSHP S2P
+ PUSHP S2P
PUSHP S0P
PUSHP S1P
mov.l S0P,A0P
#endif
bra exitdiv
-
+
; ___divsi3: exported entry point; going by the name, presumably the signed
; SImode (32-bit) divide.
; Visible here: S2P is saved with PUSHP; S2L is tested and, when it is zero,
; the sign fix-up is skipped by branching to reti, which restores S2P and
; returns.
; NOTE(review): diff excerpt - the code that sets S2L, the rest of the
; negate after `not A0H`, and the end of the #ifdef are not visible here.
.global ___divsi3
___divsi3:
PUSHP S2P
; or sets the flags from S2L without changing it.
or S2L,S2L
beq reti
-
+
; should be -ve
#ifdef __H8300__
not A0H
reti:
POPP S2P
- rts
+ rts
; takes A0/A1 numerator (A0P for 300H)
; A2/A3 denominator (A1P for 300H)
mov.b S1H,S1L
mov.b #0x0,S1H
- rts
+ rts
; have to do the divide by shift and test
; NOTE(review): diff excerpt - the nextbit label and any shifts that
; presumably precede this trial subtraction are not visible here.
; Visible step: subtract the 32-bit A2/A3 from S0/S1 (sub.w on the low
; words, subx carrying the borrow up through S0L and S0H).  bhs (no borrow)
; goes to setone, which increments A1L; otherwise the subtraction is undone.
; Both paths count S2H down and loop to nextbit until it reaches zero.
DenHighZero:
sub.w A3,S1 ; does it all fit
subx A2L,S0L
subx A2H,S0H
- bhs setone
+ bhs setone
add.w A3,S1 ; no, restore mistake
addx A2L,S0L
; NOTE(review): no `addx A2H,S0H` is visible here to undo the
; `subx A2H,S0H` above - probably elided from this excerpt; confirm
; against the full file.
dec S2H
bne nextbit
- rts
-
+ rts
+
setone:
inc A1L
dec S2H
bne nextbit
- rts
+ rts
#else /* __H8300H__ */
;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
-;
+;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because all of it falls off the top)
; A0 in
-; A1 in
+; A1 in
; A0 answer
#ifdef __H8300__
; ___mulhi3: HImode multiply of A0 by A1 with the result left in A0, built
; from three 8x8->16 mulxu steps: the low*low product goes in A2, then the
; low byte of each cross product (A0H*A1L and A1H*A0L) is added into A2H.
; A2 and A3 are used as scratch.
; NOTE(review): diff excerpt - the return is not visible here.
.global ___mulhi3
___mulhi3:
mov.b A1L,A2L ; A2l gets srcb.l
- mulxu A0L,A2 ; A2 gets first sub product
+ mulxu A0L,A2 ; A2 gets first sub product
mov.b A0H,A3L ; prepare for
mulxu A1L,A3 ; second sub product
; only the low byte of a cross product survives the "* 256" in 16 bits
add.b A3L,A2H ; sum first two terms
mov.b A1H,A3L ; third sub product
- mulxu A0L,A3
+ mulxu A0L,A3
add.b A3L,A2H ; almost there
mov.w A2,A0 ; that is
#ifdef L_mulsi3
;; SImode multiply.
-;;
+;;
;; I think that shift and add may be sufficient for this. Using the
;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
;; the inner loop uses maybe 20 cycles + overhead, but terminates
;; A0/A1 src_a
;; A2/A3 src_b
;;
-;; while (a)
+;; while (a)
;; {
;; if (a & 1)
;; r += b;
; NOTE(review): diff excerpt - the entry label, the _more target, the code
; that jumps to _done, and the end of the epilogue are not visible here.
; Visible flow: save S0P/S1P/S2P, clear S0 and S1, then loop at _top.  At
; _done S0/S1 are copied to A0/A1 before the saved registers are restored.
PUSHP S0P
PUSHP S1P
PUSHP S2P
-
+
; sub.w of a register from itself zeroes it
sub.w S0,S0
sub.w S1,S1
-
+
; while (a)
; mov.w of a register onto itself only sets the flags; non-zero A0 goes
; to _more.
_top: mov.w A0,A0
bne _more
; rotate right through carry, chained across A0L, A1H and A1L
rotxr A0L
rotxr A1H
rotxr A1L
-
+
; b <<= 1
; (A3 is doubled and the carry is folded into the doubling of A2L)
add.w A3,A3
addx A2L,A2L
bra _top
_done:
- mov.w S0,A0
+ mov.w S0,A0
mov.w S1,A1
POPP S2P
POPP S1P
___mulsi3:
mov.w r1,r2 ; ( 2 states) b * d
mulxu r0,er2 ; (22 states)
-
+
mov.w e0,r3 ; ( 2 states) a * d
beq L_skip1 ; ( 4 states)
mulxu r1,er3 ; (22 states)