From 26731a9e43668e28553e494a9f5f3cf946a83012 Mon Sep 17 00:00:00 2001
From: Doug Evans <dje@gnu.org>
Date: Mon, 11 Jul 1994 22:45:37 +0000
Subject: [PATCH] Initial revision

From-SVN: r7726
---
 gcc/config/h8300/lib1funcs.asm | 672 +++++++++++++++++++++++++++++++++
 1 file changed, 672 insertions(+)
 create mode 100644 gcc/config/h8300/lib1funcs.asm

diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm
new file mode 100644
index 00000000000..370ba75ed04
--- /dev/null
+++ b/gcc/config/h8300/lib1funcs.asm
@@ -0,0 +1,672 @@
+;; libgcc1 routines for the Hitachi h8/300 cpu.
+;; Contributed by Steve Chamberlain.
+;; sac@cygnus.com
+;; This file is in the public domain.
+
+/* Assembler register definitions: short names for the registers used below.  */
+/* A0-A3 (r0-r3) carry the arguments, results and scratch values.  */
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+/* S0-S2 (r4-r6) are pushed and popped by the SImode routines that use them.  */
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+/* Pointer sized operations and registers: 16 bits on the H8/300.  */
+#ifdef __H8300__
+#define MOVP	mov.w	/* pointers are 16 bits */
+#define ADDP	add.w
+#define CMPP	cmp.w
+#define PUSHP	push
+#define POPP	pop
+
+#define A0P	r0
+#define A1P	r1
+#define A2P	r2
+#define A3P	r3
+#define S0P	r4
+#define S1P	r5
+#define S2P	r6
+#endif
+/* Pointer sized operations and registers: 32 bits on the H8/300H.  */
+#ifdef __H8300H__
+#define MOVP	mov.l	/* pointers are 32 bits */
+#define ADDP	add.l
+#define CMPP	cmp.l
+#define PUSHP	push.l
+#define POPP	pop.l
+
+#define A0P	er0
+#define A1P	er1
+#define A2P	er2
+#define A3P	er3
+#define S0P	er4
+#define S1P	er5
+#define S2P	er6
+/* e0-e3: the upper 16 bits of er0-er3 (H8/300H only).  */
+#define A0E	e0
+#define A1E	e1
+#define A2E	e2
+#define A3E	e3
+#endif
+
+#ifdef L_cmpsi2
+#ifdef __H8300__
+;; ___cmpsi2: signed compare of the 32 bit values A0:A1 and A2:A3
+;; (A0 and A2 hold the high words).
+;; Returns in A0: 0 if the first is less, 1 if equal, 2 if greater.
+	.section .text
+	.align 2
+	.global ___cmpsi2
+___cmpsi2:
+	cmp.w	A0,A2		; high words: flags from A2 - A0
+	bne	.Lhigh_differ
+	cmp.w	A1,A3		; high words match: flags from A3 - A1
+	bne	.Llow_differ
+	mov.w	#1,A0		; every bit matches
+	rts
+.Lhigh_differ:
+	bgt	.Lless		; A2 > A0 as signed words
+.Lgreater:
+	mov.w	#2,A0
+	rts
+.Llow_differ:
+	bls	.Lgreater	; A3 below A1 as unsigned words
+.Lless:
+	sub.w	A0,A0		; A0 = 0
+	rts
+	.end
+#endif
+#endif /* L_cmpsi2 */
+
+#ifdef L_ucmpsi2
+#ifdef __H8300__
+;; ___ucmpsi2: unsigned compare of the 32 bit values A0:A1 and A2:A3
+;; (A0 and A2 hold the high words).
+;; Returns in A0: 0 if the first is less, 1 if equal, 2 if greater.
+	.section .text
+	.align 2
+	.global ___ucmpsi2
+___ucmpsi2:
+	cmp.w	A0,A2		; high words: flags from A2 - A0
+	bne	.Lhigh_differ
+	cmp.w	A1,A3		; high words match: flags from A3 - A1
+	bne	.Llow_differ
+	mov.w	#1,A0		; every bit matches
+	rts
+.Lhigh_differ:
+	bhi	.Lless		; A2 above A0 as unsigned words
+.Lgreater:
+	mov.w	#2,A0
+	rts
+.Llow_differ:
+	bls	.Lgreater	; A3 below A1 as unsigned words
+.Lless:
+	sub.w	A0,A0		; A0 = 0
+	rts
+	.end
+#endif
+#endif /* L_ucmpsi2 */
+
+#ifdef L_divhi3
+
+;; HImode divides for the H8/300.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+; general purpose normalize routine
+; dividend in A0
+; divisor in A1
+; turns both into +ve numbers and leaves the result signs in A2L:
+; bit 0 = sign of the quotient, bit 1 = sign of the dividend (and remainder)
+
+#ifdef __H8300__
+	.section .text
+	.align 2
+divnorm:
+	mov.b	#0x0,A2L
+	or	A0H,A0H		; is dividend >= 0
+	bge	_lab1
+	not	A0H		; no - then make it +ve
+	not	A0L
+	adds	#1,A0
+	xor	#0x3,A2L	; and remember that in bits 0 and 1 of A2L
+_lab1:	or	A1H,A1H		; look at divisor
+	bge	_lab2
+	not	A1H		; it is -ve, make it positive
+	not	A1L
+	adds	#1,A1
+	xor	#0x1,A2L	; and toggle sign of the quotient only
+_lab2:	rts
+
+; A0=A0/A1 signed
+
+	.global	___divhi3
+___divhi3:
+	bsr	divnorm
+	bsr	___udivhi3
+negans:	btst	#0,A2L		; should answer be negative ?
+	beq	_lab4
+	not	A0H		; yes, so make it so
+	not	A0L
+	adds	#1,A0
+_lab4:	rts
+
+; A0=A0%A1 signed, the remainder takes the sign of the dividend
+
+	.global	___modhi3
+___modhi3:
+	bsr	divnorm
+	bsr	___udivhi3
+	mov	A3,A0		; the remainder comes back in A3
+	shlr	A2L		; move the dividend sign down into bit 0
+	bra	negans
+
+; A0=A0%A1 unsigned
+
+	.global	___umodhi3
+___umodhi3:
+	bsr	___udivhi3
+	mov	A3,A0		; the remainder comes back in A3
+	rts
+
+; A0=A0/A1 unsigned
+; A3=A0%A1 unsigned
+; A2H trashed (A2L is left alone, the signed entry points keep signs there)
+; D high 8 bits of denom
+; d low 8 bits of denom
+; N high 8 bits of num
+; n low 8 bits of num
+; M high 8 bits of mod
+; m low 8 bits of mod
+; Q high 8 bits of quot
+; q low 8 bits of quot
+; P preserve
+
+; The h8 only has a 16/8 bit divide, so we look at the incoming and
+; see how to partition up the expression.
+
+	.global	___udivhi3
+___udivhi3:
+				; A0 A1 A2 A3
+				; Nn Dd       P
+	sub.w	A3,A3		; Nn Dd xP 00
+	or	A1H,A1H		; does the denominator fit in 8 bits ?
+	bne	divlongway	; no, divxu can not be used
+	or	A0H,A0H
+	beq	_lab6		; N == 0, only n needs dividing
+
+; we know that D == 0 and N is != 0
+	mov.b	A0H,A3L		; Nn Dd xP 0N
+	divxu	A1L,A3		;          MQ
+	mov.b	A3L,A0H	 	; Q
+; dealt with N, do n
+_lab6:	mov.b	A0L,A3L		;           n
+	divxu	A1L,A3		;          mq
+	mov.b	A3L,A0L		; Qq
+	mov.b	A3H,A3L         ;           m
+	mov.b	#0x0,A3H	; Qq       0m
+	rts
+
+; D != 0 - which means the denominator does not fit in 8 bits, so
+;          loop around (shift and subtract) to get the result.
+
+divlongway:
+	mov.b	A0H,A3L		; Nn Dd xP 0N
+	mov.b	#0x0,A0H	; high byte of answer has to be zero
+	mov.b	#0x8,A2H	;       8 quotient bits to work out
+div8:	add.b	A0L,A0L		; n*=2, the top bit falls into carry
+	rotxl	A3L		; Make remainder bigger
+	rotxl	A3H
+	sub.w	A1,A3		; remainder -= denominator
+	bhs	setbit		; set a bit ?
+	add.w	A1,A3		;  no : too far , remainder += denominator
+
+	dec	A2H
+	bne	div8		; next bit
+	rts
+
+setbit:	inc	A0L		; do insert bit
+	dec	A2H
+	bne	div8		; next bit
+	rts
+
+#endif /* __H8300__ */
+#endif /* L_divhi3 */
+
+#ifdef L_divsi3
+
+;; 4 byte integer divides for the H8/300.
+;;
+;; We have one routine which does all the work and lots of 
+;; little ones which prepare the args and massage the sign.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+#ifdef __H8300H__
+	.h8300h
+#endif
+
+	.section .text
+	.align 2
+
+; Put abs SIs into A0/A1 and A2/A3 (A0P and A1P for 300h), and leave a 1
+; in S2L if exactly one of them was negative (the quotient sign), else a 0.
+; This function is here to keep branch displacements small.
+#ifdef __H8300__
+
+divnorm:
+	mov.b	#0,S2L		; keep the sign in S2
+	mov.b	A0H,A0H		; is the numerator -ve
+	bge	postive
+
+	; negate arg
+	not	A0H
+	not	A1H
+	not	A0L
+	not	A1L
+
+	add	#1,A1L		; add 1, rippling the carry from the lowest byte up
+	addx	#0,A1H
+	addx	#0,A0L		; A0L is the low byte of the high word, so it takes
+	addx	#0,A0H		; the carry before A0H, the most significant byte
+
+	mov.b	#1,S2L		; the sign will be -ve
+postive:
+	mov.b	A2H,A2H		; is the denominator -ve
+	bge	postive2
+	not	A2L
+	not	A2H
+	not	A3L
+	not	A3H
+	add.b	#1,A3L		; same ripple for the denominator
+	addx	#0,A3H
+	addx	#0,A2L
+	addx	#0,A2H
+	xor	#1,S2L		; toggle result sign
+postive2:
+	rts
+
+#else /* __H8300H__ */
+
+divnorm:
+	mov.b	#0,S2L		; keep the sign in S2
+	mov.l	A0P,A0P		; is the numerator -ve
+	bge	postive
+
+	neg.l	A0P		; negate arg
+	mov.b	#1,S2L		; the sign will be -ve
+
+postive:
+	mov.l	A1P,A1P		; is the denominator -ve
+	bge	postive2
+
+	neg.l	A1P		; negate arg
+	xor.b	#1,S2L		; toggle result sign
+
+postive2:
+	rts
+
+#endif
+
+; numerator in A0/A1, denominator in A2/A3 (A0P and A1P for 300h)
+	.global	___modsi3
+___modsi3:
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+#ifdef __H8300__
+	mov.b	A0H,S2H		; save the numerator sign byte, divnorm keeps S2H
+#else
+	mov.w	A0E,S2		; same: S2H = top byte of the numerator
+#endif
+	bsr	divnorm
+	mov.b	S2H,S2L		; the remainder takes the sign of the numerator:
+	and	#0x80,S2L	; S2L != 0 only if the numerator was -ve
+	bsr	divmodsi4
+#ifdef __H8300__
+	mov	S0,A0		; return the remainder
+	mov	S1,A1
+#else
+	mov.l	S0P,A0P		; return the remainder
+#endif
+	bra	exitdiv
+
+	.global	___udivsi3
+___udivsi3:
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	mov.b	#0,S2L	; keep sign low
+	bsr	divmodsi4
+	bra	exitdiv
+
+	.global	___umodsi3
+___umodsi3:
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	mov.b	#0,S2L	; keep sign low
+	bsr	divmodsi4
+#ifdef __H8300__
+	mov	S0,A0		; return the remainder
+	mov	S1,A1
+#else
+	mov.l	S0P,A0P		; return the remainder
+#endif
+	bra	exitdiv
+
+	.global	___divsi3
+___divsi3:
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	jsr	divnorm
+	jsr	divmodsi4
+
+	; common exit: restore S1 and S0, negate the result if S2L != 0
+exitdiv:
+	POPP	S1P
+	POPP	S0P
+	or	S2L,S2L
+	beq	reti		; result stays +ve, nothing to negate
+#ifdef __H8300__
+	not	A0H
+	not	A1H
+	not	A0L
+	not	A1L
+	add	#1,A1L		; add 1, rippling the carry from the lowest byte up
+	addx	#0,A1H
+	addx	#0,A0L		; A0L is the low byte of the high word, so it takes
+	addx	#0,A0H		; the carry before A0H, the most significant byte
+#else /* __H8300H__ */
+	neg.l	A0P
+#endif
+reti:
+	POPP	S2P
+	rts
+
+	; takes A0/A1 numerator (A0P for 300h)
+	; A2/A3 denominator (A1P for 300h)
+	; returns A0/A1 quotient (A0P for 300h)
+	; S0/S1 remainder (S0P for 300h)
+	; trashes S2H only, S2L (the sign kept by the callers) is not written
+
+#ifdef __H8300__
+
+divmodsi4:
+        sub.w	S0,S0		; zero play area
+        mov.w	S0,S1
+        mov.b	A2H,S2H		; S2H = OR of the top three denominator bytes
+        or	A2L,S2H
+        or	A3H,S2H
+        bne	DenHighZero	; some of them set: divide by shift and test
+        mov.b	A0H,A0H		; else divide a byte at a time by A3L,
+        bne	NumByte0Zero	; starting at the first numerator byte
+        mov.b	A0L,A0L		; that is not zero
+        bne	NumByte1Zero
+        mov.b	A1H,A1H
+        bne	NumByte2Zero
+        bra	NumByte3Zero
+NumByte0Zero:			; (entered when numerator byte 0 is NOT zero)
+	mov.b	A0H,S1L
+        divxu	A3L,S1		; S1L = quotient byte, S1H = remainder so far
+        mov.b	S1L,A0H
+NumByte1Zero:
+	mov.b	A0L,S1L		; S1H still holds the remainder of the byte above
+        divxu	A3L,S1
+        mov.b	S1L,A0L
+NumByte2Zero:
+	mov.b	A1H,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A1H
+NumByte3Zero:
+	mov.b	A1L,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A1L
+
+        mov.b	S1H,S1L		; final remainder goes to the low byte of S1
+        mov.b	#0x0,S1H
+        rts	
+
+; have to do the divide by shift and test
+DenHighZero:			; (entered when the top denominator bytes are NOT zero)
+	mov.b	A0H,S1L		; top numerator byte starts off the remainder
+        mov.b	A0L,A0H		; and the other three bytes move up by one
+        mov.b	A1H,A0L
+        mov.b	A1L,A1H
+
+        mov.b	#0,A1L
+        mov.b	#24,S2H	; only do 24 iterations
+
+nextbit:
+	add.w	A1,A1	; double the answer guess
+        rotxl	A0L
+        rotxl	A0H	; bit shifted out of A0/A1 carries into the remainder
+
+        rotxl	S1L	; double remainder
+        rotxl	S1H
+        rotxl	S0L
+        rotxl	S0H
+        sub.w	A3,S1	; does it all fit
+        subx	A2L,S0L
+        subx	A2H,S0H
+        bhs	setone	 ; no borrow: the denominator fitted
+
+        add.w	A3,S1	; no, restore mistake
+        addx	A2L,S0L
+        addx	A2H,S0H
+
+        dec	S2H
+        bne	nextbit
+        rts	
+	
+setone:
+	inc	A1L	; record a 1 in the bit just shifted into the answer
+        dec	S2H
+        bne	nextbit
+        rts	
+
+#else /* __H8300H__ */
+
+divmodsi4:
+	sub.l	S0P,S0P		; zero play area
+	mov.w	A1E,A1E		; denominator top word 0?
+	bne	DenHighZero	; no: divide by shift and test
+
+	; do it the easy way, see page 107 in manual
+	mov.w	A0E,A2		; A2P = top word of the numerator, zero extended
+	extu.l	A2P
+	divxu.w	A1,A2P		; A2 = top word of the quotient, A2E = remainder
+	mov.w	A2E,A0E		; remainder goes above the low numerator word
+	divxu.w	A1,A0P		; A0 = low word of the quotient, A0E = remainder
+	mov.w	A0E,S0		; S0P = remainder, zero extended below
+	mov.w	A2,A0E		; A0P = whole quotient
+	extu.l	S0P
+	rts
+
+DenHighZero:			; (entered when the denominator top word is NOT zero)
+	mov.w	A0E,A2		; move the numerator up by one byte, its top
+	mov.b	A2H,S0L		; byte starts off the remainder in S0L
+	mov.b	A2L,A2H
+	mov.b	A0H,A2L
+	mov.w	A2,A0E
+	mov.b	A0L,A0H
+	mov.b	#0,A0L
+	mov.b	#24,S2H		; only do 24 iterations
+
+nextbit:
+	shll.l	A0P		; double the answer guess
+	rotxl.l	S0P		; double remainder
+	sub.l	A1P,S0P		; does it all fit?
+	bhs	setone
+
+	add.l	A1P,S0P		; no, restore mistake
+	dec	S2H
+	bne	nextbit
+	rts
+
+setone:
+	inc	A0L		; record a 1 in the bit just shifted into the answer
+	dec	S2H
+	bne	nextbit
+	rts
+
+#endif
+#endif /* L_divsi3 */
+
+#ifdef L_mulhi3
+
+;; HImode multiply.
+; The h8 only has an 8*8->16 multiply.
+; The answer is the same as:
+; 
+; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
+; (we can ignore A1.h * A0.h because that will all fall off the top)
+; A0 in (srca)
+; A1 in (srcb)
+; A0 answer, low 16 bits of the product
+; A2 and A3 are used as scratch
+#ifdef __H8300__
+	.section .text
+	.align 2
+	.global	___mulhi3
+___mulhi3:
+	mov.b	A1L,A2L		; A2l gets srcb.l
+	mulxu	A0L,A2		; A2 gets first sub product 
+
+	mov.b	A0H,A3L		; prepare for
+	mulxu	A1L,A3		; second sub product
+
+	add.b	A3L,A2H		; sum first two terms, only the high byte is affected
+
+	mov.b	A1H,A3L		; third sub product
+	mulxu	A0L,A3		
+
+	add.b	A3L,A2H		; almost there
+	mov.w	A2,A0		; that is the answer, hand it back in A0
+	rts
+
+#endif
+#endif /* L_mulhi3 */
+
+#ifdef L_mulsi3
+
+;; SImode multiply, done by shift and add.
+;;
+;; Using the supplied 8x8->16 multiply would need 10 ops of 14 cycles
+;; each + overhead.  The loop below uses maybe 20 cycles + overhead per
+;; pass, but it stops as soon as src_a has run out of set bits, so it
+;; terminates quickly on small args.
+;;
+;; H8/300:  A0/A1 src_a, A2/A3 src_b, product returned in A0/A1
+;; H8/300H: A0P src_a, A1P src_b, product returned in A0P
+;; The H8/300 version saves and restores S0-S2, the H8/300H version only
+;; touches A0P-A2P.
+;; Only the low 32 bits of the product are kept.
+;;
+;;  r = 0;
+;;  while (a)
+;;    {
+;;      odd = a & 1;
+;;      a >>= 1;
+;;      if (odd)
+;;        r += b;
+;;      b <<= 1;
+;;    }
+
+	.section .text
+	.align 2
+
+#ifdef __H8300__
+
+	.global	___mulsi3
+___mulsi3:
+	PUSHP	S0P
+	PUSHP	S1P
+	PUSHP	S2P
+
+	sub.w	S0,S0		; r = 0, accumulated in S0/S1
+	sub.w	S1,S1
+	bra	_check
+
+	; a >>= 1, the bit shifted out of the bottom lands in carry
+_step:	shlr	A0H
+	rotxr	A0L
+	rotxr	A1H
+	rotxr	A1L
+	bcc	_shift		; that bit was clear, nothing to add
+
+	; r += b
+	add.w	A3,S1
+	addx	A2L,S0L
+	addx	A2H,S0H
+
+	; b <<= 1
+_shift:	add.w	A3,A3
+	addx	A2L,A2L
+	addx	A2H,A2H
+
+	; while (a)
+_check:	mov.w	A0,A0
+	bne	_step
+	mov.w	A1,A1
+	bne	_step
+
+	; hand back r and restore the saved registers
+	mov.w	S0,A0
+	mov.w	S1,A1
+	POPP	S2P
+	POPP	S1P
+	POPP	S0P
+	rts
+
+#else /* __H8300H__ */
+
+	.h8300h
+
+	.global	___mulsi3
+___mulsi3:
+	sub.l	A2P,A2P		; r = 0, accumulated in A2P
+	bra	_check
+
+	; a >>= 1, the bit shifted out of the bottom lands in carry
+_step:	shlr.l	A0P
+	bcc	_shift		; that bit was clear, nothing to add
+
+	; r += b
+	add.l	A1P,A2P
+
+	; b <<= 1
+_shift:	shll.l	A1P
+
+	; while (a)
+_check:	mov.l	A0P,A0P
+	bne	_step
+
+	; hand back r
+	mov.l	A2P,A0P
+	rts
+
+#endif
+#endif /* L_mulsi3 */
\ No newline at end of file
-- 
2.30.2