1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
28 !! libgcc routines for the Renesas / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
/* Symbol helper macros (NOTE(review): the guarding #if is outside this
   view; presumably the ELF case - confirm):
     LOCAL(X)   - file-local label, spelled .L_X
     FUNC(X)    - emit `.type X,@function' for symbol X
     ENDFUNC(X) - define the end label .Lfe_X and set the size of X to
                  the distance from X to that label.  ENDFUNC0 is the
                  inner step, so that an argument such as GLOBAL(foo)
                  is macro-expanded before it is pasted onto `.Lfe_'.  */
41 #define LOCAL(X) .L_##X
42 #define FUNC(X) .type X,@function
43 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
44 #define ENDFUNC(X) ENDFUNC0(X)
46 #define LOCAL(X) L_##X
/* Build the assembler-level name of a library symbol: GLOBAL(foo)
   expands to the value of __USER_LABEL_PREFIX__ immediately followed
   by __foo.  The intermediate GLOBAL0 and CONCAT levels are there so
   that __USER_LABEL_PREFIX__ is macro-expanded before it reaches the
   `##' paste in CONCAT (operands of `##' are not expanded).  */
51 #define CONCAT(A,B) A##B
52 #define GLOBAL0(U,X) CONCAT(U,__##X)
53 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
55 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
/* ashiftrt_r4_N, N = 0..32: one global entry point per count.
   The group has four parts: export all 33 symbols, mark each as a
   function, define the entry labels, and close each symbol with
   ENDFUNC.  The instructions between the entry labels are not visible
   in this view.
   NOTE(review): the name and the descending label order suggest that
   each entry shifts r4 arithmetically right and falls through to the
   lower counts - confirm against the full source.  */
66 .global GLOBAL(ashiftrt_r4_0)
67 .global GLOBAL(ashiftrt_r4_1)
68 .global GLOBAL(ashiftrt_r4_2)
69 .global GLOBAL(ashiftrt_r4_3)
70 .global GLOBAL(ashiftrt_r4_4)
71 .global GLOBAL(ashiftrt_r4_5)
72 .global GLOBAL(ashiftrt_r4_6)
73 .global GLOBAL(ashiftrt_r4_7)
74 .global GLOBAL(ashiftrt_r4_8)
75 .global GLOBAL(ashiftrt_r4_9)
76 .global GLOBAL(ashiftrt_r4_10)
77 .global GLOBAL(ashiftrt_r4_11)
78 .global GLOBAL(ashiftrt_r4_12)
79 .global GLOBAL(ashiftrt_r4_13)
80 .global GLOBAL(ashiftrt_r4_14)
81 .global GLOBAL(ashiftrt_r4_15)
82 .global GLOBAL(ashiftrt_r4_16)
83 .global GLOBAL(ashiftrt_r4_17)
84 .global GLOBAL(ashiftrt_r4_18)
85 .global GLOBAL(ashiftrt_r4_19)
86 .global GLOBAL(ashiftrt_r4_20)
87 .global GLOBAL(ashiftrt_r4_21)
88 .global GLOBAL(ashiftrt_r4_22)
89 .global GLOBAL(ashiftrt_r4_23)
90 .global GLOBAL(ashiftrt_r4_24)
91 .global GLOBAL(ashiftrt_r4_25)
92 .global GLOBAL(ashiftrt_r4_26)
93 .global GLOBAL(ashiftrt_r4_27)
94 .global GLOBAL(ashiftrt_r4_28)
95 .global GLOBAL(ashiftrt_r4_29)
96 .global GLOBAL(ashiftrt_r4_30)
97 .global GLOBAL(ashiftrt_r4_31)
98 .global GLOBAL(ashiftrt_r4_32)
/* Mark every entry point as a function symbol.  */
100 FUNC(GLOBAL(ashiftrt_r4_0))
101 FUNC(GLOBAL(ashiftrt_r4_1))
102 FUNC(GLOBAL(ashiftrt_r4_2))
103 FUNC(GLOBAL(ashiftrt_r4_3))
104 FUNC(GLOBAL(ashiftrt_r4_4))
105 FUNC(GLOBAL(ashiftrt_r4_5))
106 FUNC(GLOBAL(ashiftrt_r4_6))
107 FUNC(GLOBAL(ashiftrt_r4_7))
108 FUNC(GLOBAL(ashiftrt_r4_8))
109 FUNC(GLOBAL(ashiftrt_r4_9))
110 FUNC(GLOBAL(ashiftrt_r4_10))
111 FUNC(GLOBAL(ashiftrt_r4_11))
112 FUNC(GLOBAL(ashiftrt_r4_12))
113 FUNC(GLOBAL(ashiftrt_r4_13))
114 FUNC(GLOBAL(ashiftrt_r4_14))
115 FUNC(GLOBAL(ashiftrt_r4_15))
116 FUNC(GLOBAL(ashiftrt_r4_16))
117 FUNC(GLOBAL(ashiftrt_r4_17))
118 FUNC(GLOBAL(ashiftrt_r4_18))
119 FUNC(GLOBAL(ashiftrt_r4_19))
120 FUNC(GLOBAL(ashiftrt_r4_20))
121 FUNC(GLOBAL(ashiftrt_r4_21))
122 FUNC(GLOBAL(ashiftrt_r4_22))
123 FUNC(GLOBAL(ashiftrt_r4_23))
124 FUNC(GLOBAL(ashiftrt_r4_24))
125 FUNC(GLOBAL(ashiftrt_r4_25))
126 FUNC(GLOBAL(ashiftrt_r4_26))
127 FUNC(GLOBAL(ashiftrt_r4_27))
128 FUNC(GLOBAL(ashiftrt_r4_28))
129 FUNC(GLOBAL(ashiftrt_r4_29))
130 FUNC(GLOBAL(ashiftrt_r4_30))
131 FUNC(GLOBAL(ashiftrt_r4_31))
132 FUNC(GLOBAL(ashiftrt_r4_32))
/* Entry labels, highest count first.  ashiftrt_r4_32 and
   ashiftrt_r4_31 are defined on consecutive source lines and therefore
   name the same address.  */
135 GLOBAL(ashiftrt_r4_32):
136 GLOBAL(ashiftrt_r4_31):
141 GLOBAL(ashiftrt_r4_30):
143 GLOBAL(ashiftrt_r4_29):
145 GLOBAL(ashiftrt_r4_28):
147 GLOBAL(ashiftrt_r4_27):
149 GLOBAL(ashiftrt_r4_26):
151 GLOBAL(ashiftrt_r4_25):
153 GLOBAL(ashiftrt_r4_24):
159 GLOBAL(ashiftrt_r4_23):
161 GLOBAL(ashiftrt_r4_22):
163 GLOBAL(ashiftrt_r4_21):
165 GLOBAL(ashiftrt_r4_20):
167 GLOBAL(ashiftrt_r4_19):
169 GLOBAL(ashiftrt_r4_18):
171 GLOBAL(ashiftrt_r4_17):
173 GLOBAL(ashiftrt_r4_16):
178 GLOBAL(ashiftrt_r4_15):
180 GLOBAL(ashiftrt_r4_14):
182 GLOBAL(ashiftrt_r4_13):
184 GLOBAL(ashiftrt_r4_12):
186 GLOBAL(ashiftrt_r4_11):
188 GLOBAL(ashiftrt_r4_10):
190 GLOBAL(ashiftrt_r4_9):
192 GLOBAL(ashiftrt_r4_8):
194 GLOBAL(ashiftrt_r4_7):
196 GLOBAL(ashiftrt_r4_6):
198 GLOBAL(ashiftrt_r4_5):
200 GLOBAL(ashiftrt_r4_4):
202 GLOBAL(ashiftrt_r4_3):
204 GLOBAL(ashiftrt_r4_2):
206 GLOBAL(ashiftrt_r4_1):
210 GLOBAL(ashiftrt_r4_0):
/* Close every entry point; each symbol's size runs from its own label
   to the ENDFUNC line that names it.  */
214 ENDFUNC(GLOBAL(ashiftrt_r4_0))
215 ENDFUNC(GLOBAL(ashiftrt_r4_1))
216 ENDFUNC(GLOBAL(ashiftrt_r4_2))
217 ENDFUNC(GLOBAL(ashiftrt_r4_3))
218 ENDFUNC(GLOBAL(ashiftrt_r4_4))
219 ENDFUNC(GLOBAL(ashiftrt_r4_5))
220 ENDFUNC(GLOBAL(ashiftrt_r4_6))
221 ENDFUNC(GLOBAL(ashiftrt_r4_7))
222 ENDFUNC(GLOBAL(ashiftrt_r4_8))
223 ENDFUNC(GLOBAL(ashiftrt_r4_9))
224 ENDFUNC(GLOBAL(ashiftrt_r4_10))
225 ENDFUNC(GLOBAL(ashiftrt_r4_11))
226 ENDFUNC(GLOBAL(ashiftrt_r4_12))
227 ENDFUNC(GLOBAL(ashiftrt_r4_13))
228 ENDFUNC(GLOBAL(ashiftrt_r4_14))
229 ENDFUNC(GLOBAL(ashiftrt_r4_15))
230 ENDFUNC(GLOBAL(ashiftrt_r4_16))
231 ENDFUNC(GLOBAL(ashiftrt_r4_17))
232 ENDFUNC(GLOBAL(ashiftrt_r4_18))
233 ENDFUNC(GLOBAL(ashiftrt_r4_19))
234 ENDFUNC(GLOBAL(ashiftrt_r4_20))
235 ENDFUNC(GLOBAL(ashiftrt_r4_21))
236 ENDFUNC(GLOBAL(ashiftrt_r4_22))
237 ENDFUNC(GLOBAL(ashiftrt_r4_23))
238 ENDFUNC(GLOBAL(ashiftrt_r4_24))
239 ENDFUNC(GLOBAL(ashiftrt_r4_25))
240 ENDFUNC(GLOBAL(ashiftrt_r4_26))
241 ENDFUNC(GLOBAL(ashiftrt_r4_27))
242 ENDFUNC(GLOBAL(ashiftrt_r4_28))
243 ENDFUNC(GLOBAL(ashiftrt_r4_29))
244 ENDFUNC(GLOBAL(ashiftrt_r4_30))
245 ENDFUNC(GLOBAL(ashiftrt_r4_31))
246 ENDFUNC(GLOBAL(ashiftrt_r4_32))
/* ashrsi3: exported and typed as a function.  Only the dispatch-table
   part of the routine is visible in this view.  */
268 .global GLOBAL(ashrsi3)
269 FUNC(GLOBAL(ashrsi3))
/* Load the address of the offset table below into r0.  */
274 mova LOCAL(ashrsi3_table),r0
/* Offset table: entry N (N = 0..31) is one byte holding the distance
   from the start of the table to the local label ashrsi3_N, so every
   target has to lie within a byte's range of the table.
   NOTE(review): presumably indexed by the shift count; the lookup code
   is outside this view - confirm.  */
285 LOCAL(ashrsi3_table):
286 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
397 ENDFUNC(GLOBAL(ashrsi3))
/* ashlsi3: exported and typed as a function.  Only the dispatch-table
   part of the routine is visible in this view.  */
418 .global GLOBAL(ashlsi3)
419 FUNC(GLOBAL(ashlsi3))
/* Load the address of the offset table below into r0.  */
424 mova LOCAL(ashlsi3_table),r0
/* Offset table: entry N (N = 0..31) is one byte holding the distance
   from the start of the table to the local label ashlsi3_N, so every
   target has to lie within a byte's range of the table.
   NOTE(review): presumably indexed by the shift count; the lookup code
   is outside this view - confirm.  */
435 LOCAL(ashlsi3_table):
436 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
556 ENDFUNC(GLOBAL(ashlsi3))
/* lshrsi3: exported and typed as a function.  Only the dispatch-table
   part of the routine is visible in this view.  */
577 .global GLOBAL(lshrsi3)
578 FUNC(GLOBAL(lshrsi3))
/* Load the address of the offset table below into r0.  */
583 mova LOCAL(lshrsi3_table),r0
/* Offset table: entry N (N = 0..31) is one byte holding the distance
   from the start of the table to the local label lshrsi3_N, so every
   target has to lie within a byte's range of the table.
   NOTE(review): presumably indexed by the shift count; the lookup code
   is outside this view - confirm.  */
594 LOCAL(lshrsi3_table):
595 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
715 ENDFUNC(GLOBAL(lshrsi3))
720 ! done all the large groups, do the remainder
725 mova GLOBAL(movmemSI0),r0
731 .global GLOBAL(movmemSI64)
732 FUNC(GLOBAL(movmemSI64))
736 .global GLOBAL(movmemSI60)
737 FUNC(GLOBAL(movmemSI60))
741 .global GLOBAL(movmemSI56)
742 FUNC(GLOBAL(movmemSI56))
746 .global GLOBAL(movmemSI52)
747 FUNC(GLOBAL(movmemSI52))
751 .global GLOBAL(movmemSI48)
752 FUNC(GLOBAL(movmemSI48))
756 .global GLOBAL(movmemSI44)
757 FUNC(GLOBAL(movmemSI44))
761 .global GLOBAL(movmemSI40)
762 FUNC(GLOBAL(movmemSI40))
766 .global GLOBAL(movmemSI36)
767 FUNC(GLOBAL(movmemSI36))
771 .global GLOBAL(movmemSI32)
772 FUNC(GLOBAL(movmemSI32))
776 .global GLOBAL(movmemSI28)
777 FUNC(GLOBAL(movmemSI28))
781 .global GLOBAL(movmemSI24)
782 FUNC(GLOBAL(movmemSI24))
786 .global GLOBAL(movmemSI20)
787 FUNC(GLOBAL(movmemSI20))
791 .global GLOBAL(movmemSI16)
792 FUNC(GLOBAL(movmemSI16))
796 .global GLOBAL(movmemSI12)
797 FUNC(GLOBAL(movmemSI12))
801 .global GLOBAL(movmemSI8)
802 FUNC(GLOBAL(movmemSI8))
806 .global GLOBAL(movmemSI4)
807 FUNC(GLOBAL(movmemSI4))
811 .global GLOBAL(movmemSI0)
812 FUNC(GLOBAL(movmemSI0))
817 ENDFUNC(GLOBAL(movmemSI64))
818 ENDFUNC(GLOBAL(movmemSI60))
819 ENDFUNC(GLOBAL(movmemSI56))
820 ENDFUNC(GLOBAL(movmemSI52))
821 ENDFUNC(GLOBAL(movmemSI48))
822 ENDFUNC(GLOBAL(movmemSI44))
823 ENDFUNC(GLOBAL(movmemSI40))
824 ENDFUNC(GLOBAL(movmemSI36))
825 ENDFUNC(GLOBAL(movmemSI32))
826 ENDFUNC(GLOBAL(movmemSI28))
827 ENDFUNC(GLOBAL(movmemSI24))
828 ENDFUNC(GLOBAL(movmemSI20))
829 ENDFUNC(GLOBAL(movmemSI16))
830 ENDFUNC(GLOBAL(movmemSI12))
831 ENDFUNC(GLOBAL(movmemSI8))
832 ENDFUNC(GLOBAL(movmemSI4))
833 ENDFUNC(GLOBAL(movmemSI0))
837 .global GLOBAL(movmem)
901 .global GLOBAL(movmem_i4_even)
902 .global GLOBAL(movmem_i4_odd)
903 .global GLOBAL(movmemSI12_i4)
905 FUNC(GLOBAL(movmem_i4_even))
906 FUNC(GLOBAL(movmem_i4_odd))
907 FUNC(GLOBAL(movmemSI12_i4))
917 GLOBAL(movmem_i4_even):
919 bra L_movmem_start_even
922 GLOBAL(movmem_i4_odd):
934 bt/s L_movmem_2mod4_end
948 ENDFUNC(GLOBAL(movmem_i4_even))
949 ENDFUNC(GLOBAL(movmem_i4_odd))
952 GLOBAL(movmemSI12_i4):
961 ENDFUNC(GLOBAL(movmemSI12_i4))
967 .global GLOBAL(mulsi3)
FUNC(GLOBAL(mulsi3))
/* mulsi3: 32-bit multiply assembled from 16x16 partial products
   (mulu.w).  The operands are read from r4 (halves aa:bb) and r5
   (halves cc:dd) and the product is delivered in r0.  The visible
   instructions overwrite r2, r3 and macl.
   The routine is bracketed by FUNC/ENDFUNC like the other functions in
   this file, so that the symbol receives both a type and a size.  */
972 ! r0 = aabb*ccdd via partial products
974 ! if aa == 0 and cc == 0
978 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
982 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
983 mov r5,r3 ! r3 = ccdd
984 swap.w r4,r2 ! r2 = bbaa
985 xtrct r2,r3 ! r3 = aacc
986 tst r3,r3 ! msws zero ?
988 rts ! yes - then we have the answer
991 hiset: sts macl,r0 ! r0 = bb*dd
992 mulu.w r2,r5 ! brewing macl = aa*dd
994 mulu.w r3,r4 ! brewing macl = cc*bb
1001 ENDFUNC(GLOBAL(mulsi3))
1003 #endif /* ! __SH5__ */
1006 !! 4 byte integer Divide code for the Renesas SH
1008 !! args in r4 and r5, result in fpul, clobber dr0, dr2
1010 .global GLOBAL(sdivsi3_i4)
1011 FUNC(GLOBAL(sdivsi3_i4))
1021 ENDFUNC(GLOBAL(sdivsi3_i4))
1022 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1023 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1025 #if ! __SH5__ || __SH5__ == 32
1029 .global GLOBAL(sdivsi3_i4)
1030 FUNC(GLOBAL(sdivsi3_i4))
1045 ENDFUNC(GLOBAL(sdivsi3_i4))
1046 #endif /* ! __SH5__ || __SH5__ == 32 */
1047 #endif /* ! __SH4__ */
1051 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1053 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1055 !! Steve Chamberlain
1060 !! args in r4 and r5, result in r0, clobber r1, r2, r3, and t bit
1062 .global GLOBAL(sdivsi3)
1063 FUNC(GLOBAL(sdivsi3))
1066 .section .text..SHmedia32,"ax"
1072 /* The assembly code that follows is a hand-optimized version of the C
1073 code that follows. Note that the registers that are modified are
1074 exactly those listed as clobbered in the patterns divsi3_i1 and
1077 int __sdivsi3 (i, j)
1080 register unsigned long long r18 asm ("r18");
1081 register unsigned long long r19 asm ("r19");
1082 register unsigned long long r0 asm ("r0") = 0;
1083 register unsigned long long r1 asm ("r1") = 1;
1084 register int r2 asm ("r2") = i >> 31;
1085 register int r3 asm ("r3") = j >> 31;
1097 r0 |= r1, r18 -= r19;
1098 while (r19 >>= 1, r1 >>= 1);
1100 return r2 * (int)r0;
1104 pt/l LOCAL(sdivsi3_dontadd), tr2
1105 pt/l LOCAL(sdivsi3_loop), tr1
1118 LOCAL(sdivsi3_loop):
1122 LOCAL(sdivsi3_dontadd):
1131 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1134 // can create absolute value without extra latency,
1135 // but dependent on proper sign extension of inputs:
1138 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1141 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1142 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1150 // If r4 was to be used in-place instead of r21, could use this sequence
1151 // to compute absolute:
1152 // sub r63,r4,r19 // compute absolute value of r4
1153 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1154 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1166 mmacnfx.wl r25,r2,r1
1192 #elif defined __SHMEDIA__
1193 /* m5compact-nofpu */
1194 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1196 .section .text..SHmedia32,"ax"
1199 pt/l LOCAL(sdivsi3_dontsub), tr0
1200 pt/l LOCAL(sdivsi3_loop), tr1
1212 LOCAL(sdivsi3_loop):
1216 LOCAL(sdivsi3_dontsub):
1222 #else /* ! __SHMEDIA__ */
1307 ENDFUNC(GLOBAL(sdivsi3))
1308 #endif /* ! __SHMEDIA__ */
1309 #endif /* ! __SH4__ */
1314 !! 4 byte integer Divide code for the Renesas SH
1316 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1319 .global GLOBAL(udivsi3_i4)
1320 FUNC(GLOBAL(udivsi3_i4))
1332 #ifdef __LITTLE_ENDIAN__
1356 .align 3 ! make double below 8 byte aligned.
1361 ENDFUNC(GLOBAL(udivsi3_i4))
1362 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1363 #if ! __SH5__ || __SH5__ == 32
1364 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1366 .global GLOBAL(udivsi3_i4)
1367 FUNC(GLOBAL(udivsi3_i4))
1381 ENDFUNC(GLOBAL(udivsi3_i4))
1382 #endif /* ! __SH5__ || __SH5__ == 32 */
1383 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1384 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1386 .global GLOBAL(udivsi3_i4)
1400 #ifdef __LITTLE_ENDIAN__
1420 .align 3 ! make double below 8 byte aligned.
1435 ENDFUNC(GLOBAL(udivsi3_i4))
1436 #endif /* ! __SH4__ */
1440 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1442 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1444 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1445 .global GLOBAL(udivsi3)
1446 FUNC(GLOBAL(udivsi3))
1450 .section .text..SHmedia32,"ax"
1456 /* The assembly code that follows is a hand-optimized version of the C
1457 code that follows. Note that the registers that are modified are
1458 exactly those listed as clobbered in the patterns udivsi3_i1 and
1465 register unsigned long long r0 asm ("r0") = 0;
1466 register unsigned long long r18 asm ("r18") = 1;
1467 register unsigned long long r4 asm ("r4") = i;
1468 register unsigned long long r19 asm ("r19") = j;
1474 r0 |= r18, r4 -= r19;
1475 while (r19 >>= 1, r18 >>= 1);
1481 pt/l LOCAL(udivsi3_dontadd), tr2
1482 pt/l LOCAL(udivsi3_loop), tr1
1490 LOCAL(udivsi3_loop):
1494 LOCAL(udivsi3_dontadd):
1502 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1508 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1510 mmulfx.w r21,r21,r19
1511 mshflo.w r21,r63,r21
1513 mmulfx.w r25,r19,r19
1517 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1518 before the msub.w, but we need a different value for
1519 r19 to keep errors under control. */
1521 mmulfx.w r19,r19,r19
1525 mmacnfx.wl r25,r19,r21
1550 #elif defined (__SHMEDIA__)
1551 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1552 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1553 So use a short shmedia loop. */
1554 // clobbered: r20,r21,r25,tr0,tr1,tr2
1556 .section .text..SHmedia32,"ax"
1559 pt/l LOCAL(udivsi3_dontsub), tr0
1560 pt/l LOCAL(udivsi3_loop), tr1
1565 LOCAL(udivsi3_loop):
1569 LOCAL(udivsi3_dontsub):
1574 #else /* ! defined (__SHMEDIA__) */
1578 div1 r5,r4; div1 r5,r4; div1 r5,r4
1579 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1582 div1 r5,r4; rotcl r0
1583 div1 r5,r4; rotcl r0
1584 div1 r5,r4; rotcl r0
1592 bf LOCAL(large_divisor)
1594 bf/s LOCAL(large_divisor)
1616 LOCAL(large_divisor):
1635 ENDFUNC(GLOBAL(udivsi3))
1636 #endif /* ! __SHMEDIA__ */
1637 #endif /* __SH4__ */
1638 #endif /* L_udivsi3 */
1643 .section .text..SHmedia32,"ax"
1645 .global GLOBAL(udivdi3)
1646 FUNC(GLOBAL(udivdi3))
1652 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1656 sub r63,r22,r20 // r63 == 64 % 64
1658 pta LOCAL(large_divisor),tr0
1664 bgt/u r9,r63,tr0 // large_divisor
1673 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1674 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1675 the case may be, %0000000000000000 000.11111111111, still */
1676 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1681 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1683 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1692 mcmpgt.l r21,r63,r21 // See Note 1
1694 mshfhi.l r63,r21,r21
1698 /* small divisor: need a third divide step */
1708 /* could test r3 here to check for divide by zero. */
1711 LOCAL(large_divisor):
1720 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1721 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1722 the case may be, %0000000000000000 000.11111111111, still */
1723 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1728 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1730 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1734 pta LOCAL(no_lo_adj),tr0
1741 bgtu/u r7,r25,tr0 // no_lo_adj
1747 /* large_divisor: only needs a few adjustments. */
1754 ENDFUNC(GLOBAL(udivdi3))
1755 /* Note 1: To shift the result of the second divide stage so that the result
1756 always fits into 32 bits, yet we still reduce the rest sufficiently
1757 would require a lot of instructions to do the shifts just right. Using
1758 the full 64 bit shift result to multiply with the divisor would require
1759 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1760 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1761 know that the rest after taking this partial result into account will
1762 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1763 upper 32 bits of the partial result are nonzero. */
1764 #endif /* __SHMEDIA__ */
1765 #endif /* L_udivdi3 */
1770 .section .text..SHmedia32,"ax"
1772 .global GLOBAL(divdi3)
1773 FUNC(GLOBAL(divdi3))
1775 pta GLOBAL(udivdi3),tr0
1787 ENDFUNC(GLOBAL(divdi3))
1788 #endif /* __SHMEDIA__ */
1789 #endif /* L_divdi3 */
1794 .section .text..SHmedia32,"ax"
1796 .global GLOBAL(umoddi3)
1797 FUNC(GLOBAL(umoddi3))
1803 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1807 sub r63,r22,r20 // r63 == 64 % 64
1809 pta LOCAL(large_divisor),tr0
1815 bgt/u r9,r63,tr0 // large_divisor
1824 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1825 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1826 the case may be, %0000000000000000 000.11111111111, still */
1827 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1832 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1834 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1839 /* bubble */ /* could test r3 here to check for divide by zero. */
1842 mcmpgt.l r21,r63,r21 // See Note 1
1844 mshfhi.l r63,r21,r21
1848 /* small divisor: need a third divide step */
1851 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1861 LOCAL(large_divisor):
1870 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1871 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1872 the case may be, %0000000000000000 000.11111111111, still */
1873 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1878 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1880 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1884 pta LOCAL(no_lo_adj),tr0
1891 bgtu/u r7,r25,tr0 // no_lo_adj
1897 /* large_divisor: only needs a few adjustments. */
1906 ENDFUNC(GLOBAL(umoddi3))
1907 /* Note 1: To shift the result of the second divide stage so that the result
1908 always fits into 32 bits, yet we still reduce the rest sufficiently
1909 would require a lot of instructions to do the shifts just right. Using
1910 the full 64 bit shift result to multiply with the divisor would require
1911 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1912 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1913 know that the rest after taking this partial result into account will
1914 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1915 upper 32 bits of the partial result are nonzero. */
1916 #endif /* __SHMEDIA__ */
1917 #endif /* L_umoddi3 */
1922 .section .text..SHmedia32,"ax"
1924 .global GLOBAL(moddi3)
1925 FUNC(GLOBAL(moddi3))
1927 pta GLOBAL(umoddi3),tr0
1939 ENDFUNC(GLOBAL(moddi3))
1940 #endif /* __SHMEDIA__ */
1941 #endif /* L_moddi3 */
1944 #if !defined (__SH2A_NOFPU__)
1945 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1949 .global GLOBAL(set_fpscr)
1950 FUNC(GLOBAL(set_fpscr))
1955 mova LOCAL(set_fpscr_L0),r0
1956 mov.l LOCAL(set_fpscr_L0),r12
1958 mov.l LOCAL(set_fpscr_L1),r0
1962 mov.l LOCAL(set_fpscr_L1),r1
1969 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1972 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1981 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1985 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1992 LOCAL(set_fpscr_L0):
1993 .long _GLOBAL_OFFSET_TABLE_
1994 LOCAL(set_fpscr_L1):
1995 .long GLOBAL(fpscr_values@GOT)
1997 LOCAL(set_fpscr_L1):
1998 .long GLOBAL(fpscr_values)
2001 ENDFUNC(GLOBAL(set_fpscr))
2002 #ifndef NO_FPSCR_VALUES
2004 .comm GLOBAL(fpscr_values),8,4
2006 .comm GLOBAL(fpscr_values),8
2008 #endif /* NO_FPSCR_VALUES */
2009 #endif /* SH2E / SH3E / SH4 */
2010 #endif /* __SH2A_NOFPU__ */
2011 #endif /* L_set_fpscr */
2012 #ifdef L_ic_invalidate
2015 .section .text..SHmedia32,"ax"
2017 .global GLOBAL(init_trampoline)
2018 FUNC(GLOBAL(init_trampoline))
2019 GLOBAL(init_trampoline):
2021 #ifdef __LITTLE_ENDIAN__
2027 movi 0xffffffffffffd002,r20
2034 .global GLOBAL(ic_invalidate)
2035 FUNC(GLOBAL(ic_invalidate))
2036 GLOBAL(ic_invalidate):
2044 ENDFUNC(GLOBAL(ic_invalidate))
2045 ENDFUNC(GLOBAL(init_trampoline))
2046 #elif defined(__SH4A__)
2047 .global GLOBAL(ic_invalidate)
2048 FUNC(GLOBAL(ic_invalidate))
2049 GLOBAL(ic_invalidate):
2054 ENDFUNC(GLOBAL(ic_invalidate))
2055 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
2056 /* This assumes a direct-mapped cache, which is the case for
2057 the first SH4, but not for the second version of SH4, that
2058 uses a 2-way set-associative cache, nor SH4a, that is 4-way.
2059 SH4a fortunately offers an instruction to invalidate the
2060 instruction cache, and we use it above, but SH4 doesn't.
2061 However, since the libraries don't contain any nested
2062 functions (the only case in which GCC would emit this pattern)
2063 and we actually emit the ic_invalidate_line_i pattern for
2064 cache invalidation on all SH4 multilibs (even 4-nofpu, that
2065 isn't even covered here), and pre-SH4 cores don't have
2066 caches, it seems like this code is pointless, unless it's
2067 meant for backward binary compatibility or for userland-only
2068 cache invalidation for say sh4-*-linux-gnu. Such a feature
2069 should probably be moved into a system call, such that the
2070 kernel could do whatever it takes to invalidate a cache line
2071 on the core it's actually running on. I.e., this hideous :-)
2072 piece of code should go away at some point. */
2074 .global GLOBAL(ic_invalidate)
2075 FUNC(GLOBAL(ic_invalidate))
2076 GLOBAL(ic_invalidate):
2080 /* Compute how many cache lines 0f is away from r4. */
2083 /* Prepare to branch to 0f plus the cache-line offset. */
2090 /* This must be aligned to the beginning of a cache line. */
2092 .rept 256 /* There are 256 cache lines of 32 bytes. */
2099 ENDFUNC(GLOBAL(ic_invalidate))
2101 #endif /* L_ic_invalidate */
2103 #if defined (__SH5__) && __SH5__ == 32
2104 #ifdef L_shcompact_call_trampoline
/* Dispatch table used by GCC_shcompact_call_trampoline: 33 entries of
   16 bits, each holding the offset of a handler label from
   ct_main_label (the base label is taken with the `datalabel'
   operator).  r2..r5 have three handlers each (fp, ld, pop), r6..r9
   have four (fph, fpl, ld, pop); five further entries follow
   (ct_pop_seq twice, ct_r9_pop again, ct_ret_wide, ct_call_func).
   The trampoline forms the address `ct_main_table - 31 * 2' in r0.
   NOTE(review): that bias suggests the table is indexed from 31
   entries below its start; the indexing code is not visible in this
   view - confirm.  */
2107 LOCAL(ct_main_table):
2108 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2109 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2110 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2111 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2112 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2113 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2114 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2115 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2116 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2117 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2118 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2119 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2120 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2121 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2122 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2123 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2124 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2125 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2126 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2127 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2128 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2129 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2130 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2131 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2132 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2133 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2134 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2135 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2136 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2137 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2138 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2139 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2140 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2142 .section .text..SHmedia32, "ax" /* Place the code in .text..SHmedia32 (allocatable, executable). */
2145 /* This function loads 64-bit general-purpose registers from the
2146 stack, from a memory address contained in them or from an FP
2147 register, according to a cookie passed in r1. Its execution
2148 time is linear in the number of registers that actually have
2149 to be copied. See sh.h for details on the actual bit pattern.
2151 The function to be called is passed in r0. If a 32-bit return
2152 value is expected, the actual function will be tail-called,
2153 otherwise the return address will be stored in r10 (which the
2154 caller should expect to be clobbered) and the return value
2155 will be expanded into r2/r3 upon return. */
2157 .global GLOBAL(GCC_shcompact_call_trampoline) /* Export the entry point. */
2158 FUNC(GLOBAL(GCC_shcompact_call_trampoline)) /* Tag the symbol as a function where FUNC expands to .type. */
2159 GLOBAL(GCC_shcompact_call_trampoline): /* Entry: r0 = function to call, r1 = cookie. */
2160 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2161 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 /* r0 = bits 31..16 of (ct_main_table - 31 * 2). */
2162 pt/l LOCAL(ct_loop), tr1 /* tr1 = ct_loop. */
2164 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 /* r0 = (r0 << 16) | bits 15..0 of the same expression. */
2169 LOCAL(ct_main_label): /* Reference point for the offsets stored in ct_main_table. */
2172 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2173 /* It must be dr0, so just do it. */
2179 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2180 /* It is either dr0 or dr2. */
2189 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2190 shlri r1, 23 - 3, r34 /* r34 = r1 >> 20, so cookie bits 24..23 land at bits 4..3. */
2191 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2192 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 /* r32 = r33 + (ct_r4_fp_copy - ct_r4_fp_base). */
2193 LOCAL(ct_r4_fp_base):
2199 LOCAL(ct_r4_fp_copy):
2206 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2207 shlri r1, 20 - 3, r34 /* r34 = r1 >> 17, so cookie bits 21..20 land at bits 4..3. */
2208 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2209 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 /* r32 = r33 + (ct_r5_fp_copy - ct_r5_fp_base). */
2210 LOCAL(ct_r5_fp_base):
2216 LOCAL(ct_r5_fp_copy):
2225 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2226 /* It must be dr8. */
2232 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2233 shlri r1, 16 - 3, r34 /* r34 = r1 >> 13, so cookie bits 17..16 land at bits 4..3. */
2234 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2235 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 /* r32 = r33 + (ct_r6_fp_copy - ct_r6_fp_base). */
2236 LOCAL(ct_r6_fp_base):
2242 LOCAL(ct_r6_fp_copy):
2251 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2252 /* It is either dr8 or dr10. */
2260 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2261 shlri r1, 12 - 3, r34 /* r34 = r1 >> 9, so cookie bits 13..12 land at bits 4..3. */
2262 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2263 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 /* r32 = r33 + (ct_r7_fp_copy - ct_r7_fp_base). */
2264 LOCAL(ct_r7_fp_base):
2269 LOCAL(ct_r7_fp_copy):
2278 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2279 /* It is either dr8 or dr10. */
2281 andi r1, 1 << 8, r32 /* r32 = cookie bit 8, isolated in place. */
2287 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2288 shlri r1, 8 - 3, r34 /* r34 = r1 >> 5, so cookie bits 9..8 land at bits 4..3. */
2289 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2290 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 /* r32 = r33 + (ct_r8_fp_copy - ct_r8_fp_base). */
2291 LOCAL(ct_r8_fp_base):
2296 LOCAL(ct_r8_fp_copy):
2305 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2306 /* It is either dr8 or dr10. */
2308 andi r1, 1 << 4, r32 /* r32 = cookie bit 4, isolated in place. */
2314 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2315 shlri r1, 4 - 3, r34 /* r34 = r1 >> 1, so cookie bits 5..4 land at bits 4..3. */
2316 andi r34, 3 << 3, r33 /* r33 = that 2-bit field times 8. */
2317 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 /* r32 = r33 + (ct_r9_fp_copy - ct_r9_fp_base). */
2318 LOCAL(ct_r9_fp_base):
2323 LOCAL(ct_r9_fp_copy):
2332 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2333 pt/l LOCAL(ct_r2_load), tr2 /* tr2 = ct_r2_load. */
2342 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2343 pt/l LOCAL(ct_r3_load), tr2 /* tr2 = ct_r3_load. */
2351 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2352 pt/l LOCAL(ct_r4_load), tr2 /* tr2 = ct_r4_load. */
2360 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2361 pt/l LOCAL(ct_r5_load), tr2 /* tr2 = ct_r5_load. */
2369 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2370 pt/l LOCAL(ct_r6_load), tr2 /* tr2 = ct_r6_load. */
2377 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2378 pt/l LOCAL(ct_r7_load), tr2 /* tr2 = ct_r7_load. */
2385 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2386 pt/l LOCAL(ct_r8_load), tr2 /* tr2 = ct_r8_load. */
2393 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2394 pt/l LOCAL(ct_check_tramp), tr2 /* tr2 = ct_check_tramp (r9 is the last register handled). */
2418 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2425 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2432 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2439 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2446 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2453 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2459 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2465 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2466 andi r1, 7 << 1, r30 /* r30 = r1 & 14: cookie bits 3..1, still scaled by 2. */
2467 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 /* r32 = bits 31..16 of ct_end_of_pop_seq. */
2469 shori LOCAL(ct_end_of_pop_seq) & 65535, r32 /* r32 = (r32 << 16) | bits 15..0 of ct_end_of_pop_seq. */
2473 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2486 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2489 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2490 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2491 pt/u LOCAL(ct_ret_wide), tr2 /* tr2 = ct_ret_wide; the /u suffix hints that the branch is unlikely. */
2494 LOCAL(ct_call_func): /* Just branch to the function. */
2496 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2497 64-bit return value. */
2501 #if __LITTLE_ENDIAN__ /* Little-endian variant of the code that follows. */
2510 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) /* End of the function; records its size where ENDFUNC0 emits .size. */
2511 #endif /* L_shcompact_call_trampoline */
2513 #ifdef L_shcompact_return_trampoline
2514 /* This function is the converse of the `ret_wide' code in
2515 GCC_shcompact_call_trampoline. It is tail-called by SHcompact
2516 functions returning 64-bit non-floating-point values, to pack the
2517 32-bit values in r2 and r3 into r2. */
2520 .section .text..SHmedia32, "ax" /* Place the code in .text..SHmedia32 (allocatable, executable). */
2522 .global GLOBAL(GCC_shcompact_return_trampoline) /* Export the entry point. */
2523 FUNC(GLOBAL(GCC_shcompact_return_trampoline)) /* Tag the symbol as a function where FUNC expands to .type. */
2524 GLOBAL(GCC_shcompact_return_trampoline): /* Entry: r2 and r3 hold the two 32-bit values to pack. */
2526 #if __LITTLE_ENDIAN__ /* Little-endian variant of the code that follows. */
2536 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) /* End of the function; records its size where ENDFUNC0 emits .size. */
2537 #endif /* L_shcompact_return_trampoline */
2539 #ifdef L_shcompact_incoming_args
2542 LOCAL(ia_main_table): /* Dispatch table: each .word is 1 (invalid) or a handler's offset from ia_main_label. */
2543 .word 1 /* Invalid, just loop */
2544 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) /* r2: store it and load its address. */
2545 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) /* r2: push onto the stack. */
2546 .word 1 /* Invalid, just loop */
2547 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) /* r3: store it and load its address. */
2548 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) /* r3: push onto the stack. */
2549 .word 1 /* Invalid, just loop */
2550 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) /* r4: store it and load its address. */
2551 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) /* r4: push onto the stack. */
2552 .word 1 /* Invalid, just loop */
2553 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) /* r5: store it and load its address. */
2554 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) /* r5: push onto the stack. */
2555 .word 1 /* Invalid, just loop */
2556 .word 1 /* Invalid, just loop */
2557 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) /* r6: store it and load its address. */
2558 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) /* r6: push onto the stack. */
2559 .word 1 /* Invalid, just loop */
2560 .word 1 /* Invalid, just loop */
2561 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) /* r7: store it and load its address. */
2562 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) /* r7: push onto the stack. */
2563 .word 1 /* Invalid, just loop */
2564 .word 1 /* Invalid, just loop */
2565 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) /* r8: store it and load its address. */
2566 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) /* r8: push onto the stack. */
2567 .word 1 /* Invalid, just loop */
2568 .word 1 /* Invalid, just loop */
2569 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) /* r9: store it and load its address. */
2570 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* r9: push onto the stack. */
2571 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* Push a sequence of registers. */
2572 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* Push a sequence of registers. */
2573 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* r9: push onto the stack. */
2574 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Return. */
2575 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Return. */
2577 .section .text..SHmedia32, "ax" /* Place the code in .text..SHmedia32 (allocatable, executable). */
2580 /* This function stores 64-bit general-purpose registers back in
2581 the stack, and loads the address in which each register
2582 was stored into itself. The lower 32 bits of r17 hold the address
2583 to begin storing, and the upper 32 bits of r17 hold the cookie.
2584 Its execution time is linear in the
2585 number of registers that actually have to be copied, and it is
2586 optimized for structures larger than 64 bits, as opposed to
2587 individual `long long' arguments. See sh.h for details on the
2588 actual bit pattern. */
2590 .global GLOBAL(GCC_shcompact_incoming_args) /* Export the entry point. */
2591 FUNC(GLOBAL(GCC_shcompact_incoming_args)) /* Tag the symbol as a function where FUNC expands to .type. */
2592 GLOBAL(GCC_shcompact_incoming_args): /* Entry: r17 = cookie (upper half) and store address (lower half). */
2593 ptabs/l r18, tr0 /* Prepare to return: tr0 = address held in r18. */
2594 shlri r17, 32, r0 /* Load the cookie: r0 = upper 32 bits of r17. */
2595 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* r43 = bits 31..16 of (ia_main_table - 31 * 2). */
2596 pt/l LOCAL(ia_loop), tr1 /* tr1 = ia_loop. */
2598 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* r43 = (r43 << 16) | bits 15..0 of the same expression. */
2603 LOCAL(ia_main_label): /* Reference point for the offsets stored in ia_main_table. */
2606 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2615 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2624 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2633 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2642 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2651 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2659 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2667 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2671 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2678 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2685 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2692 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2699 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2706 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2712 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2718 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2719 andi r0, 7 << 1, r38 /* r38 = r0 & 14: cookie bits 3..1, still scaled by 2. */
2720 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 /* r40 = bits 31..16 of ia_end_of_push_seq. */
2722 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* r40 = (r40 << 16) | bits 15..0 of ia_end_of_push_seq. */
2726 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. NOTE(review): `stack' looks like a typo for `start' (cf. ct_start_of_pop_seq); confirm there are no references before renaming. */
2739 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2741 LOCAL(ia_return): /* Return. */
2743 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2744 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) /* End of the function; records its size where ENDFUNC0 emits .size. */
2745 #endif /* L_shcompact_incoming_args */
2748 #ifdef L_nested_trampoline
2750 .section .text..SHmedia32,"ax" /* Place the code in .text..SHmedia32 (allocatable, executable). */
2754 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2755 .global GLOBAL(GCC_nested_trampoline) /* Export the trampoline symbol. */
2756 FUNC(GLOBAL(GCC_nested_trampoline)) /* Tag the symbol as a function where FUNC expands to .type. */
2757 GLOBAL(GCC_nested_trampoline): /* Start of the trampoline code, aligned by the .align 3 above. */
2774 ENDFUNC(GLOBAL(GCC_nested_trampoline)) /* End of the trampoline; records its size where ENDFUNC0 emits .size. */
2775 #endif /* L_nested_trampoline */
2776 #endif /* __SH5__ */
2778 #ifdef L_push_pop_shmedia_regs
2779 .section .text..SHmedia32,"ax" /* Place the code in .text..SHmedia32 (allocatable, executable). */
2782 #ifndef __SH4_NOFPU__ /* The FP-saving entry point is omitted when __SH4_NOFPU__ is defined. */
2783 .global GLOBAL(GCC_push_shmedia_regs) /* Export the FP-saving entry point. */
2784 FUNC(GLOBAL(GCC_push_shmedia_regs)) /* Tag the symbol as a function where FUNC expands to .type. */
2785 GLOBAL(GCC_push_shmedia_regs): /* Saves dr36..dr62 (even-numbered) relative to r15. */
2786 addi.l r15, -14*8, r15 /* r15 -= 14*8: room for fourteen 8-byte slots. */
2787 fst.d r15, 13*8, dr62 /* Store dr62 at r15 + 13*8; each store below uses the next lower slot. */
2788 fst.d r15, 12*8, dr60
2789 fst.d r15, 11*8, dr58
2790 fst.d r15, 10*8, dr56
2791 fst.d r15, 9*8, dr54
2792 fst.d r15, 8*8, dr52
2793 fst.d r15, 7*8, dr50
2794 fst.d r15, 6*8, dr48
2795 fst.d r15, 5*8, dr46
2796 fst.d r15, 4*8, dr44
2797 fst.d r15, 3*8, dr42
2798 fst.d r15, 2*8, dr40
2799 fst.d r15, 1*8, dr38
2800 fst.d r15, 0*8, dr36 /* Last FP store: dr36 at r15 + 0. */
2802 .global GLOBAL(GCC_push_shmedia_regs_nofpu) /* Export the entry point that skips the FP stores. */
2803 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) /* Tag the symbol as a function where FUNC expands to .type. */
2804 GLOBAL(GCC_push_shmedia_regs_nofpu):
2806 addi.l r15, -27*8, r15 /* r15 -= 27*8: room for twenty-seven 8-byte slots. */
2839 #ifndef __SH4_NOFPU__ /* Only close the FP-saving symbol if it was opened above. */
2840 ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) /* The FP entry's extent ends here, after the nofpu code. */
2842 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) /* End of the nofpu entry; records its size where ENDFUNC0 emits .size. */
2843 #ifndef __SH4_NOFPU__ /* The FP-restoring entry point is omitted when __SH4_NOFPU__ is defined. */
2844 .global GLOBAL(GCC_pop_shmedia_regs) /* Export the FP-restoring entry point. */
2845 FUNC(GLOBAL(GCC_pop_shmedia_regs)) /* Tag the symbol as a function where FUNC expands to .type. */
2846 GLOBAL(GCC_pop_shmedia_regs): /* Reloads dr36..dr62 (even-numbered) relative to r15. */
2849 fld.d r15, 40*8, dr62 /* Load dr62 from r15 + 40*8; slots 27..40 match the push side's 14 FP slots above its 27-slot area. */
2850 fld.d r15, 39*8, dr60
2851 fld.d r15, 38*8, dr58
2852 fld.d r15, 37*8, dr56
2853 fld.d r15, 36*8, dr54
2854 fld.d r15, 35*8, dr52
2855 fld.d r15, 34*8, dr50
2856 fld.d r15, 33*8, dr48
2857 fld.d r15, 32*8, dr46
2858 fld.d r15, 31*8, dr44
2859 fld.d r15, 30*8, dr42
2860 fld.d r15, 29*8, dr40
2861 fld.d r15, 28*8, dr38
2862 fld.d r15, 27*8, dr36 /* Last FP load: dr36 from r15 + 27*8. */
2865 .global GLOBAL(GCC_pop_shmedia_regs_nofpu) /* Export the entry point that skips the FP loads. */
2866 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) /* Tag the symbol as a function where FUNC expands to .type. */
2867 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2904 #ifndef __SH4_NOFPU__ /* Only close the FP-restoring symbol if it was opened above. */
2905 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) /* The FP entry's extent ends here, after the nofpu code. */
2907 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) /* End of the nofpu entry; records its size where ENDFUNC0 emits .size. */
2908 #endif /* __SH5__ == 32 */
2909 #endif /* L_push_pop_shmedia_regs */