1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3 Free Software Foundation, Inc.
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
25 !! libgcc routines for the Renesas / SuperH SH CPUs.
26 !! Contributed by Steve Chamberlain.
29 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
30 !! recoded in assembly by Toshiyasu Morita
33 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
34 ELF local label prefixes by J"orn Rennecke
37 #include "lib1funcs.h"
39 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
40 so it is more convenient to define NO_FPSCR_VALUES here than to
41 define it on the command line. */
42 #if defined __vxworks && defined __PIC__
43 #define NO_FPSCR_VALUES
48 .global GLOBAL(ashiftrt_r4_0)
49 .global GLOBAL(ashiftrt_r4_1)
50 .global GLOBAL(ashiftrt_r4_2)
51 .global GLOBAL(ashiftrt_r4_3)
52 .global GLOBAL(ashiftrt_r4_4)
53 .global GLOBAL(ashiftrt_r4_5)
54 .global GLOBAL(ashiftrt_r4_6)
55 .global GLOBAL(ashiftrt_r4_7)
56 .global GLOBAL(ashiftrt_r4_8)
57 .global GLOBAL(ashiftrt_r4_9)
58 .global GLOBAL(ashiftrt_r4_10)
59 .global GLOBAL(ashiftrt_r4_11)
60 .global GLOBAL(ashiftrt_r4_12)
61 .global GLOBAL(ashiftrt_r4_13)
62 .global GLOBAL(ashiftrt_r4_14)
63 .global GLOBAL(ashiftrt_r4_15)
64 .global GLOBAL(ashiftrt_r4_16)
65 .global GLOBAL(ashiftrt_r4_17)
66 .global GLOBAL(ashiftrt_r4_18)
67 .global GLOBAL(ashiftrt_r4_19)
68 .global GLOBAL(ashiftrt_r4_20)
69 .global GLOBAL(ashiftrt_r4_21)
70 .global GLOBAL(ashiftrt_r4_22)
71 .global GLOBAL(ashiftrt_r4_23)
72 .global GLOBAL(ashiftrt_r4_24)
73 .global GLOBAL(ashiftrt_r4_25)
74 .global GLOBAL(ashiftrt_r4_26)
75 .global GLOBAL(ashiftrt_r4_27)
76 .global GLOBAL(ashiftrt_r4_28)
77 .global GLOBAL(ashiftrt_r4_29)
78 .global GLOBAL(ashiftrt_r4_30)
79 .global GLOBAL(ashiftrt_r4_31)
80 .global GLOBAL(ashiftrt_r4_32)
82 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
83 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
117 GLOBAL(ashiftrt_r4_32):
118 GLOBAL(ashiftrt_r4_31):
123 GLOBAL(ashiftrt_r4_30):
125 GLOBAL(ashiftrt_r4_29):
127 GLOBAL(ashiftrt_r4_28):
129 GLOBAL(ashiftrt_r4_27):
131 GLOBAL(ashiftrt_r4_26):
133 GLOBAL(ashiftrt_r4_25):
135 GLOBAL(ashiftrt_r4_24):
141 GLOBAL(ashiftrt_r4_23):
143 GLOBAL(ashiftrt_r4_22):
145 GLOBAL(ashiftrt_r4_21):
147 GLOBAL(ashiftrt_r4_20):
149 GLOBAL(ashiftrt_r4_19):
151 GLOBAL(ashiftrt_r4_18):
153 GLOBAL(ashiftrt_r4_17):
155 GLOBAL(ashiftrt_r4_16):
160 GLOBAL(ashiftrt_r4_15):
162 GLOBAL(ashiftrt_r4_14):
164 GLOBAL(ashiftrt_r4_13):
166 GLOBAL(ashiftrt_r4_12):
168 GLOBAL(ashiftrt_r4_11):
170 GLOBAL(ashiftrt_r4_10):
172 GLOBAL(ashiftrt_r4_9):
174 GLOBAL(ashiftrt_r4_8):
176 GLOBAL(ashiftrt_r4_7):
178 GLOBAL(ashiftrt_r4_6):
180 GLOBAL(ashiftrt_r4_5):
182 GLOBAL(ashiftrt_r4_4):
184 GLOBAL(ashiftrt_r4_3):
186 GLOBAL(ashiftrt_r4_2):
188 GLOBAL(ashiftrt_r4_1):
192 GLOBAL(ashiftrt_r4_0):
196 ENDFUNC(GLOBAL(ashiftrt_r4_0))
197 ENDFUNC(GLOBAL(ashiftrt_r4_1))
198 ENDFUNC(GLOBAL(ashiftrt_r4_2))
199 ENDFUNC(GLOBAL(ashiftrt_r4_3))
200 ENDFUNC(GLOBAL(ashiftrt_r4_4))
201 ENDFUNC(GLOBAL(ashiftrt_r4_5))
202 ENDFUNC(GLOBAL(ashiftrt_r4_6))
203 ENDFUNC(GLOBAL(ashiftrt_r4_7))
204 ENDFUNC(GLOBAL(ashiftrt_r4_8))
205 ENDFUNC(GLOBAL(ashiftrt_r4_9))
206 ENDFUNC(GLOBAL(ashiftrt_r4_10))
207 ENDFUNC(GLOBAL(ashiftrt_r4_11))
208 ENDFUNC(GLOBAL(ashiftrt_r4_12))
209 ENDFUNC(GLOBAL(ashiftrt_r4_13))
210 ENDFUNC(GLOBAL(ashiftrt_r4_14))
211 ENDFUNC(GLOBAL(ashiftrt_r4_15))
212 ENDFUNC(GLOBAL(ashiftrt_r4_16))
213 ENDFUNC(GLOBAL(ashiftrt_r4_17))
214 ENDFUNC(GLOBAL(ashiftrt_r4_18))
215 ENDFUNC(GLOBAL(ashiftrt_r4_19))
216 ENDFUNC(GLOBAL(ashiftrt_r4_20))
217 ENDFUNC(GLOBAL(ashiftrt_r4_21))
218 ENDFUNC(GLOBAL(ashiftrt_r4_22))
219 ENDFUNC(GLOBAL(ashiftrt_r4_23))
220 ENDFUNC(GLOBAL(ashiftrt_r4_24))
221 ENDFUNC(GLOBAL(ashiftrt_r4_25))
222 ENDFUNC(GLOBAL(ashiftrt_r4_26))
223 ENDFUNC(GLOBAL(ashiftrt_r4_27))
224 ENDFUNC(GLOBAL(ashiftrt_r4_28))
225 ENDFUNC(GLOBAL(ashiftrt_r4_29))
226 ENDFUNC(GLOBAL(ashiftrt_r4_30))
227 ENDFUNC(GLOBAL(ashiftrt_r4_31))
228 ENDFUNC(GLOBAL(ashiftrt_r4_32))
/* ashrsi3: libgcc helper declared with HIDDEN_FUNC and built around a
   table of byte offsets to 32 local labels, ashrsi3_0 .. ashrsi3_31.
   NOTE(review): by its name this is the 32-bit arithmetic right shift,
   with one label per shift count; the argument and result registers are
   not stated here -- confirm against the callers.  */
250 .global GLOBAL(ashrsi3)
251 HIDDEN_FUNC(GLOBAL(ashrsi3))
/* mova places the address of the offset table in r0.  */
256 mova LOCAL(ashrsi3_table),r0
/* One byte per label, in ascending order from ashrsi3_0 to ashrsi3_31.
   Each entry is the distance from the start of the table to that label,
   so every label has to be close enough for the difference to fit in a
   single byte.
   NOTE(review): presumably indexed by the shift count -- confirm against
   the lookup code.  */
267 LOCAL(ashrsi3_table):
268 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
269 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
270 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
379 ENDFUNC(GLOBAL(ashrsi3))
/* ashlsi3: libgcc helper declared with HIDDEN_FUNC and built around a
   table of byte offsets to 32 local labels, ashlsi3_0 .. ashlsi3_31.
   NOTE(review): by its name this is the 32-bit left shift, with one
   label per shift count; the argument and result registers are not
   stated here -- confirm against the callers.  */
400 .global GLOBAL(ashlsi3)
401 HIDDEN_FUNC(GLOBAL(ashlsi3))
/* mova places the address of the offset table in r0.  */
406 mova LOCAL(ashlsi3_table),r0
/* One byte per label, in ascending order from ashlsi3_0 to ashlsi3_31.
   Each entry is the distance from the start of the table to that label,
   so every label has to be close enough for the difference to fit in a
   single byte.
   NOTE(review): presumably indexed by the shift count -- confirm against
   the lookup code.  */
417 LOCAL(ashlsi3_table):
418 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
419 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
420 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
421 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
422 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
423 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
538 ENDFUNC(GLOBAL(ashlsi3))
/* lshrsi3: libgcc helper declared with HIDDEN_FUNC and built around a
   table of byte offsets to 32 local labels, lshrsi3_0 .. lshrsi3_31.
   NOTE(review): by its name this is the 32-bit logical right shift, with
   one label per shift count; the argument and result registers are not
   stated here -- confirm against the callers.  */
559 .global GLOBAL(lshrsi3)
560 HIDDEN_FUNC(GLOBAL(lshrsi3))
/* mova places the address of the offset table in r0.  */
565 mova LOCAL(lshrsi3_table),r0
/* One byte per label, in ascending order from lshrsi3_0 to lshrsi3_31.
   Each entry is the distance from the start of the table to that label,
   so every label has to be close enough for the difference to fit in a
   single byte.
   NOTE(review): presumably indexed by the shift count -- confirm against
   the lookup code.  */
576 LOCAL(lshrsi3_table):
577 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
578 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
579 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
580 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
581 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
582 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
697 ENDFUNC(GLOBAL(lshrsi3))
703 .global GLOBAL(movmem)
704 HIDDEN_FUNC(GLOBAL(movmem))
705 HIDDEN_ALIAS(movstr,movmem)
706 /* This would be a lot simpler if r6 contained the byte count
707 minus 64, and we wouldn't be called here for a byte count of 64. */
711 bsr GLOBAL(movmemSI52+2)
714 LOCAL(movmem_loop): /* Reached with rts */
720 bt LOCAL(movmem_done)
727 bt GLOBAL(movmemSI52)
728 ! done all the large groups, do the remainder
730 mova GLOBAL(movmemSI4)+4,r0
733 LOCAL(movmem_done): ! share slot insn, works out aligned.
740 ! ??? We need aliases movstr* for movmem* for the older libraries. These
741 ! aliases will be removed at some point in the future.
742 .global GLOBAL(movmemSI64)
743 HIDDEN_FUNC(GLOBAL(movmemSI64))
744 HIDDEN_ALIAS(movstrSI64,movmemSI64)
748 .global GLOBAL(movmemSI60)
749 HIDDEN_FUNC(GLOBAL(movmemSI60))
750 HIDDEN_ALIAS(movstrSI60,movmemSI60)
754 .global GLOBAL(movmemSI56)
755 HIDDEN_FUNC(GLOBAL(movmemSI56))
756 HIDDEN_ALIAS(movstrSI56,movmemSI56)
760 .global GLOBAL(movmemSI52)
761 HIDDEN_FUNC(GLOBAL(movmemSI52))
762 HIDDEN_ALIAS(movstrSI52,movmemSI52)
766 .global GLOBAL(movmemSI48)
767 HIDDEN_FUNC(GLOBAL(movmemSI48))
768 HIDDEN_ALIAS(movstrSI48,movmemSI48)
772 .global GLOBAL(movmemSI44)
773 HIDDEN_FUNC(GLOBAL(movmemSI44))
774 HIDDEN_ALIAS(movstrSI44,movmemSI44)
778 .global GLOBAL(movmemSI40)
779 HIDDEN_FUNC(GLOBAL(movmemSI40))
780 HIDDEN_ALIAS(movstrSI40,movmemSI40)
784 .global GLOBAL(movmemSI36)
785 HIDDEN_FUNC(GLOBAL(movmemSI36))
786 HIDDEN_ALIAS(movstrSI36,movmemSI36)
790 .global GLOBAL(movmemSI32)
791 HIDDEN_FUNC(GLOBAL(movmemSI32))
792 HIDDEN_ALIAS(movstrSI32,movmemSI32)
796 .global GLOBAL(movmemSI28)
797 HIDDEN_FUNC(GLOBAL(movmemSI28))
798 HIDDEN_ALIAS(movstrSI28,movmemSI28)
802 .global GLOBAL(movmemSI24)
803 HIDDEN_FUNC(GLOBAL(movmemSI24))
804 HIDDEN_ALIAS(movstrSI24,movmemSI24)
808 .global GLOBAL(movmemSI20)
809 HIDDEN_FUNC(GLOBAL(movmemSI20))
810 HIDDEN_ALIAS(movstrSI20,movmemSI20)
814 .global GLOBAL(movmemSI16)
815 HIDDEN_FUNC(GLOBAL(movmemSI16))
816 HIDDEN_ALIAS(movstrSI16,movmemSI16)
820 .global GLOBAL(movmemSI12)
821 HIDDEN_FUNC(GLOBAL(movmemSI12))
822 HIDDEN_ALIAS(movstrSI12,movmemSI12)
826 .global GLOBAL(movmemSI8)
827 HIDDEN_FUNC(GLOBAL(movmemSI8))
828 HIDDEN_ALIAS(movstrSI8,movmemSI8)
832 .global GLOBAL(movmemSI4)
833 HIDDEN_FUNC(GLOBAL(movmemSI4))
834 HIDDEN_ALIAS(movstrSI4,movmemSI4)
840 ENDFUNC(GLOBAL(movmemSI64))
841 ENDFUNC(GLOBAL(movmemSI60))
842 ENDFUNC(GLOBAL(movmemSI56))
843 ENDFUNC(GLOBAL(movmemSI52))
844 ENDFUNC(GLOBAL(movmemSI48))
845 ENDFUNC(GLOBAL(movmemSI44))
846 ENDFUNC(GLOBAL(movmemSI40))
847 ENDFUNC(GLOBAL(movmemSI36))
848 ENDFUNC(GLOBAL(movmemSI32))
849 ENDFUNC(GLOBAL(movmemSI28))
850 ENDFUNC(GLOBAL(movmemSI24))
851 ENDFUNC(GLOBAL(movmemSI20))
852 ENDFUNC(GLOBAL(movmemSI16))
853 ENDFUNC(GLOBAL(movmemSI12))
854 ENDFUNC(GLOBAL(movmemSI8))
855 ENDFUNC(GLOBAL(movmemSI4))
856 ENDFUNC(GLOBAL(movmem))
861 .global GLOBAL(movmem_i4_even)
862 .global GLOBAL(movmem_i4_odd)
863 .global GLOBAL(movmemSI12_i4)
865 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
866 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
867 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
869 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
870 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
871 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
881 GLOBAL(movmem_i4_even):
883 bra L_movmem_start_even
886 GLOBAL(movmem_i4_odd):
898 bt/s L_movmem_2mod4_end
912 ENDFUNC(GLOBAL(movmem_i4_even))
913 ENDFUNC(GLOBAL(movmem_i4_odd))
916 GLOBAL(movmemSI12_i4):
925 ENDFUNC(GLOBAL(movmemSI12_i4))
931 .global GLOBAL(mulsi3)
932 HIDDEN_FUNC(GLOBAL(mulsi3))
936 ! r0 = aabb*ccdd via partial products
938 ! if aa == 0 and cc == 0
942 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
946 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
947 mov r5,r3 ! r3 = ccdd
948 swap.w r4,r2 ! r2 = bbaa
949 xtrct r2,r3 ! r3 = aacc
950 tst r3,r3 ! msws zero ?
952 rts ! yes - then we have the answer
955 hiset: sts macl,r0 ! r0 = bb*dd
956 mulu.w r2,r5 ! brewing macl = aa*dd
958 mulu.w r3,r4 ! brewing macl = cc*bb
965 ENDFUNC(GLOBAL(mulsi3))
967 #endif /* ! __SH5__ */
970 !! 4 byte integer Divide code for the Renesas SH
972 !! args in r4 and r5, result in fpul, clobber dr0, dr2
974 .global GLOBAL(sdivsi3_i4)
975 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
985 ENDFUNC(GLOBAL(sdivsi3_i4))
986 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
987 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
989 #if ! __SH5__ || __SH5__ == 32
993 .global GLOBAL(sdivsi3_i4)
994 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1009 ENDFUNC(GLOBAL(sdivsi3_i4))
1010 #endif /* ! __SH5__ || __SH5__ == 32 */
1011 #endif /* ! __SH4__ */
1015 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1017 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1019 !! Steve Chamberlain
1024 !! args in r4 and r5, result in r0, clobber r1, r2, r3, and t bit
1026 .global GLOBAL(sdivsi3)
1029 .section .text..SHmedia32,"ax"
1035 /* The assembly code that follows is a hand-optimized version of the C
1036 code that follows. Note that the registers that are modified are
1037 exactly those listed as clobbered in the patterns divsi3_i1 and
1040 int __sdivsi3 (i, j)
1043 register unsigned long long r18 asm ("r18");
1044 register unsigned long long r19 asm ("r19");
1045 register unsigned long long r0 asm ("r0") = 0;
1046 register unsigned long long r1 asm ("r1") = 1;
1047 register int r2 asm ("r2") = i >> 31;
1048 register int r3 asm ("r3") = j >> 31;
1060 r0 |= r1, r18 -= r19;
1061 while (r19 >>= 1, r1 >>= 1);
1063 return r2 * (int)r0;
1067 pt/l LOCAL(sdivsi3_dontadd), tr2
1068 pt/l LOCAL(sdivsi3_loop), tr1
1081 LOCAL(sdivsi3_loop):
1085 LOCAL(sdivsi3_dontadd):
1094 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1097 // can create absolute value without extra latency,
1098 // but dependent on proper sign extension of inputs:
1101 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1104 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1105 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1113 // If r4 was to be used in-place instead of r21, could use this sequence
1114 // to compute absolute:
1115 // sub r63,r4,r19 // compute absolute value of r4
1116 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1117 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1129 mmacnfx.wl r25,r2,r1
1154 #else /* ! 0 && ! 0 */
1157 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1159 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1161 FUNC(GLOBAL(sdivsi3))
1162 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1163 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1164 // with the SHcompact implementation, which clobbers tr1 / tr2.
1165 .global GLOBAL(sdivsi3_1)
1167 .global GLOBAL(div_table_internal)
1168 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1169 shori GLOBAL(div_table_internal) & 65535, r20
1171 .global GLOBAL(sdivsi3_2)
1173 // clobbered: r1,r18,r19,r21,r25,tr0
1176 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1177 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1178 ldx.ub r20, r21, r19 // u0.8
1179 shari r25, 32, r25 // normalize to s2.30
1181 muls.l r25, r19, r19 // s2.38
1182 ldx.w r20, r21, r21 // s2.14
1184 shari r19, 24, r19 // truncate to s2.14
1185 sub r21, r19, r19 // some 11 bit inverse in s1.14
1186 muls.l r19, r19, r21 // u0.28
1189 muls.l r25, r21, r18 // s2.58
1190 shlli r19, 45, r19 // multiply by two and convert to s2.58
1193 shari r18, 28, r18 // some 22 bit inverse in s1.30
1194 muls.l r18, r25, r0 // s2.60
1195 muls.l r18, r4, r25 // s32.30
1197 shari r0, 16, r19 // s-16.44
1198 muls.l r19, r18, r19 // s-16.74
1200 shari r4, 14, r18 // s19.-14
1201 shari r19, 30, r19 // s-16.44
1202 muls.l r19, r18, r19 // s15.30
1203 xor r21, r0, r21 // You could also use the constant 1 << 27.
1210 ENDFUNC(GLOBAL(sdivsi3))
1212 ENDFUNC(GLOBAL(sdivsi3_2))
1214 #elif defined __SHMEDIA__
1215 /* m5compact-nofpu */
1216 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1218 .section .text..SHmedia32,"ax"
1220 FUNC(GLOBAL(sdivsi3))
1222 pt/l LOCAL(sdivsi3_dontsub), tr0
1223 pt/l LOCAL(sdivsi3_loop), tr1
1235 LOCAL(sdivsi3_loop):
1239 LOCAL(sdivsi3_dontsub):
1245 ENDFUNC(GLOBAL(sdivsi3))
1246 #else /* ! __SHMEDIA__ */
1247 FUNC(GLOBAL(sdivsi3))
1332 ENDFUNC(GLOBAL(sdivsi3))
1333 #endif /* ! __SHMEDIA__ */
1334 #endif /* ! __SH4__ */
1339 !! 4 byte integer Divide code for the Renesas SH
1341 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1344 .global GLOBAL(udivsi3_i4)
1345 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1376 .align 3 ! make double below 8 byte aligned.
1381 ENDFUNC(GLOBAL(udivsi3_i4))
1382 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1383 #if ! __SH5__ || __SH5__ == 32
1384 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1386 .global GLOBAL(udivsi3_i4)
1387 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1401 ENDFUNC(GLOBAL(udivsi3_i4))
1402 #endif /* ! __SH5__ || __SH5__ == 32 */
1403 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1404 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1406 .global GLOBAL(udivsi3_i4)
1407 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1436 .align 3 ! make double below 8 byte aligned.
1451 ENDFUNC(GLOBAL(udivsi3_i4))
1452 #endif /* ! __SH4__ */
1456 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1458 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1460 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1461 .global GLOBAL(udivsi3)
1462 HIDDEN_FUNC(GLOBAL(udivsi3))
1466 .section .text..SHmedia32,"ax"
1472 /* The assembly code that follows is a hand-optimized version of the C
1473 code that follows. Note that the registers that are modified are
1474 exactly those listed as clobbered in the patterns udivsi3_i1 and
1481 register unsigned long long r0 asm ("r0") = 0;
1482 register unsigned long long r18 asm ("r18") = 1;
1483 register unsigned long long r4 asm ("r4") = i;
1484 register unsigned long long r19 asm ("r19") = j;
1490 r0 |= r18, r4 -= r19;
1491 while (r19 >>= 1, r18 >>= 1);
1497 pt/l LOCAL(udivsi3_dontadd), tr2
1498 pt/l LOCAL(udivsi3_loop), tr1
1506 LOCAL(udivsi3_loop):
1510 LOCAL(udivsi3_dontadd):
1518 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1524 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1526 mmulfx.w r21,r21,r19
1527 mshflo.w r21,r63,r21
1529 mmulfx.w r25,r19,r19
1533 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1534 before the msub.w, but we need a different value for
1535 r19 to keep errors under control. */
1537 mmulfx.w r19,r19,r19
1541 mmacnfx.wl r25,r19,r21
1566 #elif defined (__SHMEDIA__)
1567 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1568 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1569 So use a short shmedia loop. */
1570 // clobbered: r20,r21,r25,tr0,tr1,tr2
1572 .section .text..SHmedia32,"ax"
1575 pt/l LOCAL(udivsi3_dontsub), tr0
1576 pt/l LOCAL(udivsi3_loop), tr1
1581 LOCAL(udivsi3_loop):
1585 LOCAL(udivsi3_dontsub):
1590 #else /* ! defined (__SHMEDIA__) */
1594 div1 r5,r4; div1 r5,r4; div1 r5,r4
1595 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1598 div1 r5,r4; rotcl r0
1599 div1 r5,r4; rotcl r0
1600 div1 r5,r4; rotcl r0
1608 bf LOCAL(large_divisor)
1610 bf/s LOCAL(large_divisor)
1632 LOCAL(large_divisor):
1651 ENDFUNC(GLOBAL(udivsi3))
1652 #endif /* ! __SHMEDIA__ */
1653 #endif /* __SH4__ */
1654 #endif /* L_udivsi3 */
1659 .section .text..SHmedia32,"ax"
1661 .global GLOBAL(udivdi3)
1662 FUNC(GLOBAL(udivdi3))
1664 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1669 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1673 sub r63,r22,r20 // r63 == 64 % 64
1675 pta LOCAL(large_divisor),tr0
1681 bgt/u r9,r63,tr0 // large_divisor
1690 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1691 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1692 the case may be, %0000000000000000 000.11111111111, still */
1693 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1698 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1700 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1709 mcmpgt.l r21,r63,r21 // See Note 1
1711 mshfhi.l r63,r21,r21
1715 /* small divisor: need a third divide step */
1725 /* could test r3 here to check for divide by zero. */
1728 LOCAL(large_divisor):
1737 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1738 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1739 the case may be, %0000000000000000 000.11111111111, still */
1740 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1745 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1747 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1751 pta LOCAL(no_lo_adj),tr0
1758 bgtu/u r7,r25,tr0 // no_lo_adj
1764 /* large_divisor: only needs a few adjustments. */
1771 ENDFUNC(GLOBAL(udivdi3))
1772 /* Note 1: To shift the result of the second divide stage so that the result
1773 always fits into 32 bits, yet we still reduce the rest sufficiently
1774 would require a lot of instructions to do the shifts just right. Using
1775 the full 64 bit shift result to multiply with the divisor would require
1776 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1777 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1778 know that the rest after taking this partial result into account will
1779 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1780 upper 32 bits of the partial result are nonzero. */
1781 #endif /* __SHMEDIA__ */
1782 #endif /* L_udivdi3 */
1787 .section .text..SHmedia32,"ax"
1789 .global GLOBAL(divdi3)
1790 FUNC(GLOBAL(divdi3))
1792 pta GLOBAL(udivdi3_internal),tr0
1804 ENDFUNC(GLOBAL(divdi3))
1805 #endif /* __SHMEDIA__ */
1806 #endif /* L_divdi3 */
1811 .section .text..SHmedia32,"ax"
1813 .global GLOBAL(umoddi3)
1814 FUNC(GLOBAL(umoddi3))
1816 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1821 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1825 sub r63,r22,r20 // r63 == 64 % 64
1827 pta LOCAL(large_divisor),tr0
1833 bgt/u r9,r63,tr0 // large_divisor
1842 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1843 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1844 the case may be, %0000000000000000 000.11111111111, still */
1845 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1850 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1852 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1857 /* bubble */ /* could test r3 here to check for divide by zero. */
1860 mcmpgt.l r21,r63,r21 // See Note 1
1862 mshfhi.l r63,r21,r21
1866 /* small divisor: need a third divide step */
1869 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1879 LOCAL(large_divisor):
1888 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1889 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1890 the case may be, %0000000000000000 000.11111111111, still */
1891 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1896 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1898 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1902 pta LOCAL(no_lo_adj),tr0
1909 bgtu/u r7,r25,tr0 // no_lo_adj
1915 /* large_divisor: only needs a few adjustments. */
1924 ENDFUNC(GLOBAL(umoddi3))
1925 /* Note 1: To shift the result of the second divide stage so that the result
1926 always fits into 32 bits, yet we still reduce the rest sufficiently
1927 would require a lot of instructions to do the shifts just right. Using
1928 the full 64 bit shift result to multiply with the divisor would require
1929 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1930 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1931 know that the rest after taking this partial result into account will
1932 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1933 upper 32 bits of the partial result are nonzero. */
1934 #endif /* __SHMEDIA__ */
1935 #endif /* L_umoddi3 */
1940 .section .text..SHmedia32,"ax"
1942 .global GLOBAL(moddi3)
1943 FUNC(GLOBAL(moddi3))
1945 pta GLOBAL(umoddi3_internal),tr0
1957 ENDFUNC(GLOBAL(moddi3))
1958 #endif /* __SHMEDIA__ */
1959 #endif /* L_moddi3 */
1962 #if !defined (__SH2A_NOFPU__)
1963 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1967 .global GLOBAL(set_fpscr)
1968 HIDDEN_FUNC(GLOBAL(set_fpscr))
1974 mov.l LOCAL(set_fpscr_L0_base),r12
1975 mov.l LOCAL(set_fpscr_L0_index),r0
1979 mova LOCAL(set_fpscr_L0),r0
1980 mov.l LOCAL(set_fpscr_L0),r12
1983 mov.l LOCAL(set_fpscr_L1),r0
1987 mov.l LOCAL(set_fpscr_L1),r1
1994 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1997 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2006 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2010 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2018 LOCAL(set_fpscr_L0_base):
2019 .long ___GOTT_BASE__
2020 LOCAL(set_fpscr_L0_index):
2021 .long ___GOTT_INDEX__
2023 LOCAL(set_fpscr_L0):
2024 .long _GLOBAL_OFFSET_TABLE_
2026 LOCAL(set_fpscr_L1):
2027 .long GLOBAL(fpscr_values@GOT)
2029 LOCAL(set_fpscr_L1):
2030 .long GLOBAL(fpscr_values)
2033 ENDFUNC(GLOBAL(set_fpscr))
2034 #ifndef NO_FPSCR_VALUES
2036 .comm GLOBAL(fpscr_values),8,4
2038 .comm GLOBAL(fpscr_values),8
2040 #endif /* NO_FPSCR_VALUES */
2041 #endif /* SH2E / SH3E / SH4 */
2042 #endif /* __SH2A_NOFPU__ */
2043 #endif /* L_set_fpscr */
2044 #ifdef L_ic_invalidate
2047 .section .text..SHmedia32,"ax"
2049 .global GLOBAL(init_trampoline)
2050 HIDDEN_FUNC(GLOBAL(init_trampoline))
2051 GLOBAL(init_trampoline):
2053 #ifdef __LITTLE_ENDIAN__
2059 movi 0xffffffffffffd002,r20
2066 ENDFUNC(GLOBAL(init_trampoline))
2067 .global GLOBAL(ic_invalidate)
2068 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2069 GLOBAL(ic_invalidate):
2076 ENDFUNC(GLOBAL(ic_invalidate))
2077 #elif defined(__SH4A__)
2078 .global GLOBAL(ic_invalidate)
2079 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2080 GLOBAL(ic_invalidate):
2086 ENDFUNC(GLOBAL(ic_invalidate))
2087 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2088 /* For system code, we use ic_invalidate_line_i, but user code
2089 needs a different mechanism. A kernel call is generally not
2090 available, and it would also be slow. Different SH4 variants use
2091 different sizes and associativities of the Icache. We use a small
2092 bit of dispatch code that can be put hidden in every shared object,
2093 which calls the actual processor-specific invalidation code in a
2095 Or if you have operating system support, the OS could mmap the
2096 processor-specific code from a single page, since it is highly
2098 .global GLOBAL(ic_invalidate)
2099 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2100 GLOBAL(ic_invalidate):
2127 0: .long GLOBAL(ic_invalidate_array)
2129 .global GLOBAL(ic_invalidate_array)
2130 0: .long GLOBAL(ic_invalidate_array)@GOT
2132 1: .long ___GOTT_BASE__
2133 2: .long ___GOTT_INDEX__
2135 1: .long _GLOBAL_OFFSET_TABLE_
2137 ENDFUNC(GLOBAL(ic_invalidate))
2138 #endif /* __pic__ */
2140 #endif /* L_ic_invalidate */
2142 #ifdef L_ic_invalidate_array
2143 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* ic_invalidate_array, variant selected when __SH4A__ is defined or
   __FORCE_SH4A__ is combined with one of the SH4 configurations.
   The symbol is exported with a single .global directive, and
   FUNC / ENDFUNC bracket the definition.  */
2145 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2146 .global GLOBAL(ic_invalidate_array)
2147 FUNC(GLOBAL(ic_invalidate_array))
2148 GLOBAL(ic_invalidate_array):
2156 ENDFUNC(GLOBAL(ic_invalidate_array))
2157 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2158 .global GLOBAL(ic_invalidate_array)
2160 FUNC(GLOBAL(ic_invalidate_array))
2161 /* This must be aligned to the beginning of a cache line. */
2162 GLOBAL(ic_invalidate_array):
2165 #define WAY_SIZE 0x4000
2168 .rept WAY_SIZE * WAYS / 32
2176 .rept WAY_SIZE * WAYS / 32
2190 #else /* WAYS > 6 */
2191 /* This variant needs two different pages for mmap-ing. */
2209 ENDFUNC(GLOBAL(ic_invalidate_array))
2211 #endif /* L_ic_invalidate_array */
2213 #if defined (__SH5__) && __SH5__ == 32
2214 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit entries,
   each the offset of a handler label from ct_main_label.  The code that
   turns the cookie into a table index is not visible here.  */
2217 LOCAL(ct_main_table):
2218 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* From r6 on, the FP source is split into a "high" and a "low" entry.  */
2230 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2249 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2250 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2252 .section .text..SHmedia32, "ax"
2255 /* This function loads 64-bit general-purpose registers from the
2256 stack, from a memory address contained in them or from an FP
2257 register, according to a cookie passed in r1. Its execution
2258 time is linear in the number of registers that actually have
2259 to be copied. See sh.h for details on the actual bit pattern.
2261 The function to be called is passed in r0. If a 32-bit return
2262 value is expected, the actual function will be tail-called,
2263 otherwise the return address will be stored in r10 (that the
2264 caller should expect to be clobbered) and the return value
2265 will be expanded into r2/r3 upon return. */
2267 .global GLOBAL(GCC_shcompact_call_trampoline)
2268 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2269 GLOBAL(GCC_shcompact_call_trampoline):
2270 ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* r0 is now free: build in it the address of ct_main_table minus 31
   two-byte entries, 16 bits at a time (movi = upper half, shori = lower
   half).  tr1 is pointed at ct_loop, which is not visible here.  */
2271 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2272 pt/l LOCAL(ct_loop), tr1
2274 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
/* Every table entry above is an offset from this label.  */
2279 LOCAL(ct_main_label):
2282 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2283 /* It must be dr0, so just do it. */
2289 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2290 /* It is either dr0 or dr2. */
2299 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
/* r33 = ((r1 >> 23) & 3) * 8, i.e. a 2-bit cookie field scaled by 8;
   r32 = r33 + (ct_r4_fp_copy - ct_r4_fp_base).  */
2300 shlri r1, 23 - 3, r34
2301 andi r34, 3 << 3, r33
2302 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2303 LOCAL(ct_r4_fp_base):
2309 LOCAL(ct_r4_fp_copy):
2316 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
/* Same computation as for r4, using the cookie field at bit 20.  */
2317 shlri r1, 20 - 3, r34
2318 andi r34, 3 << 3, r33
2319 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2320 LOCAL(ct_r5_fp_base):
2326 LOCAL(ct_r5_fp_copy):
2335 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2336 /* It must be dr8. */
2342 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
/* Same computation, using the cookie field at bit 16.  */
2343 shlri r1, 16 - 3, r34
2344 andi r34, 3 << 3, r33
2345 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2346 LOCAL(ct_r6_fp_base):
2352 LOCAL(ct_r6_fp_copy):
2361 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2362 /* It is either dr8 or dr10. */
2370 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
/* Same computation, using the cookie field at bit 12.  */
2371 shlri r1, 12 - 3, r34
2372 andi r34, 3 << 3, r33
2373 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2374 LOCAL(ct_r7_fp_base):
2379 LOCAL(ct_r7_fp_copy):
2388 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2389 /* It is either dr8 or dr10. */
/* Isolate bit 8 of the cookie in r32.  */
2391 andi r1, 1 << 8, r32
2397 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
/* Same computation as the other _fpl cases, cookie field at bit 8.  */
2398 shlri r1, 8 - 3, r34
2399 andi r34, 3 << 3, r33
2400 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2401 LOCAL(ct_r8_fp_base):
2406 LOCAL(ct_r8_fp_copy):
2415 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2416 /* It is either dr8 or dr10. */
/* Isolate bit 4 of the cookie in r32.  */
2418 andi r1, 1 << 4, r32
2424 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
/* Same computation as the other _fpl cases, cookie field at bit 4.  */
2425 shlri r1, 4 - 3, r34
2426 andi r34, 3 << 3, r33
2427 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2428 LOCAL(ct_r9_fp_base):
2433 LOCAL(ct_r9_fp_copy):
/* The _ld handlers each point tr2 at their continuation label.  */
2442 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2443 pt/l LOCAL(ct_r2_load), tr2
2452 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2453 pt/l LOCAL(ct_r3_load), tr2
2461 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2462 pt/l LOCAL(ct_r4_load), tr2
2470 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2471 pt/l LOCAL(ct_r5_load), tr2
2479 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2480 pt/l LOCAL(ct_r6_load), tr2
2487 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2488 pt/l LOCAL(ct_r7_load), tr2
2495 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2496 pt/l LOCAL(ct_r8_load), tr2
2503 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* Unlike r2..r8, the r9 case targets ct_check_tramp directly.  */
2504 pt/l LOCAL(ct_check_tramp), tr2
2528 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2535 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2542 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2549 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2556 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2563 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2569 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2575 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = cookie bits 1..3 (still shifted left by one); r32 = address of
   ct_end_of_pop_seq, built with movi + shori.  */
2576 andi r1, 7 << 1, r30
2577 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2579 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2583 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2596 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2599 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2600 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2601 pt/u LOCAL(ct_ret_wide), tr2
2604 LOCAL(ct_call_func): /* Just branch to the function. */
2606 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2607 64-bit return value. */
/* The unpacking sequence differs by byte order; its instructions are not
   visible here.  */
2611 #if __LITTLE_ENDIAN__
2620 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2621 #endif /* L_shcompact_call_trampoline */
2623 #ifdef L_shcompact_return_trampoline
2624 /* This function does the converse of the code in `ret_wide'
2625 above. It is tail-called by SHcompact functions returning
2626 64-bit non-floating-point values, to pack the 32-bit values in
2627 r2 and r3 into r2. */
/* The routine is placed in the SHmedia text section and declared through
   HIDDEN_FUNC rather than FUNC.  */
2630 .section .text..SHmedia32, "ax"
2632 .global GLOBAL(GCC_shcompact_return_trampoline)
2633 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2634 GLOBAL(GCC_shcompact_return_trampoline):
/* The packing sequence depends on byte order; its instructions are not
   visible here.  */
2636 #if __LITTLE_ENDIAN__
2646 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2647 #endif /* L_shcompact_return_trampoline */
2649 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args: 33 16-bit entries.
   Valid entries are the offset of a handler label from ia_main_label;
   entries marked invalid hold the constant 1 instead.  The code that
   indexes the table is not visible here.  */
2652 LOCAL(ia_main_table):
2653 .word 1 /* Invalid, just loop */
2654 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2655 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2656 .word 1 /* Invalid, just loop */
2657 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2658 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2659 .word 1 /* Invalid, just loop */
2660 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2661 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2662 .word 1 /* Invalid, just loop */
2663 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2664 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* From r6 on, each register has two invalid slots before its _ld and
   _push entries.  */
2665 .word 1 /* Invalid, just loop */
2666 .word 1 /* Invalid, just loop */
2667 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2668 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2669 .word 1 /* Invalid, just loop */
2670 .word 1 /* Invalid, just loop */
2671 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2672 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2673 .word 1 /* Invalid, just loop */
2674 .word 1 /* Invalid, just loop */
2675 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2676 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2677 .word 1 /* Invalid, just loop */
2678 .word 1 /* Invalid, just loop */
2679 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2680 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2681 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2682 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2683 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2684 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2685 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2687 .section .text..SHmedia32, "ax"
2690 /* This function stores 64-bit general-purpose registers back in
2691 the stack, and loads the address in which each register
2692 was stored into itself. The lower 32 bits of r17 hold the address
2693 to begin storing, and the upper 32 bits of r17 hold the cookie.
2694 Its execution time is linear in the
2695 number of registers that actually have to be copied, and it is
2696 optimized for structures larger than 64 bits, as opposed to
2697 individual `long long' arguments. See sh.h for details on the
2698 actual bit pattern. */
2700 .global GLOBAL(GCC_shcompact_incoming_args)
2701 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2702 GLOBAL(GCC_shcompact_incoming_args):
2703 ptabs/l r18, tr0 /* Prepare to return. */
2704 shlri r17, 32, r0 /* Load the cookie. */
/* r43 = address of ia_main_table minus 31 two-byte entries, built 16 bits
   at a time (movi = upper half, shori = lower half).  tr1 is pointed at
   ia_loop, which is not visible here.  */
2705 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2706 pt/l LOCAL(ia_loop), tr1
2708 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Every offset entry in the table above is relative to this label.  */
2713 LOCAL(ia_main_label):
2716 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2725 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2734 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2743 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2752 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2761 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2769 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2777 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2781 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2788 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2795 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2802 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2809 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2816 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2822 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2828 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* r38 = cookie bits 1..3 (still shifted left by one); r40 = address of
   ia_end_of_push_seq, built with movi + shori.  */
2829 andi r0, 7 << 1, r38
2830 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2832 shori LOCAL(ia_end_of_push_seq) & 65535, r40
/* NOTE(review): the label below is spelled "stack_of"; the comment calls
   it the beginning of the sequence, so "start_of" may have been intended.
   No reference to it is visible here.  */
2836 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2849 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2851 LOCAL(ia_return): /* Return. */
2853 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2854 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2855 #endif /* L_shcompact_incoming_args */
/* GCC_nested_trampoline: placed in the SHmedia text section and declared
   through HIDDEN_FUNC.  The body between the label and ENDFUNC is not
   visible here.  */
2858 #ifdef L_nested_trampoline
2860 .section .text..SHmedia32,"ax"
2864 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2865 .global GLOBAL(GCC_nested_trampoline)
2866 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2867 GLOBAL(GCC_nested_trampoline):
2884 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2885 #endif /* L_nested_trampoline */
2886 #endif /* __SH5__ */
/* Push/pop helpers for SHmedia registers.  With an FPU the entry points
   are GCC_push_shmedia_regs / GCC_pop_shmedia_regs and they also handle
   dr36..dr62; with __SH4_NOFPU__ only the *_nofpu entry points exist.
   The general-register part of both routines is not visible here.  */
2888 #ifdef L_push_pop_shmedia_regs
2889 .section .text..SHmedia32,"ax"
2892 #ifndef __SH4_NOFPU__
2893 .global GLOBAL(GCC_push_shmedia_regs)
2894 FUNC(GLOBAL(GCC_push_shmedia_regs))
2895 GLOBAL(GCC_push_shmedia_regs):
/* Reserve 14 eight-byte slots below r15 and store the 14 even-numbered
   double registers dr36..dr62 into slots 0..13.  */
2896 addi.l r15, -14*8, r15
2897 fst.d r15, 13*8, dr62
2898 fst.d r15, 12*8, dr60
2899 fst.d r15, 11*8, dr58
2900 fst.d r15, 10*8, dr56
2901 fst.d r15, 9*8, dr54
2902 fst.d r15, 8*8, dr52
2903 fst.d r15, 7*8, dr50
2904 fst.d r15, 6*8, dr48
2905 fst.d r15, 5*8, dr46
2906 fst.d r15, 4*8, dr44
2907 fst.d r15, 3*8, dr42
2908 fst.d r15, 2*8, dr40
2909 fst.d r15, 1*8, dr38
2910 fst.d r15, 0*8, dr36
2911 #else /* __SH4_NOFPU__ */
2912 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2913 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2914 GLOBAL(GCC_push_shmedia_regs_nofpu):
2915 #endif /* ! __SH4_NOFPU__ */
/* Common to both entry points: reserve 27 more eight-byte slots.  */
2917 addi.l r15, -27*8, r15
2949 #ifndef __SH4_NOFPU__
2950 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2952 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2954 #ifndef __SH4_NOFPU__
2955 .global GLOBAL(GCC_pop_shmedia_regs)
2956 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2957 GLOBAL(GCC_pop_shmedia_regs):
/* Reload dr36..dr62 from slots 27..40 above r15, i.e. the 14 FP slots
   that sit above the 27-slot area reserved by the push routine.  */
2960 fld.d r15, 40*8, dr62
2961 fld.d r15, 39*8, dr60
2962 fld.d r15, 38*8, dr58
2963 fld.d r15, 37*8, dr56
2964 fld.d r15, 36*8, dr54
2965 fld.d r15, 35*8, dr52
2966 fld.d r15, 34*8, dr50
2967 fld.d r15, 33*8, dr48
2968 fld.d r15, 32*8, dr46
2969 fld.d r15, 31*8, dr44
2970 fld.d r15, 30*8, dr42
2971 fld.d r15, 29*8, dr40
2972 fld.d r15, 28*8, dr38
2973 fld.d r15, 27*8, dr36
2975 #else /* __SH4_NOFPU__ */
2976 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2977 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2978 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2979 #endif /* ! __SH4_NOFPU__ */
3016 #ifndef __SH4_NOFPU__
3017 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3019 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3021 #endif /* __SH5__ == 32 */
3022 #endif /* L_push_pop_shmedia_regs */
/* SHmedia PIC build: sdivsi3 is emitted together with the division table
   so the code can reach the table directly.  Several instructions of the
   routine are not visible here; the fixed-point annotations on the
   visible ones are the original author's.  */
3026 #if defined(__pic__) && defined(__SHMEDIA__)
3027 .global GLOBAL(sdivsi3)
3028 FUNC(GLOBAL(sdivsi3))
3030 .section .text..SHmedia32,"ax"
3035 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3036 in a text section does not work (at least for shared libraries):
3037 the linker sets the LSB of the address as if this was SHmedia code. */
3038 #define TEXT_DATA_BUG
3042 // clobbered: r1,r18,r19,r20,r21,r25,tr0
/* NOTE(review): this .global repeats the one at the top of the block.  */
3044 .global GLOBAL(sdivsi3)
/* Point tr0 at the table: the file-local copy Local_div_table when
   TEXT_DATA_BUG is defined, otherwise div_table_internal.  */
3046 #ifdef TEXT_DATA_BUG
3047 ptb datalabel Local_div_table,tr0
3049 ptb GLOBAL(div_table_internal),tr0
3052 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3053 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3056 ldx.ub r20, r21, r19 // u0.8
3057 shari r25, 32, r25 // normalize to s2.30
3059 muls.l r25, r19, r19 // s2.38
3060 ldx.w r20, r21, r21 // s2.14
3062 shari r19, 24, r19 // truncate to s2.14
3063 sub r21, r19, r19 // some 11 bit inverse in s1.14
3064 muls.l r19, r19, r21 // u0.28
3067 muls.l r25, r21, r18 // s2.58
3068 shlli r19, 45, r19 // multiply by two and convert to s2.58
3071 shari r18, 28, r18 // some 22 bit inverse in s1.30
3072 muls.l r18, r25, r0 // s2.60
3073 muls.l r18, r4, r25 // s32.30
3075 shari r0, 16, r19 // s-16.44
3076 muls.l r19, r18, r19 // s-16.74
3078 shari r4, 14, r18 // s19.-14
3079 shari r19, 30, r19 // s-16.44
3080 muls.l r19, r18, r19 // s15.30
3081 xor r21, r0, r21 // You could also use the constant 1 << 27.
3087 ENDFUNC(GLOBAL(sdivsi3))
3088 /* This table has been generated by divtab.c .
3089 Defects for bias -330:
3090 Max defect: 6.081536e-07 at -1.000000e+00
3091 Min defect: 2.849516e-08 at 1.030651e+00
3092 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3093 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3094 Defect at 1: 1.238659e-07
3095 Defect at -2: 1.061708e-07 */
3096 #else /* ! __pic__ || ! __SHMEDIA__ */
3098 #endif /* __pic__ */
/* File-local copy of the table, emitted only for the TEXT_DATA_BUG
   workaround.  Declared as a 128-byte object; the data lines are not
   visible here.  */
3099 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3101 .type Local_div_table,@object
3102 .size Local_div_table,128
3103 /* negative division constants */
3120 /* negative division factors */
3140 /* positive division factors */
3157 /* positive division constants */
3175 #endif /* TEXT_DATA_BUG */
/* Exported table, also declared as a 128-byte object.  The div_table
   label sits between the negative and the positive halves, and
   HIDDEN_ALIAS gives it the second name div_table_internal.  */
3177 .type GLOBAL(div_table),@object
3178 .size GLOBAL(div_table),128
3179 /* negative division constants */
3196 /* negative division factors */
3214 .global GLOBAL(div_table)
3216 HIDDEN_ALIAS(div_table_internal,div_table)
3218 /* positive division factors */
3235 /* positive division constants */
3253 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3254 /* This code uses shld, thus is not suitable for SH1 / SH2. */
3256 /* Signed / unsigned division without use of FPU, optimized for SH4.
3257 Uses a lookup table for divisors in the range -128 .. +128, and
3258 div1 with case distinction for larger divisors in three more ranges.
3259 The code is lumped together with the table to allow the use of mova. */
/* The L_LSWMSB / L_MSWLSB stack offsets used below are presumably defined
   under this byte-order conditional -- their definitions are not visible
   here.  */
3260 #ifdef __LITTLE_ENDIAN__
/* Unsigned entry point.  Only a sample of its instructions is visible;
   several branch targets (div_ge64k_end, div_by_1_neg, ...) sit inside
   or are shared with sdivsi3_i4i, which follows.  */
3271 .global GLOBAL(udivsi3_i4i)
3272 FUNC(GLOBAL(udivsi3_i4i))
3273 GLOBAL(udivsi3_i4i):
/* Load the 16-bit constant stored at c128_w into r1.  */
3274 mov.w LOCAL(c128_w), r1
3280 bf LOCAL(udiv_le128)
3282 bf LOCAL(udiv_ge64k)
/* Each mova puts the address of one of the lookup tables in r0.  */
3295 mova LOCAL(div_table_ix),r0
3296 bra LOCAL(div_le128_2)
3300 mova LOCAL(div_table_ix),r0
3304 mova LOCAL(div_table_inv),r0
3308 mova LOCAL(div_table_clz),r0
3311 bt/s LOCAL(div_by_1)
3322 LOCAL(div_by_1_neg):
3333 bra LOCAL(div_ge64k_2)
3345 mov.l LOCAL(zero_l),r1
3351 mov.w LOCAL(m256_w),r1
/* Store the low byte of r0 into the stack slot at offset L_LSWMSB.  */
3353 mov.b r0,@(L_LSWMSB,r15)
3356 bra LOCAL(div_ge64k_end)
/* One step of the div1 path mentioned above: rotate r0 through T, then
   a div1 step of r1 by the divisor in r5.  */
3378 rotcl r0; div1 r5,r1
3387 ENDFUNC(GLOBAL(udivsi3_i4i))
/* Signed entry point.  Only a sample of its instructions is visible
   here.  The lookup tables used by both the signed and the unsigned
   routine follow the code and precede ENDFUNC.  */
3389 .global GLOBAL(sdivsi3_i4i)
3390 FUNC(GLOBAL(sdivsi3_i4i))
3391 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3392 but we effectively clobber only r1. */
3393 GLOBAL(sdivsi3_i4i):
/* Load the 16-bit constant stored at c128_w into r1.  */
3396 mov.w LOCAL(c128_w), r1
3397 bt/s LOCAL(pos_divisor)
3401 bt/s LOCAL(neg_result)
3410 bf/s LOCAL(div_ge64k)
3418 mov.l LOCAL(zero_l),r1
/* Byte stores of r0 into the stack slots at L_MSWLSB / L_LSWMSB.  */
3425 mov.b r0,@(L_MSWLSB,r15)
3431 mov.b r0,@(L_LSWMSB,r15)
/* Also a branch target of udivsi3_i4i.  */
3432 LOCAL(div_ge64k_end):
3436 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3444 LOCAL(div_le128_neg):
/* Each mova puts the address of one of the lookup tables in r0.  */
3446 mova LOCAL(div_table_ix),r0
3448 mova LOCAL(div_table_inv),r0
3449 bt/s LOCAL(div_by_1_neg)
3451 mova LOCAL(div_table_clz),r0
3466 bt/s LOCAL(pos_result)
3471 bf LOCAL(div_le128_neg)
3475 bf/s LOCAL(div_ge64k_neg)
3478 mov.l LOCAL(zero_l),r1
3485 mov.b r0,@(L_MSWLSB,r15)
3491 mov.b r0,@(L_LSWMSB,r15)
3492 LOCAL(div_ge64k_neg_end):
3496 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3500 LOCAL(div_r8_neg_end):
3506 LOCAL(div_ge64k_neg):
3507 bt/s LOCAL(div_r8_neg)
3510 mov.l LOCAL(zero_l),r1
3516 mov.w LOCAL(m256_w),r1
3518 mov.b r0,@(L_LSWMSB,r15)
3521 bra LOCAL(div_ge64k_neg_end)
/* One div1 step; here the roles of r0 and r1 are swapped relative to the
   rotcl/div1 pair in udivsi3_i4i.  */
3534 rotcl r1; div1 r5,r0
3538 bra LOCAL(div_r8_neg_end)
3543 /* This table has been generated by divtab-sh4.c. */
3545 LOCAL(div_table_clz):
3674 /* Lookup table translating positive divisor to index into table of
3675 normalized inverse. N.B. the '0' entry is also the last entry of the
3676 previous table, and causes an unaligned access for division by zero. */
3677 LOCAL(div_table_ix):
3807 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3842 LOCAL(div_table_inv):
3875 /* maximum error: 0.987342 scaled: 0.921875*/
3877 ENDFUNC(GLOBAL(sdivsi3_i4i))
3878 #endif /* SH3 / SH4 */
3880 #endif /* L_div_table */
/* udiv_qrnnd_16: declared through HIDDEN_FUNC.  The body between the
   label and ENDFUNC is not visible here.  */
3882 #ifdef L_udiv_qrnnd_16
3884 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
/* NOTE(review): the three register lists below presumably give, in
   order, the results (rn, qn), the inputs (n1, n0, d, d1) and a scratch
   register (__m) -- confirm against the body.  */
3885 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3886 /* n1 < d, but n1 might be larger than d1. */
3887 .global GLOBAL(udiv_qrnnd_16)
3889 GLOBAL(udiv_qrnnd_16):
3926 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3927 #endif /* !__SHMEDIA__ */
3928 #endif /* L_udiv_qrnnd_16 */