Introduce sh2a support.
[gcc.git] / gcc / config / sh / lib1funcs.asm
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003
2 Free Software Foundation, Inc.
3
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
7 later version.
8
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
16 executable.)
17
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
27
28 !! libgcc routines for the Renesas / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
30 !! sac@cygnus.com
31
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
34 !! tm@netcom.com
35
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
38 amylaar@cygnus.com */
39
/* Helper macros used by every routine in this file.

   LOCAL(X)   - name of a file-local label built from X; ELF targets use
		a .L_ prefix, other targets a plain L_ prefix.
   FUNC(X)    - under ELF, mark symbol X as a function (.type); expands
		to nothing otherwise.
   ENDFUNC(X) - under ELF, define the end label .Lfe_X and set the size
		of X (.size); expands to nothing otherwise.  ENDFUNC0 is an
		extra expansion level, so that an argument such as
		GLOBAL(foo) is macro-expanded before it is pasted into the
		.Lfe_ label name.  */
40 #ifdef __ELF__
41 #define LOCAL(X) .L_##X
42 #define FUNC(X) .type X,@function
43 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
44 #define ENDFUNC(X) ENDFUNC0(X)
45 #else
46 #define LOCAL(X) L_##X
47 #define FUNC(X)
48 #define ENDFUNC(X)
49 #endif
50
/* GLOBAL(X) is the external name of library routine X: the user label
   prefix of the target followed by two underscores and X.  GLOBAL0 and
   CONCAT exist so that __USER_LABEL_PREFIX__ is expanded before the
   tokens are pasted together.  */
51 #define CONCAT(A,B) A##B
52 #define GLOBAL0(U,X) CONCAT(U,__##X)
53 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
54
/* FMOVD_WORKS selects a single fmov.d instead of two fmov.s instructions
   when the udivsi3_i4 routines load their 64-bit floating point
   constant.  */
55 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
56 #define FMOVD_WORKS
57 #endif
58
/* SH2A always takes the fmov.d path.  */
59 #ifdef __SH2A__
60 #undef FMOVD_WORKS
61 #define FMOVD_WORKS
62 #endif
63
64 #if ! __SH5__
65 #ifdef L_ashiftrt
66 .global GLOBAL(ashiftrt_r4_0)
67 .global GLOBAL(ashiftrt_r4_1)
68 .global GLOBAL(ashiftrt_r4_2)
69 .global GLOBAL(ashiftrt_r4_3)
70 .global GLOBAL(ashiftrt_r4_4)
71 .global GLOBAL(ashiftrt_r4_5)
72 .global GLOBAL(ashiftrt_r4_6)
73 .global GLOBAL(ashiftrt_r4_7)
74 .global GLOBAL(ashiftrt_r4_8)
75 .global GLOBAL(ashiftrt_r4_9)
76 .global GLOBAL(ashiftrt_r4_10)
77 .global GLOBAL(ashiftrt_r4_11)
78 .global GLOBAL(ashiftrt_r4_12)
79 .global GLOBAL(ashiftrt_r4_13)
80 .global GLOBAL(ashiftrt_r4_14)
81 .global GLOBAL(ashiftrt_r4_15)
82 .global GLOBAL(ashiftrt_r4_16)
83 .global GLOBAL(ashiftrt_r4_17)
84 .global GLOBAL(ashiftrt_r4_18)
85 .global GLOBAL(ashiftrt_r4_19)
86 .global GLOBAL(ashiftrt_r4_20)
87 .global GLOBAL(ashiftrt_r4_21)
88 .global GLOBAL(ashiftrt_r4_22)
89 .global GLOBAL(ashiftrt_r4_23)
90 .global GLOBAL(ashiftrt_r4_24)
91 .global GLOBAL(ashiftrt_r4_25)
92 .global GLOBAL(ashiftrt_r4_26)
93 .global GLOBAL(ashiftrt_r4_27)
94 .global GLOBAL(ashiftrt_r4_28)
95 .global GLOBAL(ashiftrt_r4_29)
96 .global GLOBAL(ashiftrt_r4_30)
97 .global GLOBAL(ashiftrt_r4_31)
98 .global GLOBAL(ashiftrt_r4_32)
99
100 FUNC(GLOBAL(ashiftrt_r4_0))
101 FUNC(GLOBAL(ashiftrt_r4_1))
102 FUNC(GLOBAL(ashiftrt_r4_2))
103 FUNC(GLOBAL(ashiftrt_r4_3))
104 FUNC(GLOBAL(ashiftrt_r4_4))
105 FUNC(GLOBAL(ashiftrt_r4_5))
106 FUNC(GLOBAL(ashiftrt_r4_6))
107 FUNC(GLOBAL(ashiftrt_r4_7))
108 FUNC(GLOBAL(ashiftrt_r4_8))
109 FUNC(GLOBAL(ashiftrt_r4_9))
110 FUNC(GLOBAL(ashiftrt_r4_10))
111 FUNC(GLOBAL(ashiftrt_r4_11))
112 FUNC(GLOBAL(ashiftrt_r4_12))
113 FUNC(GLOBAL(ashiftrt_r4_13))
114 FUNC(GLOBAL(ashiftrt_r4_14))
115 FUNC(GLOBAL(ashiftrt_r4_15))
116 FUNC(GLOBAL(ashiftrt_r4_16))
117 FUNC(GLOBAL(ashiftrt_r4_17))
118 FUNC(GLOBAL(ashiftrt_r4_18))
119 FUNC(GLOBAL(ashiftrt_r4_19))
120 FUNC(GLOBAL(ashiftrt_r4_20))
121 FUNC(GLOBAL(ashiftrt_r4_21))
122 FUNC(GLOBAL(ashiftrt_r4_22))
123 FUNC(GLOBAL(ashiftrt_r4_23))
124 FUNC(GLOBAL(ashiftrt_r4_24))
125 FUNC(GLOBAL(ashiftrt_r4_25))
126 FUNC(GLOBAL(ashiftrt_r4_26))
127 FUNC(GLOBAL(ashiftrt_r4_27))
128 FUNC(GLOBAL(ashiftrt_r4_28))
129 FUNC(GLOBAL(ashiftrt_r4_29))
130 FUNC(GLOBAL(ashiftrt_r4_30))
131 FUNC(GLOBAL(ashiftrt_r4_31))
132 FUNC(GLOBAL(ashiftrt_r4_32))
133
! GLOBAL(ashiftrt_r4_N), N = 0..32
!
! Arithmetic right shift of r4 by the constant N, done in place.
!
! Entry:
!   r4: Value to shift
! Exit:
!   r4: Shifted value
! Destroys:
!   T bit (every entry point that executes shar, rotcl or subc)
!
! The entry points form fall-through chains: entering at N executes one
! shar per label down to the next multiple of eight handled specially
! (24, 16) or down to 1, so each label costs a single instruction.
134 .align 1
135 GLOBAL(ashiftrt_r4_32):
136 GLOBAL(ashiftrt_r4_31):
137 rotcl r4 ! T = sign bit of r4
138 rts
139 subc r4,r4 ! delay slot: r4 = r4 - r4 - T, i.e. 0 or -1
140
141 GLOBAL(ashiftrt_r4_30):
142 shar r4
143 GLOBAL(ashiftrt_r4_29):
144 shar r4
145 GLOBAL(ashiftrt_r4_28):
146 shar r4
147 GLOBAL(ashiftrt_r4_27):
148 shar r4
149 GLOBAL(ashiftrt_r4_26):
150 shar r4
151 GLOBAL(ashiftrt_r4_25):
152 shar r4
153 GLOBAL(ashiftrt_r4_24):
154 shlr16 r4 ! logical shift by 16 + 8 = 24 ...
155 shlr8 r4
156 rts
157 exts.b r4,r4 ! delay slot: ... then restore the sign from bit 7
158
159 GLOBAL(ashiftrt_r4_23):
160 shar r4
161 GLOBAL(ashiftrt_r4_22):
162 shar r4
163 GLOBAL(ashiftrt_r4_21):
164 shar r4
165 GLOBAL(ashiftrt_r4_20):
166 shar r4
167 GLOBAL(ashiftrt_r4_19):
168 shar r4
169 GLOBAL(ashiftrt_r4_18):
170 shar r4
171 GLOBAL(ashiftrt_r4_17):
172 shar r4
173 GLOBAL(ashiftrt_r4_16):
174 shlr16 r4 ! logical shift by 16 ...
175 rts
176 exts.w r4,r4 ! delay slot: ... then restore the sign from bit 15
177
178 GLOBAL(ashiftrt_r4_15):
179 shar r4
180 GLOBAL(ashiftrt_r4_14):
181 shar r4
182 GLOBAL(ashiftrt_r4_13):
183 shar r4
184 GLOBAL(ashiftrt_r4_12):
185 shar r4
186 GLOBAL(ashiftrt_r4_11):
187 shar r4
188 GLOBAL(ashiftrt_r4_10):
189 shar r4
190 GLOBAL(ashiftrt_r4_9):
191 shar r4
192 GLOBAL(ashiftrt_r4_8):
193 shar r4
194 GLOBAL(ashiftrt_r4_7):
195 shar r4
196 GLOBAL(ashiftrt_r4_6):
197 shar r4
198 GLOBAL(ashiftrt_r4_5):
199 shar r4
200 GLOBAL(ashiftrt_r4_4):
201 shar r4
202 GLOBAL(ashiftrt_r4_3):
203 shar r4
204 GLOBAL(ashiftrt_r4_2):
205 shar r4
206 GLOBAL(ashiftrt_r4_1):
207 rts
208 shar r4 ! delay slot: the final single-bit shift
209
210 GLOBAL(ashiftrt_r4_0):
211 rts
212 nop
213
214 ENDFUNC(GLOBAL(ashiftrt_r4_0))
215 ENDFUNC(GLOBAL(ashiftrt_r4_1))
216 ENDFUNC(GLOBAL(ashiftrt_r4_2))
217 ENDFUNC(GLOBAL(ashiftrt_r4_3))
218 ENDFUNC(GLOBAL(ashiftrt_r4_4))
219 ENDFUNC(GLOBAL(ashiftrt_r4_5))
220 ENDFUNC(GLOBAL(ashiftrt_r4_6))
221 ENDFUNC(GLOBAL(ashiftrt_r4_7))
222 ENDFUNC(GLOBAL(ashiftrt_r4_8))
223 ENDFUNC(GLOBAL(ashiftrt_r4_9))
224 ENDFUNC(GLOBAL(ashiftrt_r4_10))
225 ENDFUNC(GLOBAL(ashiftrt_r4_11))
226 ENDFUNC(GLOBAL(ashiftrt_r4_12))
227 ENDFUNC(GLOBAL(ashiftrt_r4_13))
228 ENDFUNC(GLOBAL(ashiftrt_r4_14))
229 ENDFUNC(GLOBAL(ashiftrt_r4_15))
230 ENDFUNC(GLOBAL(ashiftrt_r4_16))
231 ENDFUNC(GLOBAL(ashiftrt_r4_17))
232 ENDFUNC(GLOBAL(ashiftrt_r4_18))
233 ENDFUNC(GLOBAL(ashiftrt_r4_19))
234 ENDFUNC(GLOBAL(ashiftrt_r4_20))
235 ENDFUNC(GLOBAL(ashiftrt_r4_21))
236 ENDFUNC(GLOBAL(ashiftrt_r4_22))
237 ENDFUNC(GLOBAL(ashiftrt_r4_23))
238 ENDFUNC(GLOBAL(ashiftrt_r4_24))
239 ENDFUNC(GLOBAL(ashiftrt_r4_25))
240 ENDFUNC(GLOBAL(ashiftrt_r4_26))
241 ENDFUNC(GLOBAL(ashiftrt_r4_27))
242 ENDFUNC(GLOBAL(ashiftrt_r4_28))
243 ENDFUNC(GLOBAL(ashiftrt_r4_29))
244 ENDFUNC(GLOBAL(ashiftrt_r4_30))
245 ENDFUNC(GLOBAL(ashiftrt_r4_31))
246 ENDFUNC(GLOBAL(ashiftrt_r4_32))
247 #endif
248
249 #ifdef L_ashiftrt_n
250
251 !
252 ! GLOBAL(ashrsi3): r0 = r4 arithmetically shifted right by (r5 & 31)
253 !
254 ! Entry:
255 !
256 ! r4: Value to shift
257 ! r5: Shifts (only the low five bits are used)
258 !
259 ! Exit:
260 !
261 ! r0: Result
262 !
263 ! Destroys:
264 !
265 ! r5, and the T bit for most shift counts
266 !
267
268 .global GLOBAL(ashrsi3)
269 FUNC(GLOBAL(ashrsi3))
270 .align 2
271 GLOBAL(ashrsi3):
272 mov #31,r0
273 and r0,r5 ! r5 = shift count modulo 32
274 mova LOCAL(ashrsi3_table),r0
275 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
276 #ifdef __sh1__
277 add r5,r0 ! r0 = absolute address of the handler
278 jmp @r0
279 #else
280 braf r5 ! PC-relative; the table directly follows the delay slot
281 #endif
282 mov r4,r0 ! delay slot: shift a copy, r4 stays intact
283
! One byte per shift count: distance from the table to the entry point.
284 .align 2
285 LOCAL(ashrsi3_table):
286 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
318
! The handlers are fall-through chains of single-bit shifts, with
! shortcuts at 31, 24 and 16.
319 LOCAL(ashrsi3_31):
320 rotcl r0 ! T = sign bit
321 rts
322 subc r0,r0 ! delay slot: r0 = -T, i.e. 0 or -1
323
324 LOCAL(ashrsi3_30):
325 shar r0
326 LOCAL(ashrsi3_29):
327 shar r0
328 LOCAL(ashrsi3_28):
329 shar r0
330 LOCAL(ashrsi3_27):
331 shar r0
332 LOCAL(ashrsi3_26):
333 shar r0
334 LOCAL(ashrsi3_25):
335 shar r0
336 LOCAL(ashrsi3_24):
337 shlr16 r0
338 shlr8 r0
339 rts
340 exts.b r0,r0 ! delay slot: restore the sign from bit 7
341
342 LOCAL(ashrsi3_23):
343 shar r0
344 LOCAL(ashrsi3_22):
345 shar r0
346 LOCAL(ashrsi3_21):
347 shar r0
348 LOCAL(ashrsi3_20):
349 shar r0
350 LOCAL(ashrsi3_19):
351 shar r0
352 LOCAL(ashrsi3_18):
353 shar r0
354 LOCAL(ashrsi3_17):
355 shar r0
356 LOCAL(ashrsi3_16):
357 shlr16 r0
358 rts
359 exts.w r0,r0 ! delay slot: restore the sign from bit 15
360
361 LOCAL(ashrsi3_15):
362 shar r0
363 LOCAL(ashrsi3_14):
364 shar r0
365 LOCAL(ashrsi3_13):
366 shar r0
367 LOCAL(ashrsi3_12):
368 shar r0
369 LOCAL(ashrsi3_11):
370 shar r0
371 LOCAL(ashrsi3_10):
372 shar r0
373 LOCAL(ashrsi3_9):
374 shar r0
375 LOCAL(ashrsi3_8):
376 shar r0
377 LOCAL(ashrsi3_7):
378 shar r0
379 LOCAL(ashrsi3_6):
380 shar r0
381 LOCAL(ashrsi3_5):
382 shar r0
383 LOCAL(ashrsi3_4):
384 shar r0
385 LOCAL(ashrsi3_3):
386 shar r0
387 LOCAL(ashrsi3_2):
388 shar r0
389 LOCAL(ashrsi3_1):
390 rts
391 shar r0 ! delay slot: the final single-bit shift
392
393 LOCAL(ashrsi3_0):
394 rts
395 nop
396
397 ENDFUNC(GLOBAL(ashrsi3))
398 #endif
399
400 #ifdef L_ashiftlt
401
402 !
403 ! GLOBAL(ashlsi3): r0 = r4 shifted left by (r5 & 31)
404 !
405 ! Entry:
406 !
407 ! r4: Value to shift
408 ! r5: Shifts (only the low five bits are used)
409 !
410 ! Exit:
411 !
412 ! r0: Result
413 !
414 ! Destroys:
415 !
416 ! r5, and the T bit for odd shift counts (shll)
417 !
418 .global GLOBAL(ashlsi3)
419 FUNC(GLOBAL(ashlsi3))
420 .align 2
421 GLOBAL(ashlsi3):
422 mov #31,r0
423 and r0,r5 ! r5 = shift count modulo 32
424 mova LOCAL(ashlsi3_table),r0
425 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
426 #ifdef __sh1__
427 add r5,r0 ! r0 = absolute address of the handler
428 jmp @r0
429 #else
430 braf r5 ! PC-relative; the table directly follows the delay slot
431 #endif
432 mov r4,r0 ! delay slot: shift a copy, r4 stays intact
433
! One byte per shift count: distance from the table to the entry point.
434 .align 2
435 LOCAL(ashlsi3_table):
436 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
468
! Each group below is a fall-through chain: zero to three shll2 steps
! followed by a fixed tail of 0/1, 8/9, 16/17 or 24/25 bits.
! Tail 2 (counts 2, 4, 6).
469 LOCAL(ashlsi3_6):
470 shll2 r0
471 LOCAL(ashlsi3_4):
472 shll2 r0
473 LOCAL(ashlsi3_2):
474 rts
475 shll2 r0
476
! Tail 1 (counts 1, 3, 5, 7).
477 LOCAL(ashlsi3_7):
478 shll2 r0
479 LOCAL(ashlsi3_5):
480 shll2 r0
481 LOCAL(ashlsi3_3):
482 shll2 r0
483 LOCAL(ashlsi3_1):
484 rts
485 shll r0
486
! Tail 8 (counts 8, 10, 12, 14).
487 LOCAL(ashlsi3_14):
488 shll2 r0
489 LOCAL(ashlsi3_12):
490 shll2 r0
491 LOCAL(ashlsi3_10):
492 shll2 r0
493 LOCAL(ashlsi3_8):
494 rts
495 shll8 r0
496
! Tail 8 + 1 (counts 9, 11, 13, 15).
497 LOCAL(ashlsi3_15):
498 shll2 r0
499 LOCAL(ashlsi3_13):
500 shll2 r0
501 LOCAL(ashlsi3_11):
502 shll2 r0
503 LOCAL(ashlsi3_9):
504 shll8 r0
505 rts
506 shll r0
507
! Tail 16 (counts 16, 18, 20, 22).
508 LOCAL(ashlsi3_22):
509 shll2 r0
510 LOCAL(ashlsi3_20):
511 shll2 r0
512 LOCAL(ashlsi3_18):
513 shll2 r0
514 LOCAL(ashlsi3_16):
515 rts
516 shll16 r0
517
! Tail 16 + 1 (counts 17, 19, 21, 23).
518 LOCAL(ashlsi3_23):
519 shll2 r0
520 LOCAL(ashlsi3_21):
521 shll2 r0
522 LOCAL(ashlsi3_19):
523 shll2 r0
524 LOCAL(ashlsi3_17):
525 shll16 r0
526 rts
527 shll r0
528
! Tail 16 + 8 (counts 24, 26, 28, 30).
529 LOCAL(ashlsi3_30):
530 shll2 r0
531 LOCAL(ashlsi3_28):
532 shll2 r0
533 LOCAL(ashlsi3_26):
534 shll2 r0
535 LOCAL(ashlsi3_24):
536 shll16 r0
537 rts
538 shll8 r0
539
! Tail 16 + 8 + 1 (counts 25, 27, 29, 31).
540 LOCAL(ashlsi3_31):
541 shll2 r0
542 LOCAL(ashlsi3_29):
543 shll2 r0
544 LOCAL(ashlsi3_27):
545 shll2 r0
546 LOCAL(ashlsi3_25):
547 shll16 r0
548 shll8 r0
549 rts
550 shll r0
551
552 LOCAL(ashlsi3_0):
553 rts
554 nop
555
556 ENDFUNC(GLOBAL(ashlsi3))
557 #endif
558
559 #ifdef L_lshiftrt
560
561 !
562 ! GLOBAL(lshrsi3): r0 = r4 logically shifted right by (r5 & 31)
563 !
564 ! Entry:
565 !
566 ! r4: Value to shift
567 ! r5: Shifts (only the low five bits are used)
568 !
569 ! Exit:
570 !
571 ! r0: Result
572 !
573 ! Destroys:
574 !
575 ! r5, and the T bit for odd shift counts (shlr)
576 !
577 .global GLOBAL(lshrsi3)
578 FUNC(GLOBAL(lshrsi3))
579 .align 2
580 GLOBAL(lshrsi3):
581 mov #31,r0
582 and r0,r5 ! r5 = shift count modulo 32
583 mova LOCAL(lshrsi3_table),r0
584 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
585 #ifdef __sh1__
586 add r5,r0 ! r0 = absolute address of the handler
587 jmp @r0
588 #else
589 braf r5 ! PC-relative; the table directly follows the delay slot
590 #endif
591 mov r4,r0 ! delay slot: shift a copy, r4 stays intact
592
! One byte per shift count: distance from the table to the entry point.
593 .align 2
594 LOCAL(lshrsi3_table):
595 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
627
! Each group below is a fall-through chain: zero to three shlr2 steps
! followed by a fixed tail of 0/1, 8/9, 16/17 or 24/25 bits.
! Tail 2 (counts 2, 4, 6).
628 LOCAL(lshrsi3_6):
629 shlr2 r0
630 LOCAL(lshrsi3_4):
631 shlr2 r0
632 LOCAL(lshrsi3_2):
633 rts
634 shlr2 r0
635
! Tail 1 (counts 1, 3, 5, 7).
636 LOCAL(lshrsi3_7):
637 shlr2 r0
638 LOCAL(lshrsi3_5):
639 shlr2 r0
640 LOCAL(lshrsi3_3):
641 shlr2 r0
642 LOCAL(lshrsi3_1):
643 rts
644 shlr r0
645
! Tail 8 (counts 8, 10, 12, 14).
646 LOCAL(lshrsi3_14):
647 shlr2 r0
648 LOCAL(lshrsi3_12):
649 shlr2 r0
650 LOCAL(lshrsi3_10):
651 shlr2 r0
652 LOCAL(lshrsi3_8):
653 rts
654 shlr8 r0
655
! Tail 8 + 1 (counts 9, 11, 13, 15).
656 LOCAL(lshrsi3_15):
657 shlr2 r0
658 LOCAL(lshrsi3_13):
659 shlr2 r0
660 LOCAL(lshrsi3_11):
661 shlr2 r0
662 LOCAL(lshrsi3_9):
663 shlr8 r0
664 rts
665 shlr r0
666
! Tail 16 (counts 16, 18, 20, 22).
667 LOCAL(lshrsi3_22):
668 shlr2 r0
669 LOCAL(lshrsi3_20):
670 shlr2 r0
671 LOCAL(lshrsi3_18):
672 shlr2 r0
673 LOCAL(lshrsi3_16):
674 rts
675 shlr16 r0
676
! Tail 16 + 1 (counts 17, 19, 21, 23).
677 LOCAL(lshrsi3_23):
678 shlr2 r0
679 LOCAL(lshrsi3_21):
680 shlr2 r0
681 LOCAL(lshrsi3_19):
682 shlr2 r0
683 LOCAL(lshrsi3_17):
684 shlr16 r0
685 rts
686 shlr r0
687
! Tail 16 + 8 (counts 24, 26, 28, 30).
688 LOCAL(lshrsi3_30):
689 shlr2 r0
690 LOCAL(lshrsi3_28):
691 shlr2 r0
692 LOCAL(lshrsi3_26):
693 shlr2 r0
694 LOCAL(lshrsi3_24):
695 shlr16 r0
696 rts
697 shlr8 r0
698
! Tail 16 + 8 + 1 (counts 25, 27, 29, 31).
699 LOCAL(lshrsi3_31):
700 shlr2 r0
701 LOCAL(lshrsi3_29):
702 shlr2 r0
703 LOCAL(lshrsi3_27):
704 shlr2 r0
705 LOCAL(lshrsi3_25):
706 shlr16 r0
707 shlr8 r0
708 rts
709 shlr r0
710
711 LOCAL(lshrsi3_0):
712 rts
713 nop
714
715 ENDFUNC(GLOBAL(lshrsi3))
716 #endif
717
718 #ifdef L_movmem
719 .text
720 ! done all the large groups, do the remainder
721
722 ! jump to movmemSI0 + 4 * r6 (r6 <= 0 here)
! Reached from GLOBAL(movmem) once r6 is no longer positive.  Every
! movmemSI entry below is two 2-byte instructions, so landing 4 * r6
! bytes before movmemSI0 copies -r6 more longwords.  r4 and r5 are
! advanced past the 64-byte group that was just copied.
723 done:
724 add #64,r5
725 mova GLOBAL(movmemSI0),r0
726 shll2 r6 ! r6 = byte offset (4 bytes of code per longword)
727 add r6,r0
728 jmp @r0
729 add #64,r4 ! delay slot
! GLOBAL(movmemSI<N>), N = 64, 60, ..., 4, 0
!
! Copy N bytes as longwords from @r5 to @r4, highest offset first, by
! falling through the chain below.
!
! Entry:
!   r4: Destination address
!   r5: Source address
! Destroys:
!   r0
730 .align 4
731 .global GLOBAL(movmemSI64)
732 FUNC(GLOBAL(movmemSI64))
733 GLOBAL(movmemSI64):
734 mov.l @(60,r5),r0
735 mov.l r0,@(60,r4)
736 .global GLOBAL(movmemSI60)
737 FUNC(GLOBAL(movmemSI60))
738 GLOBAL(movmemSI60):
739 mov.l @(56,r5),r0
740 mov.l r0,@(56,r4)
741 .global GLOBAL(movmemSI56)
742 FUNC(GLOBAL(movmemSI56))
743 GLOBAL(movmemSI56):
744 mov.l @(52,r5),r0
745 mov.l r0,@(52,r4)
746 .global GLOBAL(movmemSI52)
747 FUNC(GLOBAL(movmemSI52))
748 GLOBAL(movmemSI52):
749 mov.l @(48,r5),r0
750 mov.l r0,@(48,r4)
751 .global GLOBAL(movmemSI48)
752 FUNC(GLOBAL(movmemSI48))
753 GLOBAL(movmemSI48):
754 mov.l @(44,r5),r0
755 mov.l r0,@(44,r4)
756 .global GLOBAL(movmemSI44)
757 FUNC(GLOBAL(movmemSI44))
758 GLOBAL(movmemSI44):
759 mov.l @(40,r5),r0
760 mov.l r0,@(40,r4)
761 .global GLOBAL(movmemSI40)
762 FUNC(GLOBAL(movmemSI40))
763 GLOBAL(movmemSI40):
764 mov.l @(36,r5),r0
765 mov.l r0,@(36,r4)
766 .global GLOBAL(movmemSI36)
767 FUNC(GLOBAL(movmemSI36))
768 GLOBAL(movmemSI36):
769 mov.l @(32,r5),r0
770 mov.l r0,@(32,r4)
771 .global GLOBAL(movmemSI32)
772 FUNC(GLOBAL(movmemSI32))
773 GLOBAL(movmemSI32):
774 mov.l @(28,r5),r0
775 mov.l r0,@(28,r4)
776 .global GLOBAL(movmemSI28)
777 FUNC(GLOBAL(movmemSI28))
778 GLOBAL(movmemSI28):
779 mov.l @(24,r5),r0
780 mov.l r0,@(24,r4)
781 .global GLOBAL(movmemSI24)
782 FUNC(GLOBAL(movmemSI24))
783 GLOBAL(movmemSI24):
784 mov.l @(20,r5),r0
785 mov.l r0,@(20,r4)
786 .global GLOBAL(movmemSI20)
787 FUNC(GLOBAL(movmemSI20))
788 GLOBAL(movmemSI20):
789 mov.l @(16,r5),r0
790 mov.l r0,@(16,r4)
791 .global GLOBAL(movmemSI16)
792 FUNC(GLOBAL(movmemSI16))
793 GLOBAL(movmemSI16):
794 mov.l @(12,r5),r0
795 mov.l r0,@(12,r4)
796 .global GLOBAL(movmemSI12)
797 FUNC(GLOBAL(movmemSI12))
798 GLOBAL(movmemSI12):
799 mov.l @(8,r5),r0
800 mov.l r0,@(8,r4)
801 .global GLOBAL(movmemSI8)
802 FUNC(GLOBAL(movmemSI8))
803 GLOBAL(movmemSI8):
804 mov.l @(4,r5),r0
805 mov.l r0,@(4,r4)
806 .global GLOBAL(movmemSI4)
807 FUNC(GLOBAL(movmemSI4))
808 GLOBAL(movmemSI4):
809 mov.l @(0,r5),r0
810 mov.l r0,@(0,r4)
811 .global GLOBAL(movmemSI0)
812 FUNC(GLOBAL(movmemSI0))
813 GLOBAL(movmemSI0):
814 rts
815 nop
816
817 ENDFUNC(GLOBAL(movmemSI64))
818 ENDFUNC(GLOBAL(movmemSI60))
819 ENDFUNC(GLOBAL(movmemSI56))
820 ENDFUNC(GLOBAL(movmemSI52))
821 ENDFUNC(GLOBAL(movmemSI48))
822 ENDFUNC(GLOBAL(movmemSI44))
823 ENDFUNC(GLOBAL(movmemSI40))
824 ENDFUNC(GLOBAL(movmemSI36))
825 ENDFUNC(GLOBAL(movmemSI32))
826 ENDFUNC(GLOBAL(movmemSI28))
827 ENDFUNC(GLOBAL(movmemSI24))
828 ENDFUNC(GLOBAL(movmemSI20))
829 ENDFUNC(GLOBAL(movmemSI16))
830 ENDFUNC(GLOBAL(movmemSI12))
831 ENDFUNC(GLOBAL(movmemSI8))
832 ENDFUNC(GLOBAL(movmemSI4))
833 ENDFUNC(GLOBAL(movmemSI0))
834
835 .align 4
836
! GLOBAL(movmem)
!
! Copy memory in groups of 64 bytes (16 longwords), then hand the
! remainder to the movmemSI chain through the "done" label.
!
! Entry:
!   r4: Destination address
!   r5: Source address
!   r6: Loop control value.  16 is subtracted for every 64-byte group
!       copied; once the value is no longer positive, "done" copies
!       -r6 further longwords.
! Destroys:
!   r0, r4, r5, r6, T bit
837 .global GLOBAL(movmem)
838 FUNC(GLOBAL(movmem))
839 GLOBAL(movmem):
840 mov.l @(60,r5),r0
841 mov.l r0,@(60,r4)
842
843 mov.l @(56,r5),r0
844 mov.l r0,@(56,r4)
845
846 mov.l @(52,r5),r0
847 mov.l r0,@(52,r4)
848
849 mov.l @(48,r5),r0
850 mov.l r0,@(48,r4)
851
852 mov.l @(44,r5),r0
853 mov.l r0,@(44,r4)
854
855 mov.l @(40,r5),r0
856 mov.l r0,@(40,r4)
857
858 mov.l @(36,r5),r0
859 mov.l r0,@(36,r4)
860
861 mov.l @(32,r5),r0
862 mov.l r0,@(32,r4)
863
864 mov.l @(28,r5),r0
865 mov.l r0,@(28,r4)
866
867 mov.l @(24,r5),r0
868 mov.l r0,@(24,r4)
869
870 mov.l @(20,r5),r0
871 mov.l r0,@(20,r4)
872
873 mov.l @(16,r5),r0
874 mov.l r0,@(16,r4)
875
876 mov.l @(12,r5),r0
877 mov.l r0,@(12,r4)
878
879 mov.l @(8,r5),r0
880 mov.l r0,@(8,r4)
881
882 mov.l @(4,r5),r0
883 mov.l r0,@(4,r4)
884
885 mov.l @(0,r5),r0
886 mov.l r0,@(0,r4)
887
888 add #-16,r6 ! one group of 16 longwords finished
889 cmp/pl r6
890 bf done ! r6 <= 0: copy the remainder and return
891
892 add #64,r5
893 bra GLOBAL(movmem)
894 add #64,r4 ! delay slot
895
! Close the function: this has to be ENDFUNC (end label and .size), not a
! second FUNC, which would only repeat the .type directive.
896 ENDFUNC(GLOBAL(movmem))
897 #endif
898
899 #ifdef L_movmem_i4
900 .text
! GLOBAL(movmem_i4_even), GLOBAL(movmem_i4_odd)
!
! Longword copy loop that moves four longwords per round trip, with the
! loads running ahead of the stores.
!
! Entry:
!   r4: Destination address
!   r5: Source address (read with post-increment)
!   r6: Loop counter; dt decrements it once per two longwords stored in
!       the loop, and the copy ends when it reaches zero
! Destroys:
!   r0, r1, r2, r3, r4, r5, r6, T bit
!
! movmem_i4_odd copies three longwords before it joins the shared loop;
! movmem_i4_even preloads two and joins at L_movmem_start_even.
!
! GLOBAL(movmemSI12_i4) copies exactly three longwords from @r5 to @r4
! and destroys r0, r1, r2.
901 .global GLOBAL(movmem_i4_even)
902 .global GLOBAL(movmem_i4_odd)
903 .global GLOBAL(movmemSI12_i4)
904
905 FUNC(GLOBAL(movmem_i4_even))
906 FUNC(GLOBAL(movmem_i4_odd))
907 FUNC(GLOBAL(movmemSI12_i4))
908
909 .p2align 5
! Loop exit taken when the counter runs out in the first half of the
! loop: r0/r1 are already loaded and r4 has not been advanced yet.
910 L_movmem_2mod4_end:
911 mov.l r0,@(16,r4)
912 rts
913 mov.l r1,@(20,r4) ! delay slot
914
915 .p2align 2
916
917 GLOBAL(movmem_i4_even):
918 mov.l @r5+,r0
919 bra L_movmem_start_even
920 mov.l @r5+,r1 ! delay slot
921
922 GLOBAL(movmem_i4_odd):
923 mov.l @r5+,r1
924 add #-4,r4 ! bias r4 so the stores below use offsets 4, 8, 12
925 mov.l @r5+,r2
926 mov.l @r5+,r3
927 mov.l r1,@(4,r4)
928 mov.l r2,@(8,r4)
929
930 L_movmem_loop:
931 mov.l r3,@(12,r4)
932 dt r6 ! T = 1 when the counter reaches zero
933 mov.l @r5+,r0
934 bt/s L_movmem_2mod4_end
935 mov.l @r5+,r1 ! delay slot, executed on both paths
936 add #16,r4
937 L_movmem_start_even:
938 mov.l @r5+,r2
939 mov.l @r5+,r3
940 mov.l r0,@r4
941 dt r6
942 mov.l r1,@(4,r4)
943 bf/s L_movmem_loop
944 mov.l r2,@(8,r4) ! delay slot, executed on both paths
945 rts
946 mov.l r3,@(12,r4) ! delay slot
947
948 ENDFUNC(GLOBAL(movmem_i4_even))
949 ENDFUNC(GLOBAL(movmem_i4_odd))
950
951 .p2align 4
952 GLOBAL(movmemSI12_i4):
953 mov.l @r5,r0
954 mov.l @(4,r5),r1
955 mov.l @(8,r5),r2
956 mov.l r0,@r4
957 mov.l r1,@(4,r4)
958 rts
959 mov.l r2,@(8,r4) ! delay slot
960
961 ENDFUNC(GLOBAL(movmemSI12_i4))
962 #endif
963
964 #ifdef L_mulsi3
965
966
! GLOBAL(mulsi3): 32 x 32 -> 32 bit multiply built from 16 x 16 bit mulu.w
!
! Entry:
!   r4, r5: Operands
! Exit:
!   r0: Low 32 bits of r4 * r5
! Destroys:
!   r1, r2, r3, macl, T bit
967 .global GLOBAL(mulsi3)
968 FUNC(GLOBAL(mulsi3))
969
970 ! r4 = aabb
971 ! r5 = ccdd
972 ! r0 = aabb*ccdd via partial products
973 !
974 ! if aa == 0 and cc == 0
975 ! r0 = bb*dd
976 !
977 ! else
978 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
979 !
980
981 GLOBAL(mulsi3):
982 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
983 mov r5,r3 ! r3 = ccdd
984 swap.w r4,r2 ! r2 = bbaa
985 xtrct r2,r3 ! r3 = aacc
986 tst r3,r3 ! msws zero ?
987 bf hiset
988 rts ! yes - then we have the answer
989 sts macl,r0
990
991 hiset: sts macl,r0 ! r0 = bb*dd
992 mulu.w r2,r5 ! brewing macl = aa*dd
993 sts macl,r1
994 mulu.w r3,r4 ! brewing macl = cc*bb
995 sts macl,r2
996 add r1,r2 ! r2 = aa*dd + cc*bb
997 shll16 r2 ! both cross products carry a weight of 65536
998 rts
999 add r2,r0
1000
! Close the function: this has to be ENDFUNC (end label and .size), not a
! second FUNC, which would only repeat the .type directive.
1001 ENDFUNC(GLOBAL(mulsi3))
1002 #endif
1003 #endif /* ! __SH5__ */
1004 #ifdef L_sdivsi3_i4
! GLOBAL(sdivsi3_i4): signed 32-bit division done in double precision
! floating point.  Both operands are converted with float, divided with
! fdiv, and the quotient is truncated back to an integer with ftrc.
! The result is returned in fpul, not in r0.
1005 .title "SH DIVIDE"
1006 !! 4 byte integer Divide code for the Renesas SH
1007 #ifdef __SH4__
1008 !! args in r4 and r5, result in fpul, clobber dr0, dr2
! This variant uses the FPU mode it is entered with and does not touch
! fpscr.
1009
1010 .global GLOBAL(sdivsi3_i4)
1011 FUNC(GLOBAL(sdivsi3_i4))
1012 GLOBAL(sdivsi3_i4):
1013 lds r4,fpul
1014 float fpul,dr0 ! dr0 = (double) dividend
1015 lds r5,fpul
1016 float fpul,dr2 ! dr2 = (double) divisor
1017 fdiv dr2,dr0 ! dr0 = dr0 / dr2
1018 rts
1019 ftrc dr0,fpul ! delay slot: truncate the quotient into fpul
1020
1021 ENDFUNC(GLOBAL(sdivsi3_i4))
1022 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1023 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
! This variant saves fpscr on the stack, loads it with 0x80000 (the PR
! bit, selecting double precision) for the computation and restores the
! saved value in the delay slot of the return.
1024
1025 #if ! __SH5__ || __SH5__ == 32
1026 #if __SH5__
1027 .mode SHcompact
1028 #endif
1029 .global GLOBAL(sdivsi3_i4)
1030 FUNC(GLOBAL(sdivsi3_i4))
1031 GLOBAL(sdivsi3_i4):
1032 sts.l fpscr,@-r15 ! save the fpscr of the caller
1033 mov #8,r2
1034 swap.w r2,r2 ! r2 = 8 << 16 = 0x80000
1035 lds r2,fpscr
1036 lds r4,fpul
1037 float fpul,dr0 ! dr0 = (double) dividend
1038 lds r5,fpul
1039 float fpul,dr2 ! dr2 = (double) divisor
1040 fdiv dr2,dr0
1041 ftrc dr0,fpul ! truncate the quotient into fpul
1042 rts
1043 lds.l @r15+,fpscr ! delay slot: restore the fpscr of the caller
1044
1045 ENDFUNC(GLOBAL(sdivsi3_i4))
1046 #endif /* ! __SH5__ || __SH5__ == 32 */
1047 #endif /* ! __SH4__ */
1048 #endif
1049
1050 #ifdef L_sdivsi3
1051 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1052 sh2e/sh3e code. */
1053 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1054 !!
1055 !! Steve Chamberlain
1056 !! sac@cygnus.com
1057 !!
1058 !!
1059
1060 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1061
1062 .global GLOBAL(sdivsi3)
1063 FUNC(GLOBAL(sdivsi3))
1064 #if __SHMEDIA__
1065 #if __SH5__ == 32
1066 .section .text..SHmedia32,"ax"
1067 #else
1068 .text
1069 #endif
1070 .align 2
1071 #if 0
1072 /* The assembly code that follows is a hand-optimized version of the C
1073 code that follows. Note that the registers that are modified are
1074 exactly those listed as clobbered in the patterns divsi3_i1 and
1075 divsi3_i1_media.
1076
1077 int __sdivsi3 (i, j)
1078 int i, j;
1079 {
1080 register unsigned long long r18 asm ("r18");
1081 register unsigned long long r19 asm ("r19");
1082 register unsigned long long r0 asm ("r0") = 0;
1083 register unsigned long long r1 asm ("r1") = 1;
1084 register int r2 asm ("r2") = i >> 31;
1085 register int r3 asm ("r3") = j >> 31;
1086
1087 r2 = r2 ? r2 : r1;
1088 r3 = r3 ? r3 : r1;
1089 r18 = i * r2;
1090 r19 = j * r3;
1091 r2 *= r3;
1092
1093 r19 <<= 31;
1094 r1 <<= 31;
1095 do
1096 if (r18 >= r19)
1097 r0 |= r1, r18 -= r19;
1098 while (r19 >>= 1, r1 >>= 1);
1099
1100 return r2 * (int)r0;
1101 }
1102 */
// Disabled (#if 0) SHmedia implementation: a 32-step compare and
// subtract loop that follows the C code in the comment before it.
// r18 = |dividend|, r19 = |divisor| << 31, r1 = current quotient bit,
// r0 = quotient so far, r2 = +1 or -1 sign of the result.
1103 GLOBAL(sdivsi3):
1104 pt/l LOCAL(sdivsi3_dontadd), tr2
1105 pt/l LOCAL(sdivsi3_loop), tr1
1106 ptabs/l r18, tr0 // tr0 = return address
1107 movi 0, r0
1108 movi 1, r1
1109 shari.l r4, 31, r2
1110 shari.l r5, 31, r3
1111 cmveq r2, r1, r2 // r2 = r2 ? r2 : 1
1112 cmveq r3, r1, r3 // r3 = r3 ? r3 : 1
1113 muls.l r4, r2, r18
1114 muls.l r5, r3, r19
1115 muls.l r2, r3, r2
1116 shlli r19, 31, r19
1117 shlli r1, 31, r1
1118 LOCAL(sdivsi3_loop):
1119 bgtu r19, r18, tr2 // shifted divisor too big: skip this bit
1120 or r0, r1, r0
1121 sub r18, r19, r18
1122 LOCAL(sdivsi3_dontadd):
1123 shlri r1, 1, r1
1124 shlri r19, 1, r19
1125 bnei r1, 0, tr1 // loop until the bit mask is shifted out
1126 muls.l r0, r2, r0 // apply the sign
1127 add.l r0, r63, r0 // sign-extend the 32-bit result
1128 blink tr0, r63
1129 #else /* ! 0 */
// Active SHmedia implementation of signed 32-bit division.
// NOTE(review): this looks like a multiply-based division that builds a
// fixed-point reciprocal estimate of the divisor and then corrects the
// quotient in three multiply / subtract rounds - confirm against the
// SHmedia instruction manual before relying on this description.
1130 // inputs: r4,r5
1131 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1132 // result in r0
1133 GLOBAL(sdivsi3):
1134 // can create absolute value without extra latency,
1135 // but dependent on proper sign extension of inputs:
1136 // shari.l r5,31,r2
1137 // xor r5,r2,r20
1138 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1139 shari.l r5,31,r2
1140 ori r2,1,r2 // r2 = -1 or +1, the sign of r5
1141 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1142 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1143 shari.l r4,31,r3
1144 nsb r20,r0
1145 shlld r20,r0,r25
1146 shlri r25,48,r25
1147 sub r19,r25,r1
1148 mmulfx.w r1,r1,r2
1149 mshflo.w r1,r63,r1
1150 // If r4 was to be used in-place instead of r21, could use this sequence
1151 // to compute absolute:
1152 // sub r63,r4,r19 // compute absolute value of r4
1153 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1154 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1155 ori r3,1,r3 // r3 = -1 or +1, the sign of r4
1156 mmulfx.w r25,r2,r2
1157 sub r19,r0,r0
1158 muls.l r4,r3,r21 // r21 = absolute value of r4
1159 msub.w r1,r2,r2
1160 addi r2,-2,r1
1161 mulu.l r21,r1,r19
1162 mmulfx.w r2,r2,r2
1163 shlli r1,15,r1
1164 shlrd r19,r0,r19
1165 mulu.l r19,r20,r3
1166 mmacnfx.wl r25,r2,r1
1167 ptabs r18,tr0 // tr0 = return address, r18 is reused below
1168 sub r21,r3,r25
1169
1170 mulu.l r25,r1,r2
1171 addi r0,14,r0
1172 xor r4,r5,r18
1173 shlrd r2,r0,r2
1174 mulu.l r2,r20,r3
1175 add r19,r2,r19
1176 shari.l r18,31,r18 // r18 = -1 if the operand signs differ, else 0
1177 sub r25,r3,r25
1178
1179 mulu.l r25,r1,r2
1180 sub r25,r20,r25
1181 add r19,r18,r19
1182 shlrd r2,r0,r2
1183 mulu.l r2,r20,r3
1184 addi r25,1,r25
1185 add r19,r2,r19
1186
1187 cmpgt r25,r3,r25
1188 add.l r19,r25,r0
1189 xor r0,r18,r0
1190 blink tr0,r63
1191 #endif
1192 #elif defined __SHMEDIA__
1193 /* m5compact-nofpu */
/* Signed 32-bit division by a 32-step shift and subtract loop, written
   in SHmedia code.  Inputs r4 (dividend) and r5 (divisor), result in
   r0.  The operands are made positive first; r19 keeps the sign of the
   result (-1 if the operand signs differ, else 0) and is applied at the
   end with xor / sub.l.  */
1194 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1195 .mode SHmedia
1196 .section .text..SHmedia32,"ax"
1197 .align 2
1198 GLOBAL(sdivsi3):
1199 pt/l LOCAL(sdivsi3_dontsub), tr0
1200 pt/l LOCAL(sdivsi3_loop), tr1
1201 ptabs/l r18,tr2 // tr2 = return address
1202 shari.l r4,31,r18 // r18 = sign mask of the dividend
1203 shari.l r5,31,r19 // r19 = sign mask of the divisor
1204 xor r4,r18,r20
1205 xor r5,r19,r21
1206 sub.l r20,r18,r20 // r20 = absolute value of the dividend
1207 sub.l r21,r19,r21 // r21 = absolute value of the divisor
1208 xor r18,r19,r19 // r19 = sign mask of the quotient
1209 shlli r21,32,r25 // low 32 bits of r25 are zero: loop counter
1210 addi r25,-1,r21 // r21 = (divisor << 32) - 1
1211 addz.l r20,r63,r20 // zero-extend the dividend
1212 LOCAL(sdivsi3_loop):
1213 shlli r20,1,r20
1214 bgeu/u r21,r20,tr0
1215 sub r20,r21,r20 // subtracting (divisor << 32) - 1 also sets a quotient bit
1216 LOCAL(sdivsi3_dontsub):
1217 addi.l r25,-1,r25
1218 bnei r25,-32,tr1 // 32 iterations
1219 xor r20,r19,r20
1220 sub.l r20,r19,r0 // negate if the signs differed
1221 blink tr2,r63
1222 #else /* ! __SHMEDIA__ */
! SHcompact implementation of signed 32-bit division with the div0s /
! div1 step-divide instructions: 32 div1 steps, each followed by a rotcl
! that shifts the new quotient bit into r1.
! Arguments in r4 (dividend) and r5 (divisor), result in r0.
! A zero divisor is not trapped: the routine returns 0.
! Destroys r1, r2, r3 and the T bit.
1223 GLOBAL(sdivsi3):
1224 mov r4,r1
1225 mov r5,r0
1226
1227 tst r0,r0
1228 bt div0 ! divisor is zero
1229 mov #0,r2
1230 div0s r2,r1 ! T = sign of the dividend
1231 subc r3,r3 ! r3 = 0 or -1: sign extension of the dividend
1232 subc r2,r1 ! subtract 1 from a negative dividend (r2 = 0, T = borrow)
1233 div0s r0,r3 ! set up the signed division flags
1234 rotcl r1
1235 div1 r0,r3
1236 rotcl r1
1237 div1 r0,r3
1238 rotcl r1
1239 div1 r0,r3
1240 rotcl r1
1241 div1 r0,r3
1242 rotcl r1
1243 div1 r0,r3
1244 rotcl r1
1245 div1 r0,r3
1246 rotcl r1
1247 div1 r0,r3
1248 rotcl r1
1249 div1 r0,r3
1250 rotcl r1
1251 div1 r0,r3
1252 rotcl r1
1253 div1 r0,r3
1254 rotcl r1
1255 div1 r0,r3
1256 rotcl r1
1257 div1 r0,r3
1258 rotcl r1
1259 div1 r0,r3
1260 rotcl r1
1261 div1 r0,r3
1262 rotcl r1
1263 div1 r0,r3
1264 rotcl r1
1265 div1 r0,r3
1266 rotcl r1
1267 div1 r0,r3
1268 rotcl r1
1269 div1 r0,r3
1270 rotcl r1
1271 div1 r0,r3
1272 rotcl r1
1273 div1 r0,r3
1274 rotcl r1
1275 div1 r0,r3
1276 rotcl r1
1277 div1 r0,r3
1278 rotcl r1
1279 div1 r0,r3
1280 rotcl r1
1281 div1 r0,r3
1282 rotcl r1
1283 div1 r0,r3
1284 rotcl r1
1285 div1 r0,r3
1286 rotcl r1
1287 div1 r0,r3
1288 rotcl r1
1289 div1 r0,r3
1290 rotcl r1
1291 div1 r0,r3
1292 rotcl r1
1293 div1 r0,r3
1294 rotcl r1
1295 div1 r0,r3
1296 rotcl r1
1297 div1 r0,r3
1298 rotcl r1
1299 addc r2,r1 ! final correction: add the last carry (r2 = 0)
1300 rts
1301 mov r1,r0 ! delay slot: return the quotient in r0
1302
1303
1304 div0: rts
1305 mov #0,r0 ! delay slot: result for a zero divisor
1306
1307 ENDFUNC(GLOBAL(sdivsi3))
1308 #endif /* ! __SHMEDIA__ */
1309 #endif /* ! __SH4__ */
1310 #endif
1311 #ifdef L_udivsi3_i4
1312
1313 .title "SH DIVIDE"
1314 !! 4 byte integer Divide code for the Renesas SH
1315 #ifdef __SH4__
! GLOBAL(udivsi3_i4), SH4 variant: unsigned 32-bit division done in
! double precision floating point, result in fpul.
!
! float only converts signed integers, so each operand first has its
! top bit flipped (xor with 0x80000000), is converted, and then has
! 2147483648.0 added back to give the unsigned value as a double.
! Divisors 0 and 1 take the "trivial" path, which returns the dividend
! unchanged.
1316 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1317 !! and t bit
1318
1319 .global GLOBAL(udivsi3_i4)
1320 FUNC(GLOBAL(udivsi3_i4))
1321 GLOBAL(udivsi3_i4):
1322 mov #1,r1
1323 cmp/hi r1,r5 ! T = (divisor > 1), unsigned
1324 bf trivial
1325 rotr r1 ! r1 = 0x80000000
1326 xor r1,r4 ! flip the top bit of the dividend
1327 lds r4,fpul
1328 mova L1,r0 ! r0 = address of the constant 2147483648.0
1329 #ifdef FMOVD_WORKS
1330 fmov.d @r0+,dr4
1331 #else
1332 #ifdef __LITTLE_ENDIAN__
1333 fmov.s @r0+,fr5
1334 fmov.s @r0,fr4
1335 #else
1336 fmov.s @r0+,fr4
1337 fmov.s @r0,fr5
1338 #endif
1339 #endif
1340 float fpul,dr0
1341 xor r1,r5 ! flip the top bit of the divisor
1342 lds r5,fpul
1343 float fpul,dr2
1344 fadd dr4,dr0 ! dr0 = dividend as an unsigned value
1345 fadd dr4,dr2 ! dr2 = divisor as an unsigned value
1346 fdiv dr2,dr0
1347 rts
1348 ftrc dr0,fpul ! delay slot: truncate the quotient into fpul
1349
1350 trivial:
1351 rts
1352 lds r4,fpul ! delay slot: result = dividend
1353
1354 .align 2
1355 #ifdef FMOVD_WORKS
1356 .align 3 ! make double below 8 byte aligned.
1357 #endif
1358 L1:
1359 .double 2147483648
1360
1361 ENDFUNC(GLOBAL(udivsi3_i4))
1362 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1363 #if ! __SH5__ || __SH5__ == 32
// GLOBAL(udivsi3_i4), SH5 variant written in SHmedia code: unsigned
// 32-bit division done in double precision floating point.  Both
// operands are zero-extended to 64 bits, converted with float.qd,
// divided, and the quotient is truncated with ftrc.dq.
1364 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1365 .mode SHmedia
1366 .global GLOBAL(udivsi3_i4)
1367 FUNC(GLOBAL(udivsi3_i4))
1368 GLOBAL(udivsi3_i4):
1369 addz.l r4,r63,r20 // r20 = zero-extended dividend
1370 addz.l r5,r63,r21 // r21 = zero-extended divisor
1371 fmov.qd r20,dr0
1372 fmov.qd r21,dr32
1373 ptabs r18,tr0 // tr0 = return address
1374 float.qd dr0,dr0
1375 float.qd dr32,dr32
1376 fdiv.d dr0,dr32,dr0
1377 ftrc.dq dr0,dr32
1378 fmov.s fr33,fr32 // NOTE(review): presumably moves the low word of the 64-bit result to where fpul is read - confirm
1379 blink tr0,r63
1380
1381 ENDFUNC(GLOBAL(udivsi3_i4))
1382 #endif /* ! __SH5__ || __SH5__ == 32 */
1383 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
! GLOBAL(udivsi3_i4), variant for SH4 targets whose default FPU mode is
! single precision: unsigned 32-bit division done in double precision
! floating point, result in fpul.
!
! The fpscr of the caller is saved on the stack, fpscr is loaded from
! the word at L1 for the computation, and the saved value is restored in
! the delay slot of the return.  Each operand has its top bit flipped
! before the signed conversion and 2147483648.0 added back afterwards.
! Divisors 0 and 1 take the "trivial" path, which returns the dividend
! unchanged and never touches fpscr or the stack.
1384 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1385
1386 .global GLOBAL(udivsi3_i4)
! Mark the symbol as a function, matching the ENDFUNC at the end and the
! other variants of this routine.
FUNC(GLOBAL(udivsi3_i4))
1387 GLOBAL(udivsi3_i4):
1388 mov #1,r1
1389 cmp/hi r1,r5 ! T = (divisor > 1), unsigned
1390 bf trivial
1391 sts.l fpscr,@-r15 ! save the fpscr of the caller
1392 mova L1,r0
1393 lds.l @r0+,fpscr ! new fpscr from L1; r0 now points at the double
1394 rotr r1 ! r1 = 0x80000000
1395 xor r1,r4 ! flip the top bit of the dividend
1396 lds r4,fpul
1397 #ifdef FMOVD_WORKS
1398 fmov.d @r0+,dr4
1399 #else
1400 #ifdef __LITTLE_ENDIAN__
1401 fmov.s @r0+,fr5
1402 fmov.s @r0,fr4
1403 #else
1404 fmov.s @r0+,fr4
1405 fmov.s @r0,fr5
1406 #endif
1407 #endif
1408 float fpul,dr0
1409 xor r1,r5 ! flip the top bit of the divisor
1410 lds r5,fpul
1411 float fpul,dr2
1412 fadd dr4,dr0 ! dr0 = dividend as an unsigned value
1413 fadd dr4,dr2 ! dr2 = divisor as an unsigned value
1414 fdiv dr2,dr0
1415 ftrc dr0,fpul ! truncate the quotient into fpul
1416 rts
1417 lds.l @r15+,fpscr ! delay slot: restore the fpscr of the caller
1418
! Aligning "trivial" to 8 bytes puts L1 (4 bytes of code later) at an
! address that is 4 modulo 8, so the double after the fpscr word is
! 8 byte aligned.
1419 #ifdef FMOVD_WORKS
1420 .align 3 ! make double below 8 byte aligned.
1421 #endif
1422 trivial:
1423 rts
1424 lds r4,fpul ! delay slot: result = dividend
1425
1426 .align 2
1427 L1:
1428 #ifndef FMOVD_WORKS
1429 .long 0x80000
1430 #else
1431 .long 0x180000
1432 #endif
1433 .double 2147483648
1434
1435 ENDFUNC(GLOBAL(udivsi3_i4))
1436 #endif /* ! __SH4__ */
1437 #endif
1438
1439 #ifdef L_udivsi3
1440 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1441 sh2e/sh3e code. */
1442 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1443
1444 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1445 .global GLOBAL(udivsi3)
1446 FUNC(GLOBAL(udivsi3))
1447
1448 #if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.

unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2
	or	r0, r18, r0
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1
	blink	tr0, r63
#else
/* Unsigned 32-bit divide for SHmedia.  The instruction order below is
   hand-scheduled (see the bubble markers); keep it as is.  */
GLOBAL(udivsi3):
 // inputs: r4,r5
 // clobbered: r18,r19,r20,r21,r22,r25,tr0
 // result in r0.
	addz.l	r5,r63,r22
	nsb	r22,r0
	shlld	r22,r0,r25
	shlri	r25,48,r25
	movi	0xffffffffffffbb0c,r20 // shift count equiv 76
	sub	r20,r25,r21
	mmulfx.w	r21,r21,r19
	mshflo.w	r21,r63,r21
	ptabs	r18,tr0
	mmulfx.w	r25,r19,r19
	sub	r20,r0,r0
	/* bubble */
	msub.w	r21,r19,r19
	addi	r19,-2,r21 /* It would be nice for scheduling to do this add to r21
			      before the msub.w, but we need a different value for
			      r19 to keep errors under control.  */
	mulu.l	r4,r21,r18
	mmulfx.w	r19,r19,r19
	shlli	r21,15,r21
	shlrd	r18,r0,r18
	mulu.l	r18,r22,r20
	mmacnfx.wl	r25,r19,r21
	/* bubble */
	sub	r4,r20,r25

	mulu.l	r25,r21,r19
	addi	r0,14,r0
	/* bubble */
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	add	r18,r19,r18
	/* bubble */
	sub.l	r25,r20,r25

	mulu.l	r25,r21,r19
	addz.l	r25,r63,r25
	sub	r25,r22,r25
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	addi	r25,1,r25
	add	r18,r19,r18

	cmpgt	r25,r20,r25
	add.l	r18,r25,r0
	blink	tr0,r63
#endif
	/* FUNC(GLOBAL(udivsi3)) is emitted ahead of the variant selection;
	   close it here so this variant also records a symbol size.  */
	ENDFUNC(GLOBAL(udivsi3))
1550 #elif defined (__SHMEDIA__)
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.

   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.

   r20 starts as the zero-extended dividend and is shifted left once per
   iteration.  r21 is (divisor << 32) - 1, so every subtraction removes
   the divisor from the upper half of r20 and at the same time adds one
   to the lower half, which collects the quotient bits.  The low 32 bits
   of r25 start at zero and count down to -32, giving 32 iterations.  */
 // clobbered: r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontsub), tr0
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18,tr2
	shlli	r5,32,r25
	addi	r25,-1,r21
	addz.l	r4,r63,r20
LOCAL(udivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(udivsi3_dontsub):
	addi.l	r25,-1,r25
	bnei	r25,-32,tr1
	add.l	r20,r63,r0
	blink	tr2,r63
	/* FUNC(GLOBAL(udivsi3)) is emitted ahead of the variant selection;
	   close it here so this variant also records a symbol size.  */
	ENDFUNC(GLOBAL(udivsi3))
1574 #else /* ! defined (__SHMEDIA__) */
/* SHcompact udivsi3 built on the div1 step instruction.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   pr is pushed on entry and popped before returning, because the body
   calls the local helpers below with bsr.  r4 and the T bit are
   modified; r5 is shifted left on the small divisor path and shifted
   back in the final rts delay slot.

   Helpers (entered with bsr, whose delay slot supplies one extra
   instruction before the helper body runs):
     div8  - eight div1 steps, the last one in the rts delay slot.
     div7  - seven div1 steps, the last one in the rts delay slot.
     divx4 - four div1 steps with a rotcl r0 after each of the first
             three; the last div1 sits in the rts delay slot.  */
1575 LOCAL(div8):
1576 div1 r5,r4
1577 LOCAL(div7):
1578 div1 r5,r4; div1 r5,r4; div1 r5,r4
1579 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1580
1581 LOCAL(divx4):
1582 div1 r5,r4; rotcl r0
1583 div1 r5,r4; rotcl r0
1584 div1 r5,r4; rotcl r0
1585 rts; div1 r5,r4
1586
1587 GLOBAL(udivsi3):
1588 sts.l pr,@-r15
/* T = 1 when the divisor equals its own zero-extended low 16 bits,
   i.e. when it fits in 16 bits.  */
1589 extu.w r5,r0
1590 cmp/eq r5,r0
/* Without __sh1__ the div0u below executes in the bf/s delay slot on
   both paths.  With __sh1__ a plain bf is used, so div0u only runs on
   the fall-through path and is repeated at large_divisor.
   NOTE(review): presumably SH1 has no delayed conditional branch - confirm.  */
1591 #ifdef __sh1__
1592 bf LOCAL(large_divisor)
1593 #else
1594 bf/s LOCAL(large_divisor)
1595 #endif
1596 div0u
/* Small divisor: the divisor is moved to the upper half of r5 (shll16
   in the first bsr delay slot) and four groups of eight div1 steps are
   run, with xtrct / swap.w shuffling halves between r4 and r0.  */
1597 swap.w r4,r0
1598 shlr16 r4
1599 bsr LOCAL(div8)
1600 shll16 r5
1601 bsr LOCAL(div7)
1602 div1 r5,r4
1603 xtrct r4,r0
1604 xtrct r0,r4
1605 bsr LOCAL(div8)
1606 swap.w r4,r4
1607 bsr LOCAL(div7)
1608 div1 r5,r4
1609 lds.l @r15+,pr
1610 xtrct r4,r0
1611 swap.w r0,r0
1612 rotcl r0
1613 rts
/* Delay slot: undo the shll16 applied to r5 above.  */
1614 shlr16 r5
1615
/* Large divisor: four calls to divx4, each preceded by a rotcl r0 in
   the bsr delay slot, give sixteen div1 steps; the closing rotcl r0 in
   the rts delay slot shifts in the last T bit.  */
1616 LOCAL(large_divisor):
1617 #ifdef __sh1__
1618 div0u
1619 #endif
1620 mov #0,r0
1621 xtrct r4,r0
1622 xtrct r0,r4
1623 bsr LOCAL(divx4)
1624 rotcl r0
1625 bsr LOCAL(divx4)
1626 rotcl r0
1627 bsr LOCAL(divx4)
1628 rotcl r0
1629 bsr LOCAL(divx4)
1630 rotcl r0
1631 lds.l @r15+,pr
1632 rts
1633 rotcl r0
1634
1635 ENDFUNC(GLOBAL(udivsi3))
1636 #endif /* ! __SHMEDIA__ */
1637 #endif /* __SH4__ */
1638 #endif /* L_udivsi3 */
1639
/* GLOBAL(udivdi3), SHmedia only.
   Reads:  r2 and r3 as the two operands (going by the routine name and
           the comments below, r2 is the dividend and r3 the divisor)
           and r18 as the return address.
   Writes: the result to r2.
   Registers written along the way: r0, r1, r4-r9, r19-r22, r25, tr0.

   The code normalises r3 (nsb / shlld), builds a fixed-point reciprocal
   estimate in r1, and then either runs the three-step small divisor
   path or branches to large_divisor.  The instruction order is
   hand-scheduled; do not reorder.  */
1640 #ifdef L_udivdi3
1641 #ifdef __SHMEDIA__
1642 .mode SHmedia
1643 .section .text..SHmedia32,"ax"
1644 .align 2
1645 .global GLOBAL(udivdi3)
1646 FUNC(GLOBAL(udivdi3))
1647 GLOBAL(udivdi3):
1648 shlri r3,1,r4
1649 nsb r4,r22
1650 shlld r3,r22,r6
1651 shlri r6,49,r5
1652 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1653 sub r21,r5,r1
1654 mmulfx.w r1,r1,r4
1655 mshflo.w r1,r63,r1
1656 sub r63,r22,r20 // r63 == 64 % 64
1657 mmulfx.w r5,r4,r4
1658 pta LOCAL(large_divisor),tr0
1659 addi r20,32,r9
1660 msub.w r1,r4,r1
1661 madd.w r1,r1,r1
1662 mmulfx.w r1,r1,r4
1663 shlri r6,32,r7
/* r9 = 32 - r22; a positive value selects the large_divisor path.  */
1664 bgt/u r9,r63,tr0 // large_divisor
1665 mmulfx.w r5,r4,r4
1666 shlri r2,32+14,r19
1667 addi r22,-31,r0
1668 msub.w r1,r4,r1
1669
1670 mulu.l r1,r7,r4
1671 addi r1,-3,r5
1672 mulu.l r5,r19,r5
1673 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1674 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1675 the case may be, %0000000000000000 000.11111111111, still */
1676 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1677 mulu.l r5,r3,r8
1678 mshalds.l r1,r21,r1
1679 shari r4,26,r4
1680 shlld r8,r0,r8
1681 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1682 sub r2,r8,r2
1683 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1684
1685 shlri r2,22,r21
1686 mulu.l r21,r1,r21
1687 shlld r5,r0,r8
1688 addi r20,30-22,r0
1689 shlrd r21,r0,r21
1690 mulu.l r21,r3,r5
1691 add r8,r21,r8
1692 mcmpgt.l r21,r63,r21 // See Note 1
1693 addi r20,30,r0
1694 mshfhi.l r63,r21,r21
1695 sub r2,r5,r2
1696 andc r2,r21,r2
1697
1698 /* small divisor: need a third divide step */
1699 mulu.l r2,r1,r7
1700 ptabs r18,tr0
1701 addi r2,1,r2
1702 shlrd r7,r0,r7
1703 mulu.l r7,r3,r5
1704 add r8,r7,r8
1705 sub r2,r3,r2
/* r5 becomes 0 or 1 and is added to the accumulated partial results in
   r8 to form the value returned in r2.  */
1706 cmpgt r2,r5,r5
1707 add r8,r5,r2
1708 /* could test r3 here to check for divide by zero. */
1709 blink tr0,r63
1710
1711 LOCAL(large_divisor):
1712 mmulfx.w r5,r4,r4
1713 shlrd r2,r9,r25
1714 shlri r25,32,r8
1715 msub.w r1,r4,r1
1716
1717 mulu.l r1,r7,r4
1718 addi r1,-3,r5
1719 mulu.l r5,r8,r5
1720 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1721 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1722 the case may be, %0000000000000000 000.11111111111, still */
1723 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1724 shlri r5,14-1,r8
1725 mulu.l r8,r7,r5
1726 mshalds.l r1,r21,r1
1727 shari r4,26,r4
1728 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1729 sub r25,r5,r25
1730 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1731
1732 shlri r25,22,r21
1733 mulu.l r21,r1,r21
1734 pta LOCAL(no_lo_adj),tr0
1735 addi r22,32,r0
1736 shlri r21,40,r21
1737 mulu.l r21,r7,r5
1738 add r8,r21,r8
1739 shlld r2,r0,r2
1740 sub r25,r5,r25
/* Skip the +1 / -r7 correction when r7 is greater than the rest in r25.  */
1741 bgtu/u r7,r25,tr0 // no_lo_adj
1742 addi r8,1,r8
1743 sub r25,r7,r25
1744 LOCAL(no_lo_adj):
1745 mextr4 r2,r25,r2
1746
1747 /* large_divisor: only needs a few adjustments. */
1748 mulu.l r8,r6,r5
1749 ptabs r18,tr0
1750 /* bubble */
/* r5 becomes 0 or 1 and is subtracted from r8 to form the value
   returned in r2.  */
1751 cmpgtu r5,r2,r5
1752 sub r8,r5,r2
1753 blink tr0,r63
1754 ENDFUNC(GLOBAL(udivdi3))
1755 /* Note 1: To shift the result of the second divide stage so that the result
1756 always fits into 32 bits, yet we still reduce the rest sufficiently
1757 would require a lot of instructions to do the shifts just right. Using
1758 the full 64 bit shift result to multiply with the divisor would require
1759 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1760 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1761 know that the rest after taking this partial result into account will
1762 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1763 upper 32 bits of the partial result are nonzero. */
1764 #endif /* __SHMEDIA__ */
1765 #endif /* L_udivdi3 */
1766
#ifdef L_divdi3
#ifdef __SHMEDIA__
/* Signed 64-bit divide for SHmedia, layered on top of udivdi3.
   In:  r2, r3 = operands, r18 = return address.   Out: r2.
   Both operands are replaced by their absolute values.  When their
   signs agree the routine tail-calls udivdi3; otherwise it calls
   udivdi3 and negates r2 before returning.
   Writes r22, r23, tr0 and tr1 here, plus whatever udivdi3 writes.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(divdi3)
	FUNC(GLOBAL(divdi3))
GLOBAL(divdi3):
	pta	GLOBAL(udivdi3),tr0
	shari	r2,63,r22	/* r22 = 0 or -1, the sign mask of r2 */
	shari	r3,63,r23	/* r23 = 0 or -1, the sign mask of r3 */
	xor	r2,r22,r2	/* (x ^ mask) - mask yields |x| */
	xor	r3,r23,r3
	sub	r2,r22,r2
	sub	r3,r23,r3
	beq/u	r22,r23,tr0	/* equal signs: udivdi3 returns to our caller */
	ptabs	r18,tr1		/* keep our own return target in tr1 */
	blink	tr0,r18		/* call udivdi3 */
	sub	r63,r2,r2	/* signs differed: negate the result */
	blink	tr1,r63
	ENDFUNC(GLOBAL(divdi3))
#endif /* __SHMEDIA__ */
#endif /* L_divdi3 */
1790
/* GLOBAL(umoddi3), SHmedia only.
   Reads:  r2 and r3 as the two operands (going by the routine name and
           the comments below, r2 is the dividend and r3 the divisor)
           and r18 as the return address.
   Writes: the result to r2.
   Registers written along the way: r0, r1, r4-r9, r19-r22, r25, tr0.

   The code follows the same reciprocal scheme as udivdi3, but tracks
   the rest instead of accumulating partial results.  The instruction
   order is hand-scheduled; do not reorder.  */
1791 #ifdef L_umoddi3
1792 #ifdef __SHMEDIA__
1793 .mode SHmedia
1794 .section .text..SHmedia32,"ax"
1795 .align 2
1796 .global GLOBAL(umoddi3)
1797 FUNC(GLOBAL(umoddi3))
1798 GLOBAL(umoddi3):
1799 shlri r3,1,r4
1800 nsb r4,r22
1801 shlld r3,r22,r6
1802 shlri r6,49,r5
1803 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1804 sub r21,r5,r1
1805 mmulfx.w r1,r1,r4
1806 mshflo.w r1,r63,r1
1807 sub r63,r22,r20 // r63 == 64 % 64
1808 mmulfx.w r5,r4,r4
1809 pta LOCAL(large_divisor),tr0
1810 addi r20,32,r9
1811 msub.w r1,r4,r1
1812 madd.w r1,r1,r1
1813 mmulfx.w r1,r1,r4
1814 shlri r6,32,r7
/* r9 = 32 - r22; a positive value selects the large_divisor path.  */
1815 bgt/u r9,r63,tr0 // large_divisor
1816 mmulfx.w r5,r4,r4
1817 shlri r2,32+14,r19
1818 addi r22,-31,r0
1819 msub.w r1,r4,r1
1820
1821 mulu.l r1,r7,r4
1822 addi r1,-3,r5
1823 mulu.l r5,r19,r5
1824 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1825 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1826 the case may be, %0000000000000000 000.11111111111, still */
1827 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1828 mulu.l r5,r3,r5
1829 mshalds.l r1,r21,r1
1830 shari r4,26,r4
1831 shlld r5,r0,r5
1832 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1833 sub r2,r5,r2
1834 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1835
1836 shlri r2,22,r21
1837 mulu.l r21,r1,r21
1838 addi r20,30-22,r0
1839 /* bubble */ /* could test r3 here to check for divide by zero. */
1840 shlrd r21,r0,r21
1841 mulu.l r21,r3,r5
1842 mcmpgt.l r21,r63,r21 // See Note 1
1843 addi r20,30,r0
1844 mshfhi.l r63,r21,r21
1845 sub r2,r5,r2
1846 andc r2,r21,r2
1847
1848 /* small divisor: need a third divide step */
1849 mulu.l r2,r1,r7
1850 ptabs r18,tr0
1851 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1852 shlrd r7,r0,r7
1853 mulu.l r7,r3,r5
1854 /* bubble */
/* When r8 + 1 > r5 (signed compare), replace the rest in r2 by
   r8 = rest - r3; the product r5 is then subtracted either way.  */
1855 addi r8,1,r7
1856 cmpgt r7,r5,r7
1857 cmvne r7,r8,r2
1858 sub r2,r5,r2
1859 blink tr0,r63
1860
1861 LOCAL(large_divisor):
1862 mmulfx.w r5,r4,r4
1863 shlrd r2,r9,r25
1864 shlri r25,32,r8
1865 msub.w r1,r4,r1
1866
1867 mulu.l r1,r7,r4
1868 addi r1,-3,r5
1869 mulu.l r5,r8,r5
1870 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1871 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1872 the case may be, %0000000000000000 000.11111111111, still */
1873 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1874 shlri r5,14-1,r8
1875 mulu.l r8,r7,r5
1876 mshalds.l r1,r21,r1
1877 shari r4,26,r4
1878 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1879 sub r25,r5,r25
1880 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1881
1882 shlri r25,22,r21
1883 mulu.l r21,r1,r21
1884 pta LOCAL(no_lo_adj),tr0
1885 addi r22,32,r0
1886 shlri r21,40,r21
1887 mulu.l r21,r7,r5
1888 add r8,r21,r8
1889 shlld r2,r0,r2
1890 sub r25,r5,r25
/* Skip the +1 / -r7 correction when r7 is greater than the rest in r25.  */
1891 bgtu/u r7,r25,tr0 // no_lo_adj
1892 addi r8,1,r8
1893 sub r25,r7,r25
1894 LOCAL(no_lo_adj):
1895 mextr4 r2,r25,r2
1896
1897 /* large_divisor: only needs a few adjustments. */
1898 mulu.l r8,r6,r5
1899 ptabs r18,tr0
/* If r5 > r2 (unsigned), add the shifted divisor r6 to r2 before the
   product r5 is subtracted.  */
1900 add r2,r6,r7
1901 cmpgtu r5,r2,r8
1902 cmvne r8,r7,r2
1903 sub r2,r5,r2
/* Shift right by r22, the same count used to shift r3 left on entry.  */
1904 shlrd r2,r22,r2
1905 blink tr0,r63
1906 ENDFUNC(GLOBAL(umoddi3))
1907 /* Note 1: To shift the result of the second divide stage so that the result
1908 always fits into 32 bits, yet we still reduce the rest sufficiently
1909 would require a lot of instructions to do the shifts just right. Using
1910 the full 64 bit shift result to multiply with the divisor would require
1911 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1912 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1913 know that the rest after taking this partial result into account will
1914 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1915 upper 32 bits of the partial result are nonzero. */
1916 #endif /* __SHMEDIA__ */
1917 #endif /* L_umoddi3 */
1918
#ifdef L_moddi3
#ifdef __SHMEDIA__
/* Signed 64-bit remainder for SHmedia, layered on top of umoddi3.
   In:  r2, r3 = operands, r18 = return address.   Out: r2.
   Both operands are replaced by their absolute values.  When r2 was
   not negative the routine tail-calls umoddi3; otherwise it calls
   umoddi3 and negates r2 before returning.  The sign of r3 plays no
   part in that decision.
   Writes r22, r23, tr0 and tr1 here, plus whatever umoddi3 writes.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(moddi3)
	FUNC(GLOBAL(moddi3))
GLOBAL(moddi3):
	pta	GLOBAL(umoddi3),tr0
	shari	r2,63,r22	/* r22 = 0 or -1, the sign mask of r2 */
	shari	r3,63,r23	/* r23 = 0 or -1, the sign mask of r3 */
	xor	r2,r22,r2	/* (x ^ mask) - mask yields |x| */
	xor	r3,r23,r3
	sub	r2,r22,r2
	sub	r3,r23,r3
	beq/u	r22,r63,tr0	/* r2 was not negative: umoddi3 returns to our caller */
	ptabs	r18,tr1		/* keep our own return target in tr1 */
	blink	tr0,r18		/* call umoddi3 */
	sub	r63,r2,r2	/* r2 was negative: negate the result */
	blink	tr1,r63
	ENDFUNC(GLOBAL(moddi3))
#endif /* __SHMEDIA__ */
#endif /* L_moddi3 */
1942
/* GLOBAL(set_fpscr): load FPSCR from r4 and refresh the two words of
   GLOBAL(fpscr_values).
   In:     r4 = new FPSCR value.
   Writes: fpscr, r0, r1, r2, r3 and both words of fpscr_values
           (r12 is used on the PIC path but saved and restored).

   Two variants of r4 are derived by working on the half-swapped word
   in r0, so that immediates 8 and 16 address bits 19 and 20 of the
   original value:
     first variant  - bit 19 set; bit 20 set only if FMOVD_WORKS.
     second variant - bits 19 and 20 both clear.
   With __SH4__ or __SH2A_DOUBLE__ the first variant is stored at
   offset 4 and the second at offset 0; otherwise the offsets are
   exchanged.  The last store sits in the rts delay slot.
   NOTE(review): bits 19 / 20 are presumably FPSCR.PR / FPSCR.SZ -
   confirm against the CPU manual.  */
1943 #ifdef L_set_fpscr
1944 #if !defined (__SH2A_NOFPU__)
1945 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1946 #ifdef __SH5__
1947 .mode SHcompact
1948 #endif
1949 .global GLOBAL(set_fpscr)
1950 FUNC(GLOBAL(set_fpscr))
1951 GLOBAL(set_fpscr):
1952 lds r4,fpscr
/* r1 = address of fpscr_values: through the GOT when PIC, otherwise
   straight from the literal at set_fpscr_L1.  */
1953 #ifdef __PIC__
1954 mov.l r12,@-r15
1955 mova LOCAL(set_fpscr_L0),r0
1956 mov.l LOCAL(set_fpscr_L0),r12
1957 add r0,r12
1958 mov.l LOCAL(set_fpscr_L1),r0
1959 mov.l @(r0,r12),r1
1960 mov.l @r15+,r12
1961 #else
1962 mov.l LOCAL(set_fpscr_L1),r1
1963 #endif
/* First variant: set bits 3 and 4 of the swapped word, then clear
   bit 4 again unless FMOVD_WORKS.  */
1964 swap.w r4,r0
1965 or #24,r0
1966 #ifndef FMOVD_WORKS
1967 xor #16,r0
1968 #endif
1969 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1970 swap.w r0,r3
1971 mov.l r3,@(4,r1)
1972 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1973 swap.w r0,r2
1974 mov.l r2,@r1
1975 #endif
/* Second variant: toggle off whichever of bits 3 and 4 are still set.  */
1976 #ifndef FMOVD_WORKS
1977 xor #8,r0
1978 #else
1979 xor #24,r0
1980 #endif
1981 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1982 swap.w r0,r2
1983 rts
1984 mov.l r2,@r1
1985 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1986 swap.w r0,r3
1987 rts
1988 mov.l r3,@(4,r1)
1989 #endif
/* Literal pool read by the mov.l instructions above.  */
1990 .align 2
1991 #ifdef __PIC__
1992 LOCAL(set_fpscr_L0):
1993 .long _GLOBAL_OFFSET_TABLE_
1994 LOCAL(set_fpscr_L1):
1995 .long GLOBAL(fpscr_values@GOT)
1996 #else
1997 LOCAL(set_fpscr_L1):
1998 .long GLOBAL(fpscr_values)
1999 #endif
2000
2001 ENDFUNC(GLOBAL(set_fpscr))
/* Unless NO_FPSCR_VALUES is defined, reserve the 8 byte fpscr_values
   array as a common symbol.  */
2002 #ifndef NO_FPSCR_VALUES
2003 #ifdef __ELF__
2004 .comm GLOBAL(fpscr_values),8,4
2005 #else
2006 .comm GLOBAL(fpscr_values),8
2007 #endif /* ELF */
2008 #endif /* NO_FPSCR_VALUES */
2009 #endif /* SH2E / SH3E / SH4 */
2010 #endif /* __SH2A_NOFPU__ */
2011 #endif /* L_set_fpscr */
2012 #ifdef L_ic_invalidate
2013 #if __SH5__ == 32
/* SH5 (32-bit ABI) trampoline helpers, written in SHmedia.

   GLOBAL(init_trampoline): fills the 16 byte block at r0.
     Stores a fixed 64-bit word at r0+0, r2 at r0+8 and r3 at r0+12,
     then falls through into ic_invalidate.  Clobbers r20.
     The 64-bit word is assembled so that, in memory order, it holds
     the 16-bit values 0xd002, 0xd101, 0x402b, 0x0009 for either
     endianness.
     NOTE(review): these look like SHcompact opcodes (two PC-relative
     mov.l loads, jmp @r0, nop) - confirm against the ISA manual.

   GLOBAL(ic_invalidate): writes back the data cache block at r0
     (ocbwb), invalidates the instruction cache block at r0 (icbi)
     with synco / synci around it, and returns through r18.
     Clobbers tr0.  */
2014 .mode SHmedia
2015 .section .text..SHmedia32,"ax"
2016 .align 2
2017 .global GLOBAL(init_trampoline)
2018 FUNC(GLOBAL(init_trampoline))
2019 GLOBAL(init_trampoline):
2020 st.l r0,8,r2
2021 #ifdef __LITTLE_ENDIAN__
2022 movi 9,r20
2023 shori 0x402b,r20
2024 shori 0xd101,r20
2025 shori 0xd002,r20
2026 #else
2027 movi 0xffffffffffffd002,r20
2028 shori 0xd101,r20
2029 shori 0x402b,r20
2030 shori 9,r20
2031 #endif
2032 st.q r0,0,r20
2033 st.l r0,12,r3
/* init_trampoline falls through to here.  */
2034 .global GLOBAL(ic_invalidate)
2035 FUNC(GLOBAL(ic_invalidate))
2036 GLOBAL(ic_invalidate):
2037 ocbwb r0,0
2038 synco
2039 icbi r0, 0
2040 ptabs r18, tr0
2041 synci
2042 blink tr0, r63
2043
2044 ENDFUNC(GLOBAL(ic_invalidate))
2045 ENDFUNC(GLOBAL(init_trampoline))
2046 #elif defined(__SH4A__)
/* SH4A ic_invalidate.
   In: r4 = address inside the cache line to invalidate.
   Writes back the operand cache block holding @r4, then invalidates the
   matching instruction cache block.  No registers are modified.  */
	.global	GLOBAL(ic_invalidate)
	FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	@r4		/* write the data cache block back to memory */
	synco
	rts
	icbi	@r4		/* delay slot: invalidate the instruction cache block */
	ENDFUNC(GLOBAL(ic_invalidate))
2055 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
2056 /* This assumes a direct-mapped cache, which is the case for
2057 the first SH4, but not for the second version of SH4, that
2058 uses a 2-way set-associative cache, nor SH4a, that is 4-way.
2059 SH4a fortunately offers an instruction to invalidate the
2060 instruction cache, and we use it above, but SH4 doesn't.
2061 However, since the libraries don't contain any nested
2062 functions (the only case in which GCC would emit this pattern)
2063 and we actually emit the ic_invalidate_line_i pattern for
2064 cache invalidation on all SH4 multilibs (even 4-nofpu, that
2065 isn't even covered here), and pre-SH4 cores don't have
2066 caches, it seems like this code is pointless, unless it's
2067 meant for backward binary compatibility or for userland-only
2068 cache invalidation for say sh4-*-linux-gnu. Such a feature
2069 should probably be moved into a system call, such that the
2070 kernel could do whatever it takes to invalidate a cache line
2071 on the core it's actually running on. I.e., this hideous :-)
2072 piece of code should go away at some point. */
2073
/* SH4 ic_invalidate.
   In:       r4 = address inside the cache line to invalidate.
   Clobbers: r0, r1, r4.
   After writing back the data cache block at @r4, the code branches
   into an 8 KB table (256 slots of 32 bytes, aligned to 32 bytes) at
   the slot whose offset equals (r4 - table start) & 0x1fe0.  Each slot
   is an rts followed by 15 nops, so the branch returns to the caller
   immediately.  */
2074 .global GLOBAL(ic_invalidate)
2075 FUNC(GLOBAL(ic_invalidate))
2076 GLOBAL(ic_invalidate):
2077 ocbwb @r4
2078 mova 0f,r0
2079 mov.w 1f,r1
2080 /* Compute how many cache lines 0f is away from r4. */
2081 sub r0,r4
2082 and r1,r4
2083 /* Prepare to branch to 0f plus the cache-line offset. */
/* braf is relative to the address of label 1 (braf address + 4), so
   adding 0f - 1f turns the offset from 0f into a braf displacement.  */
2084 add # 0f - 1f,r4
2085 braf r4
2086 nop
2087 1:
/* Mask selecting the line index bits: 255 * 32.  */
2088 .short 0x1fe0
2089 .p2align 5
2090 /* This must be aligned to the beginning of a cache line. */
2091 0:
2092 .rept 256 /* There are 256 cache lines of 32 bytes. */
2093 rts
2094 .rept 15
2095 nop
2096 .endr
2097 .endr
2098
2099 ENDFUNC(GLOBAL(ic_invalidate))
2100 #endif /* SH4 */
2101 #endif /* L_ic_invalidate */
2102
2103 #if defined (__SH5__) && __SH5__ == 32
2104 #ifdef L_shcompact_call_trampoline
/* Dispatch table for the cookie decoder below: 33 16-bit entries, each
   the distance from ct_main_label to a handler.  The code biases the
   table base by -31 * 2 and indexes it with 2 * nsb (cookie); because
   the cookie is zero-extended from 32 bits, that count lies in 31..63.
   The first entry therefore serves a cookie with bit 31 set and the
   last entry (ct_call_func) a cookie of zero.  */
2105 .section .rodata
2106 .align 1
2107 LOCAL(ct_main_table):
2108 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2109 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2110 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2111 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2112 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2113 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2114 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2115 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2116 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2117 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2118 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2119 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2120 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2121 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2122 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2123 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2124 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2125 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2126 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2127 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2128 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2129 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2130 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2131 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2132 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2133 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2134 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2135 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2136 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2137 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2138 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2139 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2140 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2141 .mode SHmedia
2142 .section .text..SHmedia32, "ax"
2143 .align 2
2144
2145 /* This function loads 64-bit general-purpose registers from the
2146 stack, from a memory address contained in them or from an FP
2147 register, according to a cookie passed in r1. Its execution
2148 time is linear on the number of registers that actually have
2149 to be copied. See sh.h for details on the actual bit pattern.
2150
2151 The function to be called is passed in r0. If a 32-bit return
2152 value is expected, the actual function will be tail-called,
2153 otherwise the return address will be stored in r10 (that the
2154 caller should expect to be clobbered) and the return value
2155 will be expanded into r2/r3 upon return. */
2156
/* Register roles inside the trampoline, as used by the code below:
     tr0 = the function to call, tr1 = ct_loop, tr2 = scratch target,
     r0  = biased table base, r1 = remaining cookie bits,
     r28-r34 = scratch.
   Each handler clears its own field in r1 and branches back to
   ct_loop (tr1), which then dispatches on the next highest set bit.  */
2157 .global GLOBAL(GCC_shcompact_call_trampoline)
2158 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2159 GLOBAL(GCC_shcompact_call_trampoline):
2160 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2161 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2162 pt/l LOCAL(ct_loop), tr1
2163 addz.l r1, r63, r1
2164 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2165 LOCAL(ct_loop):
2166 nsb r1, r28
2167 shlli r28, 1, r29
2168 ldx.w r0, r29, r30
/* ct_main_label marks the ptrel instruction itself, matching the base
   the table entries were computed against.  */
2169 LOCAL(ct_main_label):
2170 ptrel/l r30, tr2
2171 blink tr2, r63
2172 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2173 /* It must be dr0, so just do it. */
2174 fmov.dq dr0, r2
2175 movi 7, r30
2176 shlli r30, 29, r31
2177 andc r1, r31, r1
2178 blink tr1, r63
2179 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2180 /* It is either dr0 or dr2. */
2181 movi 7, r30
2182 shlri r1, 26, r32
2183 shlli r30, 26, r31
2184 andc r1, r31, r1
2185 fmov.dq dr0, r3
2186 beqi/l r32, 4, tr1
2187 fmov.dq dr2, r3
2188 blink tr1, r63
/* The *_fp handlers for r4 and up pick one of several 8 byte
   (fmov.dq + blink) slots: two cookie bits, scaled by 8, are added to
   the distance between the *_fp_base ptrel and the *_fp_copy slots.  */
2189 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2190 shlri r1, 23 - 3, r34
2191 andi r34, 3 << 3, r33
2192 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2193 LOCAL(ct_r4_fp_base):
2194 ptrel/l r32, tr2
2195 movi 7, r30
2196 shlli r30, 23, r31
2197 andc r1, r31, r1
2198 blink tr2, r63
2199 LOCAL(ct_r4_fp_copy):
2200 fmov.dq dr0, r4
2201 blink tr1, r63
2202 fmov.dq dr2, r4
2203 blink tr1, r63
2204 fmov.dq dr4, r4
2205 blink tr1, r63
2206 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2207 shlri r1, 20 - 3, r34
2208 andi r34, 3 << 3, r33
2209 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2210 LOCAL(ct_r5_fp_base):
2211 ptrel/l r32, tr2
2212 movi 7, r30
2213 shlli r30, 20, r31
2214 andc r1, r31, r1
2215 blink tr2, r63
2216 LOCAL(ct_r5_fp_copy):
2217 fmov.dq dr0, r5
2218 blink tr1, r63
2219 fmov.dq dr2, r5
2220 blink tr1, r63
2221 fmov.dq dr4, r5
2222 blink tr1, r63
2223 fmov.dq dr6, r5
2224 blink tr1, r63
2225 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2226 /* It must be dr8. */
2227 fmov.dq dr8, r6
2228 movi 15, r30
2229 shlli r30, 16, r31
2230 andc r1, r31, r1
2231 blink tr1, r63
2232 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2233 shlri r1, 16 - 3, r34
2234 andi r34, 3 << 3, r33
2235 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2236 LOCAL(ct_r6_fp_base):
2237 ptrel/l r32, tr2
2238 movi 7, r30
2239 shlli r30, 16, r31
2240 andc r1, r31, r1
2241 blink tr2, r63
2242 LOCAL(ct_r6_fp_copy):
2243 fmov.dq dr0, r6
2244 blink tr1, r63
2245 fmov.dq dr2, r6
2246 blink tr1, r63
2247 fmov.dq dr4, r6
2248 blink tr1, r63
2249 fmov.dq dr6, r6
2250 blink tr1, r63
2251 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2252 /* It is either dr8 or dr10. */
2253 movi 15 << 12, r31
2254 shlri r1, 12, r32
2255 andc r1, r31, r1
2256 fmov.dq dr8, r7
2257 beqi/l r32, 8, tr1
2258 fmov.dq dr10, r7
2259 blink tr1, r63
2260 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2261 shlri r1, 12 - 3, r34
2262 andi r34, 3 << 3, r33
2263 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2264 LOCAL(ct_r7_fp_base):
2265 ptrel/l r32, tr2
2266 movi 7 << 12, r31
2267 andc r1, r31, r1
2268 blink tr2, r63
2269 LOCAL(ct_r7_fp_copy):
2270 fmov.dq dr0, r7
2271 blink tr1, r63
2272 fmov.dq dr2, r7
2273 blink tr1, r63
2274 fmov.dq dr4, r7
2275 blink tr1, r63
2276 fmov.dq dr6, r7
2277 blink tr1, r63
2278 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2279 /* It is either dr8 or dr10. */
2280 movi 15 << 8, r31
2281 andi r1, 1 << 8, r32
2282 andc r1, r31, r1
2283 fmov.dq dr8, r8
2284 beq/l r32, r63, tr1
2285 fmov.dq dr10, r8
2286 blink tr1, r63
2287 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2288 shlri r1, 8 - 3, r34
2289 andi r34, 3 << 3, r33
2290 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2291 LOCAL(ct_r8_fp_base):
2292 ptrel/l r32, tr2
2293 movi 7 << 8, r31
2294 andc r1, r31, r1
2295 blink tr2, r63
2296 LOCAL(ct_r8_fp_copy):
2297 fmov.dq dr0, r8
2298 blink tr1, r63
2299 fmov.dq dr2, r8
2300 blink tr1, r63
2301 fmov.dq dr4, r8
2302 blink tr1, r63
2303 fmov.dq dr6, r8
2304 blink tr1, r63
2305 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2306 /* It is either dr8 or dr10. */
2307 movi 15 << 4, r31
2308 andi r1, 1 << 4, r32
2309 andc r1, r31, r1
2310 fmov.dq dr8, r9
2311 beq/l r32, r63, tr1
2312 fmov.dq dr10, r9
2313 blink tr1, r63
2314 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2315 shlri r1, 4 - 3, r34
2316 andi r34, 3 << 3, r33
2317 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2318 LOCAL(ct_r9_fp_base):
2319 ptrel/l r32, tr2
2320 movi 7 << 4, r31
2321 andc r1, r31, r1
2322 blink tr2, r63
2323 LOCAL(ct_r9_fp_copy):
2324 fmov.dq dr0, r9
2325 blink tr1, r63
2326 fmov.dq dr2, r9
2327 blink tr1, r63
2328 fmov.dq dr4, r9
2329 blink tr1, r63
2330 fmov.dq dr6, r9
2331 blink tr1, r63
/* The *_ld handlers test a two-bit mask in the register field.  When
   both bits are set the handler branches to the matching *_load stub,
   which loads the register and returns to ct_loop.  Otherwise the next
   register receives this address + 8, this register is loaded, and
   control falls through to the handler for the next register.  */
2332 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2333 pt/l LOCAL(ct_r2_load), tr2
2334 movi 3, r30
2335 shlli r30, 29, r31
2336 and r1, r31, r32
2337 andc r1, r31, r1
2338 beq/l r31, r32, tr2
2339 addi.l r2, 8, r3
2340 ldx.q r2, r63, r2
2341 /* Fall through. */
2342 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2343 pt/l LOCAL(ct_r3_load), tr2
2344 movi 3, r30
2345 shlli r30, 26, r31
2346 and r1, r31, r32
2347 andc r1, r31, r1
2348 beq/l r31, r32, tr2
2349 addi.l r3, 8, r4
2350 ldx.q r3, r63, r3
2351 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2352 pt/l LOCAL(ct_r4_load), tr2
2353 movi 3, r30
2354 shlli r30, 23, r31
2355 and r1, r31, r32
2356 andc r1, r31, r1
2357 beq/l r31, r32, tr2
2358 addi.l r4, 8, r5
2359 ldx.q r4, r63, r4
2360 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2361 pt/l LOCAL(ct_r5_load), tr2
2362 movi 3, r30
2363 shlli r30, 20, r31
2364 and r1, r31, r32
2365 andc r1, r31, r1
2366 beq/l r31, r32, tr2
2367 addi.l r5, 8, r6
2368 ldx.q r5, r63, r5
2369 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2370 pt/l LOCAL(ct_r6_load), tr2
2371 movi 3 << 16, r31
2372 and r1, r31, r32
2373 andc r1, r31, r1
2374 beq/l r31, r32, tr2
2375 addi.l r6, 8, r7
2376 ldx.q r6, r63, r6
2377 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2378 pt/l LOCAL(ct_r7_load), tr2
2379 movi 3 << 12, r31
2380 and r1, r31, r32
2381 andc r1, r31, r1
2382 beq/l r31, r32, tr2
2383 addi.l r7, 8, r8
2384 ldx.q r7, r63, r7
2385 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2386 pt/l LOCAL(ct_r8_load), tr2
2387 movi 3 << 8, r31
2388 and r1, r31, r32
2389 andc r1, r31, r1
2390 beq/l r31, r32, tr2
2391 addi.l r8, 8, r9
2392 ldx.q r8, r63, r8
2393 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2394 pt/l LOCAL(ct_check_tramp), tr2
2395 ldx.q r9, r63, r9
2396 blink tr2, r63
2397 LOCAL(ct_r2_load):
2398 ldx.q r2, r63, r2
2399 blink tr1, r63
2400 LOCAL(ct_r3_load):
2401 ldx.q r3, r63, r3
2402 blink tr1, r63
2403 LOCAL(ct_r4_load):
2404 ldx.q r4, r63, r4
2405 blink tr1, r63
2406 LOCAL(ct_r5_load):
2407 ldx.q r5, r63, r5
2408 blink tr1, r63
2409 LOCAL(ct_r6_load):
2410 ldx.q r6, r63, r6
2411 blink tr1, r63
2412 LOCAL(ct_r7_load):
2413 ldx.q r7, r63, r7
2414 blink tr1, r63
2415 LOCAL(ct_r8_load):
2416 ldx.q r8, r63, r8
2417 blink tr1, r63
/* The *_pop handlers load 8 bytes from the stack at r15, advance r15
   by 8, clear their own cookie bit and return to ct_loop.  */
2418 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2419 movi 1, r30
2420 ldx.q r15, r63, r2
2421 shlli r30, 29, r31
2422 addi.l r15, 8, r15
2423 andc r1, r31, r1
2424 blink tr1, r63
2425 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2426 movi 1, r30
2427 ldx.q r15, r63, r3
2428 shlli r30, 26, r31
2429 addi.l r15, 8, r15
2430 andc r1, r31, r1
2431 blink tr1, r63
2432 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2433 movi 1, r30
2434 ldx.q r15, r63, r4
2435 shlli r30, 23, r31
2436 addi.l r15, 8, r15
2437 andc r1, r31, r1
2438 blink tr1, r63
2439 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2440 movi 1, r30
2441 ldx.q r15, r63, r5
2442 shlli r30, 20, r31
2443 addi.l r15, 8, r15
2444 andc r1, r31, r1
2445 blink tr1, r63
2446 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2447 movi 1, r30
2448 ldx.q r15, r63, r6
2449 shlli r30, 16, r31
2450 addi.l r15, 8, r15
2451 andc r1, r31, r1
2452 blink tr1, r63
2453 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2454 ldx.q r15, r63, r7
2455 movi 1 << 12, r31
2456 addi.l r15, 8, r15
2457 andc r1, r31, r1
2458 blink tr1, r63
2459 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2460 ldx.q r15, r63, r8
2461 movi 1 << 8, r31
2462 addi.l r15, 8, r15
2463 andc r1, r31, r1
2464 blink tr1, r63
/* ct_pop_seq jumps backwards from ct_end_of_pop_seq by
   (r1 & (7 << 1)) * 4 bytes.  Every pop below is two 4 byte
   instructions, so that lands on the first of (r1 >> 1) & 7 pops and
   execution runs through to ct_check_tramp.  */
2465 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2466 andi r1, 7 << 1, r30
2467 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2468 shlli r30, 2, r31
2469 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2470 sub.l r32, r31, r33
2471 ptabs/l r33, tr2
2472 blink tr2, r63
2473 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2474 ldx.q r15, r63, r3
2475 addi.l r15, 8, r15
2476 ldx.q r15, r63, r4
2477 addi.l r15, 8, r15
2478 ldx.q r15, r63, r5
2479 addi.l r15, 8, r15
2480 ldx.q r15, r63, r6
2481 addi.l r15, 8, r15
2482 ldx.q r15, r63, r7
2483 addi.l r15, 8, r15
2484 ldx.q r15, r63, r8
2485 addi.l r15, 8, r15
2486 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2487 ldx.q r15, r63, r9
2488 addi.l r15, 8, r15
2489 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
/* Bit 0 of the cookie selects ct_ret_wide; when it is clear the target
   function is simply tail-called.  */
2490 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2491 pt/u LOCAL(ct_ret_wide), tr2
2492 andi r1, 1, r1
2493 bne/u r1, r63, tr2
2494 LOCAL(ct_call_func): /* Just branch to the function. */
2495 blink tr0, r63
2496 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2497 64-bit return value. */
/* The return address is parked in r10 across the call.  Afterwards the
   64-bit value in r2 is split: the upper half goes to r3 on little
   endian and to r2 on big endian, the lower half to the other one.  */
2498 add.l r18, r63, r10
2499 blink tr0, r18
2500 ptabs r10, tr0
2501 #if __LITTLE_ENDIAN__
2502 shari r2, 32, r3
2503 add.l r2, r63, r2
2504 #else
2505 add.l r2, r63, r3
2506 shari r2, 32, r2
2507 #endif
2508 blink tr0, r63
2509
2510 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2511 #endif /* L_shcompact_call_trampoline */
2512
2513 #ifdef L_shcompact_return_trampoline
2514 /* This function does the converse of the code in `ret_wide'
2515 above. It is tail-called by SHcompact functions returning
2516 64-bit non-floating-point values, to pack the 32-bit values in
2517 r2 and r3 into r2. */
/* Register use, as read from the code below:
   r18    - address this routine branches to when done (loaded into tr0).
   r2, r3 - the two 32-bit halves on entry.  With __LITTLE_ENDIAN__ set,
            r2 supplies the low half and r3 the high half; otherwise
            the roles are swapped.
   r2     - packed 64-bit result on exit.
   r3     - modified: it is left holding its intermediate (shifted or
            zero-extended) half.  */
2518
2519 .mode SHmedia
2520 .section .text..SHmedia32, "ax"
2521 .align 2
2522 .global GLOBAL(GCC_shcompact_return_trampoline)
2523 FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2524 GLOBAL(GCC_shcompact_return_trampoline):
2525 ptabs/l r18, tr0 /* tr0 = branch target taken from r18. */
2526 #if __LITTLE_ENDIAN__
2527 addz.l r2, r63, r2 /* Low half: r2, zero-extended from 32 bits. */
2528 shlli r3, 32, r3 /* High half: r3 moved into bits 32..63. */
2529 #else
2530 addz.l r3, r63, r3 /* Low half: r3, zero-extended from 32 bits. */
2531 shlli r2, 32, r2 /* High half: r2 moved into bits 32..63. */
2532 #endif
2533 or r3, r2, r2 /* Merge both halves into r2. */
2534 blink tr0, r63 /* Branch to tr0; link value is written to r63, i.e. not kept. */
2535
2536 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2537 #endif /* L_shcompact_return_trampoline */
2538
2539 #ifdef L_shcompact_incoming_args
/* Dispatch table for GCC_shcompact_incoming_args.  It holds 33 16-bit
   entries.  Each valid entry is the offset of a handler relative to
   ia_main_label, which is where the ptrel that consumes the entry
   sits.  The code indexes the table with (nsb (cookie) * 2) from a
   base that is biased by -31 * 2, so an nsb result of 31 selects the
   first entry and an nsb result of 63 selects the last one.  The
   meaning of the individual cookie bit patterns is defined in sh.h
   and is not restated here.  */
2540 .section .rodata
2541 .align 1
2542 LOCAL(ia_main_table):
2543 .word 1 /* Invalid, just loop */
2544 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2545 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2546 .word 1 /* Invalid, just loop */
2547 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2548 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2549 .word 1 /* Invalid, just loop */
2550 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2551 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2552 .word 1 /* Invalid, just loop */
2553 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2554 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2555 .word 1 /* Invalid, just loop */
2556 .word 1 /* Invalid, just loop */
2557 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2558 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2559 .word 1 /* Invalid, just loop */
2560 .word 1 /* Invalid, just loop */
2561 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2562 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2563 .word 1 /* Invalid, just loop */
2564 .word 1 /* Invalid, just loop */
2565 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2566 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2567 .word 1 /* Invalid, just loop */
2568 .word 1 /* Invalid, just loop */
2569 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2570 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2571 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2572 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2573 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2574 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2575 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2576 .mode SHmedia
2577 .section .text..SHmedia32, "ax"
2578 .align 2
2579
2580 /* This function stores 64-bit general-purpose registers back in
2581 the stack, and loads the address in which each register
2582 was stored into itself. The lower 32 bits of r17 hold the address
2583 to begin storing, and the upper 32 bits of r17 hold the cookie.
2584 Its execution time is linear on the
2585 number of registers that actually have to be copied, and it is
2586 optimized for structures larger than 64 bits, as opposed to
2587 individual `long long' arguments. See sh.h for details on the
2588 actual bit pattern. */
/* Register use, as read from the code below:
   r0       - remaining cookie; handlers clear the bits they consume.
   r17      - current store address, advanced by 8 after each store.
   r18      - address branched to on completion (via tr0).
   r2..r9   - registers that get stored and/or replaced by addresses.
   r36..r41, r43 - scratch.
   tr0      - exit target, tr1 - ia_loop, tr2 - computed handler.  */
2589
2590 .global GLOBAL(GCC_shcompact_incoming_args)
2591 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2592 GLOBAL(GCC_shcompact_incoming_args):
2593 ptabs/l r18, tr0 /* Prepare to return. */
2594 shlri r17, 32, r0 /* Load the cookie. */
2595 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* Upper 16 bits of the biased table base. */
2596 pt/l LOCAL(ia_loop), tr1 /* tr1 = top of the dispatch loop. */
2597 add.l r17, r63, r17 /* 32-bit add with r63: keeps only the address part of r17. */
2598 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* Shift in the lower 16 bits of the biased base. */
2599 LOCAL(ia_loop):
2600 nsb r0, r36 /* r36 = nsb of the remaining cookie; selects the handler. */
2601 shlli r36, 1, r37 /* Scale by 2: table entries are 16 bits wide. */
2602 ldx.w r43, r37, r38 /* r38 = handler offset relative to ia_main_label. */
2603 LOCAL(ia_main_label):
2604 ptrel/l r38, tr2 /* tr2 = this location + table offset. */
2605 blink tr2, r63 /* Jump to the selected handler. */
/* ia_rN_ld handlers: build the two-bit mask for rN, remember whether
   both mask bits were set in the cookie (r40 == r39), clear them from
   the cookie, store rN at r17, replace rN with that address and
   advance r17.  If both bits were set, go back to ia_loop; otherwise
   fall through into the handler for the next register.  */
2606 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2607 movi 3, r38
2608 shlli r38, 29, r39 /* r39 = 3 << 29, the mask for r2. */
2609 and r0, r39, r40 /* r40 = cookie bits under the mask. */
2610 andc r0, r39, r0 /* Clear those bits from the cookie. */
2611 stx.q r17, r63, r2 /* Store the 64-bit r2 at r17. */
2612 add.l r17, r63, r2 /* r2 = address it was stored at. */
2613 addi.l r17, 8, r17 /* Advance to the next 8-byte slot. */
2614 beq/u r39, r40, tr1 /* Both mask bits were set: back to ia_loop. */
2615 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2616 movi 3, r38
2617 shlli r38, 26, r39 /* Mask 3 << 26; same pattern as ia_r2_ld. */
2618 and r0, r39, r40
2619 andc r0, r39, r0
2620 stx.q r17, r63, r3
2621 add.l r17, r63, r3
2622 addi.l r17, 8, r17
2623 beq/u r39, r40, tr1
2624 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2625 movi 3, r38
2626 shlli r38, 23, r39 /* Mask 3 << 23; same pattern as ia_r2_ld. */
2627 and r0, r39, r40
2628 andc r0, r39, r0
2629 stx.q r17, r63, r4
2630 add.l r17, r63, r4
2631 addi.l r17, 8, r17
2632 beq/u r39, r40, tr1
2633 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2634 movi 3, r38
2635 shlli r38, 20, r39 /* Mask 3 << 20; same pattern as ia_r2_ld. */
2636 and r0, r39, r40
2637 andc r0, r39, r0
2638 stx.q r17, r63, r5
2639 add.l r17, r63, r5
2640 addi.l r17, 8, r17
2641 beq/u r39, r40, tr1
2642 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2643 movi 3, r38
2644 shlli r38, 16, r39 /* Mask 3 << 16; same pattern as ia_r2_ld. */
2645 and r0, r39, r40
2646 andc r0, r39, r0
2647 stx.q r17, r63, r6
2648 add.l r17, r63, r6
2649 addi.l r17, 8, r17
2650 beq/u r39, r40, tr1
2651 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2652 movi 3 << 12, r39 /* Mask loaded directly as an immediate. */
2653 and r0, r39, r40
2654 andc r0, r39, r0
2655 stx.q r17, r63, r7
2656 add.l r17, r63, r7
2657 addi.l r17, 8, r17
2658 beq/u r39, r40, tr1
2659 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2660 movi 3 << 8, r39 /* Mask loaded directly as an immediate. */
2661 and r0, r39, r40
2662 andc r0, r39, r0
2663 stx.q r17, r63, r8
2664 add.l r17, r63, r8
2665 addi.l r17, 8, r17
2666 beq/u r39, r40, tr1
2667 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2668 stx.q r17, r63, r9
2669 add.l r17, r63, r9
2670 blink tr0, r63 /* Last handler of the chain: leave through tr0 without touching the cookie. */
/* ia_rN_push handlers: clear the single bit (1 << k) for rN from the
   cookie, store rN at r17, advance r17 and go back to ia_loop.  Unlike
   the ia_rN_ld handlers, rN itself is left unchanged.  */
2671 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2672 movi 1, r38
2673 shlli r38, 29, r39 /* r39 = 1 << 29. */
2674 andc r0, r39, r0 /* Clear that bit from the cookie. */
2675 stx.q r17, r63, r2
2676 addi.l r17, 8, r17
2677 blink tr1, r63 /* Back to ia_loop. */
2678 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2679 movi 1, r38
2680 shlli r38, 26, r39
2681 andc r0, r39, r0
2682 stx.q r17, r63, r3
2683 addi.l r17, 8, r17
2684 blink tr1, r63
2685 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2686 movi 1, r38
2687 shlli r38, 23, r39
2688 andc r0, r39, r0
2689 stx.q r17, r63, r4
2690 addi.l r17, 8, r17
2691 blink tr1, r63
2692 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2693 movi 1, r38
2694 shlli r38, 20, r39
2695 andc r0, r39, r0
2696 stx.q r17, r63, r5
2697 addi.l r17, 8, r17
2698 blink tr1, r63
2699 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2700 movi 1, r38
2701 shlli r38, 16, r39
2702 andc r0, r39, r0
2703 stx.q r17, r63, r6
2704 addi.l r17, 8, r17
2705 blink tr1, r63
2706 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2707 movi 1 << 12, r39
2708 andc r0, r39, r0
2709 stx.q r17, r63, r7
2710 addi.l r17, 8, r17
2711 blink tr1, r63
2712 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2713 movi 1 << 8, r39
2714 andc r0, r39, r0
2715 stx.q r17, r63, r8
2716 addi.l r17, 8, r17
2717 blink tr1, r63
/* ia_push_seq: computed entry into the run of stores below.  The
   field in cookie bits 1..3 is scaled so that a field value of k
   yields k * 8 bytes, and the entry point is that many bytes before
   ia_end_of_push_seq.  Each register takes two instructions (store
   plus advance); the final pair is the r9 store followed by the
   returning blink.  NOTE(review): this relies on every instruction in
   the run being 4 bytes long -- keep the layout below unchanged.  */
2718 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2719 andi r0, 7 << 1, r38 /* r38 = cookie bits 1..3, still shifted left by 1. */
2720 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 /* Upper 16 bits of the end label. */
2721 shlli r38, 2, r39 /* r39 = field value * 8. */
2722 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* Shift in the lower 16 bits. */
2723 sub.l r40, r39, r41 /* r41 = entry point inside the run. */
2724 ptabs/l r41, tr2
2725 blink tr2, r63
/* NOTE(review): the label name below presumably was meant to read
   "start"; nothing in this function references it.  */
2726 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2727 stx.q r17, r63, r3
2728 addi.l r17, 8, r17
2729 stx.q r17, r63, r4
2730 addi.l r17, 8, r17
2731 stx.q r17, r63, r5
2732 addi.l r17, 8, r17
2733 stx.q r17, r63, r6
2734 addi.l r17, 8, r17
2735 stx.q r17, r63, r7
2736 addi.l r17, 8, r17
2737 stx.q r17, r63, r8
2738 addi.l r17, 8, r17
2739 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2740 stx.q r17, r63, r9
2741 LOCAL(ia_return): /* Return. */
2742 blink tr0, r63
2743 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2744 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2745 #endif /* L_shcompact_incoming_args */
2746 #endif
2747 #if __SH5__
2748 #ifdef L_nested_trampoline
/* GCC_nested_trampoline: template code that reads two values stored at
   fixed offsets from its own start and branches to the first one.
     offset 24          - branch target (loaded into tr1).
     offset 28 (or 32)  - value left in r1 when the branch is taken;
                          offset 32 is used when __SH5__ == 64, where
                          both loads are 64 bits wide.
   NOTE(review): nothing in this block defines the data at those
   offsets; presumably it is filled in by whatever copies this
   template, with r1 carrying the static chain -- confirm against the
   trampoline setup code in the compiler.  Do not add, remove or
   reorder instructions here without updating those offsets.  */
2749 #if __SH5__ == 32
2750 .section .text..SHmedia32,"ax"
2751 #else
2752 .text
2753 #endif
2754 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2755 .global GLOBAL(GCC_nested_trampoline)
2756 FUNC(GLOBAL(GCC_nested_trampoline))
2757 GLOBAL(GCC_nested_trampoline):
2758 .mode SHmedia
2759 ptrel/u r63, tr0 /* PC-relative target with a zero offset (r63)... */
2760 gettr tr0, r0 /* ...read back so r0 holds an address within this code. */
2761 #if __SH5__ == 64
2762 ld.q r0, 24, r1 /* r1 = 64-bit word at offset 24. */
2763 #else
2764 ld.l r0, 24, r1 /* r1 = 32-bit word at offset 24. */
2765 #endif
2766 ptabs/l r1, tr1 /* tr1 = branch target just loaded. */
2767 #if __SH5__ == 64
2768 ld.q r0, 32, r1 /* r1 = 64-bit word at offset 32. */
2769 #else
2770 ld.l r0, 28, r1 /* r1 = 32-bit word at offset 28. */
2771 #endif
2772 blink tr1, r63 /* Branch to the target without keeping a link value. */
2773
2774 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2775 #endif /* L_nested_trampoline */
2776 #endif /* __SH5__ */
2777 #if __SH5__ == 32
2778 #ifdef L_push_pop_shmedia_regs
/* Bulk register save/restore helpers.  All four entry points use r15
   as the stack pointer and branch to the address in r18 when done.

   GCC_push_shmedia_regs        - reserves 14 slots and saves dr36..dr62
                                  (even numbers), then falls through into
                                  the _nofpu entry point.
   GCC_push_shmedia_regs_nofpu  - reserves 27 slots and saves tr5..tr7,
                                  r44..r59 and r28..r35.
   GCC_pop_shmedia_regs         - reloads dr36..dr62 from slots 27..40,
                                  then joins the _nofpu code at .L0 with
                                  r0 = 41*8 as the amount to release.
   GCC_pop_shmedia_regs_nofpu   - reloads the integer and target
                                  registers and releases 27*8 bytes.

   Slots are 8 bytes each.  r60..r62 are used as scratch to carry
   tr5..tr7, so their own entry values are not saved by the push
   routines, and after a pop they hold the restored target values.
   The pop routines also overwrite r0.  */
2779 .section .text..SHmedia32,"ax"
2780 .mode SHmedia
2781 .align 2
2782 #ifndef __SH4_NOFPU__
2783 .global GLOBAL(GCC_push_shmedia_regs)
2784 FUNC(GLOBAL(GCC_push_shmedia_regs))
2785 GLOBAL(GCC_push_shmedia_regs):
2786 addi.l r15, -14*8, r15 /* Reserve 14 slots for the FP registers. */
2787 fst.d r15, 13*8, dr62
2788 fst.d r15, 12*8, dr60
2789 fst.d r15, 11*8, dr58
2790 fst.d r15, 10*8, dr56
2791 fst.d r15, 9*8, dr54
2792 fst.d r15, 8*8, dr52
2793 fst.d r15, 7*8, dr50
2794 fst.d r15, 6*8, dr48
2795 fst.d r15, 5*8, dr46
2796 fst.d r15, 4*8, dr44
2797 fst.d r15, 3*8, dr42
2798 fst.d r15, 2*8, dr40
2799 fst.d r15, 1*8, dr38
2800 fst.d r15, 0*8, dr36
2801 #endif
/* Execution continues from the FP saves above straight into the
   _nofpu entry point.  */
2802 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2803 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2804 GLOBAL(GCC_push_shmedia_regs_nofpu):
2805 ptabs/l r18, tr0 /* tr0 = address to branch to when done. */
2806 addi.l r15, -27*8, r15 /* Reserve 27 slots. */
2807 gettr tr7, r62 /* Copy tr7..tr5 into r62..r60 so they can be stored. */
2808 gettr tr6, r61
2809 gettr tr5, r60
2810 st.q r15, 26*8, r62 /* tr7 */
2811 st.q r15, 25*8, r61 /* tr6 */
2812 st.q r15, 24*8, r60 /* tr5 */
2813 st.q r15, 23*8, r59
2814 st.q r15, 22*8, r58
2815 st.q r15, 21*8, r57
2816 st.q r15, 20*8, r56
2817 st.q r15, 19*8, r55
2818 st.q r15, 18*8, r54
2819 st.q r15, 17*8, r53
2820 st.q r15, 16*8, r52
2821 st.q r15, 15*8, r51
2822 st.q r15, 14*8, r50
2823 st.q r15, 13*8, r49
2824 st.q r15, 12*8, r48
2825 st.q r15, 11*8, r47
2826 st.q r15, 10*8, r46
2827 st.q r15, 9*8, r45
2828 st.q r15, 8*8, r44
2829 st.q r15, 7*8, r35
2830 st.q r15, 6*8, r34
2831 st.q r15, 5*8, r33
2832 st.q r15, 4*8, r32
2833 st.q r15, 3*8, r31
2834 st.q r15, 2*8, r30
2835 st.q r15, 1*8, r29
2836 st.q r15, 0*8, r28
2837 blink tr0, r63
2838
2839 #ifndef __SH4_NOFPU__
2840 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2841 #endif
2842 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2843 #ifndef __SH4_NOFPU__
2844 .global GLOBAL(GCC_pop_shmedia_regs)
2845 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2846 GLOBAL(GCC_pop_shmedia_regs):
2847 pt .L0, tr1 /* tr1 = shared restore code in the _nofpu routine. */
2848 movi 41*8, r0 /* Bytes to release: 27 integer slots + 14 FP slots. */
2849 fld.d r15, 40*8, dr62 /* FP slots sit above the 27 integer slots. */
2850 fld.d r15, 39*8, dr60
2851 fld.d r15, 38*8, dr58
2852 fld.d r15, 37*8, dr56
2853 fld.d r15, 36*8, dr54
2854 fld.d r15, 35*8, dr52
2855 fld.d r15, 34*8, dr50
2856 fld.d r15, 33*8, dr48
2857 fld.d r15, 32*8, dr46
2858 fld.d r15, 31*8, dr44
2859 fld.d r15, 30*8, dr42
2860 fld.d r15, 29*8, dr40
2861 fld.d r15, 28*8, dr38
2862 fld.d r15, 27*8, dr36
2863 blink tr1, r63 /* Jump to .L0, skipping the movi that would reset r0 to 27*8. */
2864 #endif
2865 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2866 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2867 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2868 movi 27*8, r0 /* Bytes to release when only the integer slots exist. */
2869 .L0: /* Shared tail: r0 holds the number of bytes to release. */
2870 ptabs r18, tr0 /* tr0 = address to branch to when done. */
2871 ld.q r15, 26*8, r62 /* Saved tr7. */
2872 ld.q r15, 25*8, r61 /* Saved tr6. */
2873 ld.q r15, 24*8, r60 /* Saved tr5. */
2874 ptabs r62, tr7 /* Reload tr7..tr5 from the values just read. */
2875 ptabs r61, tr6
2876 ptabs r60, tr5
2877 ld.q r15, 23*8, r59
2878 ld.q r15, 22*8, r58
2879 ld.q r15, 21*8, r57
2880 ld.q r15, 20*8, r56
2881 ld.q r15, 19*8, r55
2882 ld.q r15, 18*8, r54
2883 ld.q r15, 17*8, r53
2884 ld.q r15, 16*8, r52
2885 ld.q r15, 15*8, r51
2886 ld.q r15, 14*8, r50
2887 ld.q r15, 13*8, r49
2888 ld.q r15, 12*8, r48
2889 ld.q r15, 11*8, r47
2890 ld.q r15, 10*8, r46
2891 ld.q r15, 9*8, r45
2892 ld.q r15, 8*8, r44
2893 ld.q r15, 7*8, r35
2894 ld.q r15, 6*8, r34
2895 ld.q r15, 5*8, r33
2896 ld.q r15, 4*8, r32
2897 ld.q r15, 3*8, r31
2898 ld.q r15, 2*8, r30
2899 ld.q r15, 1*8, r29
2900 ld.q r15, 0*8, r28
2901 add.l r15, r0, r15 /* Release the save area (27*8 or 41*8 bytes). */
2902 blink tr0, r63
2903
2904 #ifndef __SH4_NOFPU__
2905 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2906 #endif
2907 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2908 #endif /* L_push_pop_shmedia_regs */
2909 #endif /* __SH5__ == 32 */