1f36f73dbc3e17697fa9e3ec8e0d76d61c52f4ed
[gcc.git] / gcc / config / sh / lib1funcs.asm
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005, 2006, 2009
3 Free Software Foundation, Inc.
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24
25 !! libgcc routines for the Renesas / SuperH SH CPUs.
26 !! Contributed by Steve Chamberlain.
27 !! sac@cygnus.com
28
29 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
30 !! recoded in assembly by Toshiyasu Morita
31 !! tm@netcom.com
32
33 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
34 ELF local label prefixes by J"orn Rennecke
35 amylaar@cygnus.com */
36
37 #include "lib1funcs.h"
38
39 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
40 so it is more convenient to define NO_FPSCR_VALUES here than to
41 define it on the command line. */
42 #if defined __vxworks && defined __PIC__
43 #define NO_FPSCR_VALUES
44 #endif
45
46 #if ! __SH5__
47 #ifdef L_ashiftrt
/* ashiftrt_r4_N (N = 0..32): arithmetic right shift of r4 by the constant
   N, performed in place.
     In:  r4 = value to shift
     Out: r4 = value shifted right by N with sign propagation
   Only r4 is written; the shar / rotcl / subc steps also change the T bit.
   The entry points form fall-through ladders: entering at label N runs the
   remaining one-bit shifts and then either a wider logical shift followed
   by a sign extension (N >= 16) or the final one-bit shift.  Each rts is
   followed by a delay-slot instruction that completes the result.  */
48 .global GLOBAL(ashiftrt_r4_0)
49 .global GLOBAL(ashiftrt_r4_1)
50 .global GLOBAL(ashiftrt_r4_2)
51 .global GLOBAL(ashiftrt_r4_3)
52 .global GLOBAL(ashiftrt_r4_4)
53 .global GLOBAL(ashiftrt_r4_5)
54 .global GLOBAL(ashiftrt_r4_6)
55 .global GLOBAL(ashiftrt_r4_7)
56 .global GLOBAL(ashiftrt_r4_8)
57 .global GLOBAL(ashiftrt_r4_9)
58 .global GLOBAL(ashiftrt_r4_10)
59 .global GLOBAL(ashiftrt_r4_11)
60 .global GLOBAL(ashiftrt_r4_12)
61 .global GLOBAL(ashiftrt_r4_13)
62 .global GLOBAL(ashiftrt_r4_14)
63 .global GLOBAL(ashiftrt_r4_15)
64 .global GLOBAL(ashiftrt_r4_16)
65 .global GLOBAL(ashiftrt_r4_17)
66 .global GLOBAL(ashiftrt_r4_18)
67 .global GLOBAL(ashiftrt_r4_19)
68 .global GLOBAL(ashiftrt_r4_20)
69 .global GLOBAL(ashiftrt_r4_21)
70 .global GLOBAL(ashiftrt_r4_22)
71 .global GLOBAL(ashiftrt_r4_23)
72 .global GLOBAL(ashiftrt_r4_24)
73 .global GLOBAL(ashiftrt_r4_25)
74 .global GLOBAL(ashiftrt_r4_26)
75 .global GLOBAL(ashiftrt_r4_27)
76 .global GLOBAL(ashiftrt_r4_28)
77 .global GLOBAL(ashiftrt_r4_29)
78 .global GLOBAL(ashiftrt_r4_30)
79 .global GLOBAL(ashiftrt_r4_31)
80 .global GLOBAL(ashiftrt_r4_32)
81
82 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
83 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
115
116 .align 1
/* Shift by 31 or 32: the result is the sign bit replicated into every bit.  */
117 GLOBAL(ashiftrt_r4_32):
118 GLOBAL(ashiftrt_r4_31):
119 rotcl r4 /* T = sign bit of r4 */
120 rts
121 subc r4,r4 /* delay slot: r4 = 0 - T, i.e. 0 or -1 */
122
/* Shifts 24..30: (N - 24) one-bit shifts, then a 24-bit shift.  */
123 GLOBAL(ashiftrt_r4_30):
124 shar r4
125 GLOBAL(ashiftrt_r4_29):
126 shar r4
127 GLOBAL(ashiftrt_r4_28):
128 shar r4
129 GLOBAL(ashiftrt_r4_27):
130 shar r4
131 GLOBAL(ashiftrt_r4_26):
132 shar r4
133 GLOBAL(ashiftrt_r4_25):
134 shar r4
135 GLOBAL(ashiftrt_r4_24):
136 shlr16 r4 /* logical shift by 16 + 8 = 24 ... */
137 shlr8 r4
138 rts
139 exts.b r4,r4 /* delay slot: ... then sign-extend the remaining byte */
140
/* Shifts 16..23: (N - 16) one-bit shifts, then a 16-bit shift.  */
141 GLOBAL(ashiftrt_r4_23):
142 shar r4
143 GLOBAL(ashiftrt_r4_22):
144 shar r4
145 GLOBAL(ashiftrt_r4_21):
146 shar r4
147 GLOBAL(ashiftrt_r4_20):
148 shar r4
149 GLOBAL(ashiftrt_r4_19):
150 shar r4
151 GLOBAL(ashiftrt_r4_18):
152 shar r4
153 GLOBAL(ashiftrt_r4_17):
154 shar r4
155 GLOBAL(ashiftrt_r4_16):
156 shlr16 r4 /* logical shift by 16 ... */
157 rts
158 exts.w r4,r4 /* delay slot: ... then sign-extend the remaining word */
159
/* Shifts 1..15: N one-bit shifts, the last one in the rts delay slot.  */
160 GLOBAL(ashiftrt_r4_15):
161 shar r4
162 GLOBAL(ashiftrt_r4_14):
163 shar r4
164 GLOBAL(ashiftrt_r4_13):
165 shar r4
166 GLOBAL(ashiftrt_r4_12):
167 shar r4
168 GLOBAL(ashiftrt_r4_11):
169 shar r4
170 GLOBAL(ashiftrt_r4_10):
171 shar r4
172 GLOBAL(ashiftrt_r4_9):
173 shar r4
174 GLOBAL(ashiftrt_r4_8):
175 shar r4
176 GLOBAL(ashiftrt_r4_7):
177 shar r4
178 GLOBAL(ashiftrt_r4_6):
179 shar r4
180 GLOBAL(ashiftrt_r4_5):
181 shar r4
182 GLOBAL(ashiftrt_r4_4):
183 shar r4
184 GLOBAL(ashiftrt_r4_3):
185 shar r4
186 GLOBAL(ashiftrt_r4_2):
187 shar r4
188 GLOBAL(ashiftrt_r4_1):
189 rts
190 shar r4 /* delay slot: final one-bit shift */
191
/* Shift by 0: return r4 unchanged.  */
192 GLOBAL(ashiftrt_r4_0):
193 rts
194 nop
195
196 ENDFUNC(GLOBAL(ashiftrt_r4_0))
197 ENDFUNC(GLOBAL(ashiftrt_r4_1))
198 ENDFUNC(GLOBAL(ashiftrt_r4_2))
199 ENDFUNC(GLOBAL(ashiftrt_r4_3))
200 ENDFUNC(GLOBAL(ashiftrt_r4_4))
201 ENDFUNC(GLOBAL(ashiftrt_r4_5))
202 ENDFUNC(GLOBAL(ashiftrt_r4_6))
203 ENDFUNC(GLOBAL(ashiftrt_r4_7))
204 ENDFUNC(GLOBAL(ashiftrt_r4_8))
205 ENDFUNC(GLOBAL(ashiftrt_r4_9))
206 ENDFUNC(GLOBAL(ashiftrt_r4_10))
207 ENDFUNC(GLOBAL(ashiftrt_r4_11))
208 ENDFUNC(GLOBAL(ashiftrt_r4_12))
209 ENDFUNC(GLOBAL(ashiftrt_r4_13))
210 ENDFUNC(GLOBAL(ashiftrt_r4_14))
211 ENDFUNC(GLOBAL(ashiftrt_r4_15))
212 ENDFUNC(GLOBAL(ashiftrt_r4_16))
213 ENDFUNC(GLOBAL(ashiftrt_r4_17))
214 ENDFUNC(GLOBAL(ashiftrt_r4_18))
215 ENDFUNC(GLOBAL(ashiftrt_r4_19))
216 ENDFUNC(GLOBAL(ashiftrt_r4_20))
217 ENDFUNC(GLOBAL(ashiftrt_r4_21))
218 ENDFUNC(GLOBAL(ashiftrt_r4_22))
219 ENDFUNC(GLOBAL(ashiftrt_r4_23))
220 ENDFUNC(GLOBAL(ashiftrt_r4_24))
221 ENDFUNC(GLOBAL(ashiftrt_r4_25))
222 ENDFUNC(GLOBAL(ashiftrt_r4_26))
223 ENDFUNC(GLOBAL(ashiftrt_r4_27))
224 ENDFUNC(GLOBAL(ashiftrt_r4_28))
225 ENDFUNC(GLOBAL(ashiftrt_r4_29))
226 ENDFUNC(GLOBAL(ashiftrt_r4_30))
227 ENDFUNC(GLOBAL(ashiftrt_r4_31))
228 ENDFUNC(GLOBAL(ashiftrt_r4_32))
229 #endif
230
231 #ifdef L_ashiftrt_n
232
233 !
234 ! GLOBAL(ashrsi3)
235 !
236 ! Entry:
237 !
238 ! r4: Value to shift
239 ! r5: Shifts
240 !
241 ! Exit:
242 !
243 ! r0: Result
244 !
245 ! Destroys:
246 !
247 ! r5 (overwritten with the jump-table offset), T bit
248 !
249
/* ashrsi3: arithmetic right shift by a run-time count.
     In:  r4 = value, r5 = shift count (masked to its low 5 bits here)
     Out: r0 = r4 shifted right by (r5 & 31) with sign propagation
   r5 is overwritten with the dispatch offset, and the shar / rotcl / subc
   steps change the T bit.
   Dispatch: the masked count indexes a table of byte offsets, each one the
   distance from the table start to the entry point for that count.  The
   offsets are loaded with mov.b, which sign-extends, so every entry point
   must stay within 127 bytes of the table.  */
250 .global GLOBAL(ashrsi3)
251 HIDDEN_FUNC(GLOBAL(ashrsi3))
252 .align 2
253 GLOBAL(ashrsi3):
254 mov #31,r0
255 and r0,r5 /* r5 = count & 31 */
256 mova LOCAL(ashrsi3_table),r0 /* r0 = address of the offset table */
257 mov.b @(r0,r5),r5 /* r5 = offset of the entry point for this count */
258 #ifdef __sh1__
259 add r5,r0 /* no braf here: form the absolute address instead */
260 jmp @r0
261 #else
262 braf r5 /* PC-relative: the table starts right after the delay slot, so the table offset is the displacement */
263 #endif
264 mov r4,r0 /* delay slot: r0 = value to shift */
265
266 .align 2
267 LOCAL(ashrsi3_table):
268 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
269 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
270 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
300
/* Shift by 31: the result is the sign bit replicated into every bit.  */
301 LOCAL(ashrsi3_31):
302 rotcl r0 /* T = sign bit of r0 */
303 rts
304 subc r0,r0 /* delay slot: r0 = 0 - T, i.e. 0 or -1 */
305
/* Shifts 24..30: (N - 24) one-bit shifts, then a 24-bit shift.  */
306 LOCAL(ashrsi3_30):
307 shar r0
308 LOCAL(ashrsi3_29):
309 shar r0
310 LOCAL(ashrsi3_28):
311 shar r0
312 LOCAL(ashrsi3_27):
313 shar r0
314 LOCAL(ashrsi3_26):
315 shar r0
316 LOCAL(ashrsi3_25):
317 shar r0
318 LOCAL(ashrsi3_24):
319 shlr16 r0 /* logical shift by 16 + 8 = 24 ... */
320 shlr8 r0
321 rts
322 exts.b r0,r0 /* delay slot: ... then sign-extend the remaining byte */
323
/* Shifts 16..23: (N - 16) one-bit shifts, then a 16-bit shift.  */
324 LOCAL(ashrsi3_23):
325 shar r0
326 LOCAL(ashrsi3_22):
327 shar r0
328 LOCAL(ashrsi3_21):
329 shar r0
330 LOCAL(ashrsi3_20):
331 shar r0
332 LOCAL(ashrsi3_19):
333 shar r0
334 LOCAL(ashrsi3_18):
335 shar r0
336 LOCAL(ashrsi3_17):
337 shar r0
338 LOCAL(ashrsi3_16):
339 shlr16 r0 /* logical shift by 16 ... */
340 rts
341 exts.w r0,r0 /* delay slot: ... then sign-extend the remaining word */
342
/* Shifts 1..15: N one-bit shifts, the last one in the rts delay slot.  */
343 LOCAL(ashrsi3_15):
344 shar r0
345 LOCAL(ashrsi3_14):
346 shar r0
347 LOCAL(ashrsi3_13):
348 shar r0
349 LOCAL(ashrsi3_12):
350 shar r0
351 LOCAL(ashrsi3_11):
352 shar r0
353 LOCAL(ashrsi3_10):
354 shar r0
355 LOCAL(ashrsi3_9):
356 shar r0
357 LOCAL(ashrsi3_8):
358 shar r0
359 LOCAL(ashrsi3_7):
360 shar r0
361 LOCAL(ashrsi3_6):
362 shar r0
363 LOCAL(ashrsi3_5):
364 shar r0
365 LOCAL(ashrsi3_4):
366 shar r0
367 LOCAL(ashrsi3_3):
368 shar r0
369 LOCAL(ashrsi3_2):
370 shar r0
371 LOCAL(ashrsi3_1):
372 rts
373 shar r0 /* delay slot: final one-bit shift */
374
/* Shift by 0: r0 already holds the unshifted value.  */
375 LOCAL(ashrsi3_0):
376 rts
377 nop
378
379 ENDFUNC(GLOBAL(ashrsi3))
380 #endif
381
382 #ifdef L_ashiftlt
383
384 !
385 ! GLOBAL(ashlsi3)
386 !
387 ! Entry:
388 !
389 ! r4: Value to shift
390 ! r5: Shifts
391 !
392 ! Exit:
393 !
394 ! r0: Result
395 !
396 ! Destroys:
397 !
398 ! r5 (overwritten with the jump-table offset), T bit (odd shift counts)
399 !
/* ashlsi3: left shift by a run-time count.
     In:  r4 = value, r5 = shift count (masked to its low 5 bits here)
     Out: r0 = r4 << (r5 & 31)
   r5 is overwritten with the dispatch offset; the one-bit shll used for odd
   counts changes the T bit.
   Dispatch works through a table of byte offsets from the table start
   (loaded with the sign-extending mov.b).  Each group of entry points below
   is a fall-through ladder: every label adds one shll2 (two bits) on top of
   the shifts that follow it, and the group's tail supplies the common
   0/1/8/9/16/17/24/25-bit part.  */
400 .global GLOBAL(ashlsi3)
401 HIDDEN_FUNC(GLOBAL(ashlsi3))
402 .align 2
403 GLOBAL(ashlsi3):
404 mov #31,r0
405 and r0,r5 /* r5 = count & 31 */
406 mova LOCAL(ashlsi3_table),r0 /* r0 = address of the offset table */
407 mov.b @(r0,r5),r5 /* r5 = offset of the entry point for this count */
408 #ifdef __sh1__
409 add r5,r0 /* no braf here: form the absolute address instead */
410 jmp @r0
411 #else
412 braf r5 /* PC-relative: the table starts right after the delay slot */
413 #endif
414 mov r4,r0 /* delay slot: r0 = value to shift */
415
416 .align 2
417 LOCAL(ashlsi3_table):
418 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
419 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
420 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
421 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
422 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
423 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
450
/* Counts 6, 4, 2: shll2 steps only.  */
451 LOCAL(ashlsi3_6):
452 shll2 r0
453 LOCAL(ashlsi3_4):
454 shll2 r0
455 LOCAL(ashlsi3_2):
456 rts
457 shll2 r0 /* delay slot */
458
/* Counts 7, 5, 3, 1: shll2 steps plus a final one-bit shift.  */
459 LOCAL(ashlsi3_7):
460 shll2 r0
461 LOCAL(ashlsi3_5):
462 shll2 r0
463 LOCAL(ashlsi3_3):
464 shll2 r0
465 LOCAL(ashlsi3_1):
466 rts
467 shll r0 /* delay slot */
468
/* Counts 14, 12, 10, 8: shll2 steps plus 8.  */
469 LOCAL(ashlsi3_14):
470 shll2 r0
471 LOCAL(ashlsi3_12):
472 shll2 r0
473 LOCAL(ashlsi3_10):
474 shll2 r0
475 LOCAL(ashlsi3_8):
476 rts
477 shll8 r0 /* delay slot */
478
/* Counts 15, 13, 11, 9: shll2 steps plus 8 + 1.  */
479 LOCAL(ashlsi3_15):
480 shll2 r0
481 LOCAL(ashlsi3_13):
482 shll2 r0
483 LOCAL(ashlsi3_11):
484 shll2 r0
485 LOCAL(ashlsi3_9):
486 shll8 r0
487 rts
488 shll r0 /* delay slot */
489
/* Counts 22, 20, 18, 16: shll2 steps plus 16.  */
490 LOCAL(ashlsi3_22):
491 shll2 r0
492 LOCAL(ashlsi3_20):
493 shll2 r0
494 LOCAL(ashlsi3_18):
495 shll2 r0
496 LOCAL(ashlsi3_16):
497 rts
498 shll16 r0 /* delay slot */
499
/* Counts 23, 21, 19, 17: shll2 steps plus 16 + 1.  */
500 LOCAL(ashlsi3_23):
501 shll2 r0
502 LOCAL(ashlsi3_21):
503 shll2 r0
504 LOCAL(ashlsi3_19):
505 shll2 r0
506 LOCAL(ashlsi3_17):
507 shll16 r0
508 rts
509 shll r0 /* delay slot */
510
/* Counts 30, 28, 26, 24: shll2 steps plus 16 + 8.  */
511 LOCAL(ashlsi3_30):
512 shll2 r0
513 LOCAL(ashlsi3_28):
514 shll2 r0
515 LOCAL(ashlsi3_26):
516 shll2 r0
517 LOCAL(ashlsi3_24):
518 shll16 r0
519 rts
520 shll8 r0 /* delay slot */
521
/* Counts 31, 29, 27, 25: shll2 steps plus 16 + 8 + 1.  */
522 LOCAL(ashlsi3_31):
523 shll2 r0
524 LOCAL(ashlsi3_29):
525 shll2 r0
526 LOCAL(ashlsi3_27):
527 shll2 r0
528 LOCAL(ashlsi3_25):
529 shll16 r0
530 shll8 r0
531 rts
532 shll r0 /* delay slot */
533
/* Count 0: r0 already holds the unshifted value.  */
534 LOCAL(ashlsi3_0):
535 rts
536 nop
537
538 ENDFUNC(GLOBAL(ashlsi3))
539 #endif
540
541 #ifdef L_lshiftrt
542
543 !
544 ! GLOBAL(lshrsi3)
545 !
546 ! Entry:
547 !
548 ! r4: Value to shift
549 ! r5: Shifts
550 !
551 ! Exit:
552 !
553 ! r0: Result
554 !
555 ! Destroys:
556 !
557 ! r5 (overwritten with the jump-table offset), T bit (odd shift counts)
558 !
/* lshrsi3: logical (zero-filling) right shift by a run-time count.
     In:  r4 = value, r5 = shift count (masked to its low 5 bits here)
     Out: r0 = r4 >> (r5 & 31), unsigned
   r5 is overwritten with the dispatch offset; the one-bit shlr used for odd
   counts changes the T bit.
   Dispatch works through a table of byte offsets from the table start
   (loaded with the sign-extending mov.b).  Each group of entry points below
   is a fall-through ladder: every label adds one shlr2 (two bits) on top of
   the shifts that follow it, and the group's tail supplies the common
   0/1/8/9/16/17/24/25-bit part.  */
559 .global GLOBAL(lshrsi3)
560 HIDDEN_FUNC(GLOBAL(lshrsi3))
561 .align 2
562 GLOBAL(lshrsi3):
563 mov #31,r0
564 and r0,r5 /* r5 = count & 31 */
565 mova LOCAL(lshrsi3_table),r0 /* r0 = address of the offset table */
566 mov.b @(r0,r5),r5 /* r5 = offset of the entry point for this count */
567 #ifdef __sh1__
568 add r5,r0 /* no braf here: form the absolute address instead */
569 jmp @r0
570 #else
571 braf r5 /* PC-relative: the table starts right after the delay slot */
572 #endif
573 mov r4,r0 /* delay slot: r0 = value to shift */
574
575 .align 2
576 LOCAL(lshrsi3_table):
577 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
578 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
579 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
580 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
581 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
582 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
609
/* Counts 6, 4, 2: shlr2 steps only.  */
610 LOCAL(lshrsi3_6):
611 shlr2 r0
612 LOCAL(lshrsi3_4):
613 shlr2 r0
614 LOCAL(lshrsi3_2):
615 rts
616 shlr2 r0 /* delay slot */
617
/* Counts 7, 5, 3, 1: shlr2 steps plus a final one-bit shift.  */
618 LOCAL(lshrsi3_7):
619 shlr2 r0
620 LOCAL(lshrsi3_5):
621 shlr2 r0
622 LOCAL(lshrsi3_3):
623 shlr2 r0
624 LOCAL(lshrsi3_1):
625 rts
626 shlr r0 /* delay slot */
627
/* Counts 14, 12, 10, 8: shlr2 steps plus 8.  */
628 LOCAL(lshrsi3_14):
629 shlr2 r0
630 LOCAL(lshrsi3_12):
631 shlr2 r0
632 LOCAL(lshrsi3_10):
633 shlr2 r0
634 LOCAL(lshrsi3_8):
635 rts
636 shlr8 r0 /* delay slot */
637
/* Counts 15, 13, 11, 9: shlr2 steps plus 8 + 1.  */
638 LOCAL(lshrsi3_15):
639 shlr2 r0
640 LOCAL(lshrsi3_13):
641 shlr2 r0
642 LOCAL(lshrsi3_11):
643 shlr2 r0
644 LOCAL(lshrsi3_9):
645 shlr8 r0
646 rts
647 shlr r0 /* delay slot */
648
/* Counts 22, 20, 18, 16: shlr2 steps plus 16.  */
649 LOCAL(lshrsi3_22):
650 shlr2 r0
651 LOCAL(lshrsi3_20):
652 shlr2 r0
653 LOCAL(lshrsi3_18):
654 shlr2 r0
655 LOCAL(lshrsi3_16):
656 rts
657 shlr16 r0 /* delay slot */
658
/* Counts 23, 21, 19, 17: shlr2 steps plus 16 + 1.  */
659 LOCAL(lshrsi3_23):
660 shlr2 r0
661 LOCAL(lshrsi3_21):
662 shlr2 r0
663 LOCAL(lshrsi3_19):
664 shlr2 r0
665 LOCAL(lshrsi3_17):
666 shlr16 r0
667 rts
668 shlr r0 /* delay slot */
669
/* Counts 30, 28, 26, 24: shlr2 steps plus 16 + 8.  */
670 LOCAL(lshrsi3_30):
671 shlr2 r0
672 LOCAL(lshrsi3_28):
673 shlr2 r0
674 LOCAL(lshrsi3_26):
675 shlr2 r0
676 LOCAL(lshrsi3_24):
677 shlr16 r0
678 rts
679 shlr8 r0 /* delay slot */
680
/* Counts 31, 29, 27, 25: shlr2 steps plus 16 + 8 + 1.  */
681 LOCAL(lshrsi3_31):
682 shlr2 r0
683 LOCAL(lshrsi3_29):
684 shlr2 r0
685 LOCAL(lshrsi3_27):
686 shlr2 r0
687 LOCAL(lshrsi3_25):
688 shlr16 r0
689 shlr8 r0
690 rts
691 shlr r0 /* delay slot */
692
/* Count 0: r0 already holds the unshifted value.  */
693 LOCAL(lshrsi3_0):
694 rts
695 nop
696
697 ENDFUNC(GLOBAL(lshrsi3))
698 #endif
699
700 #ifdef L_movmem
/* movmem: copy loop for blocks larger than the fixed-size movmemSI<N>
   entry points; it drives the movmemSI<N> ladder that follows.
     r4 = destination, r5 = source (both accessed as 32-bit words)
     r6 = size parameter: multiplied by 4 on entry, then reduced by 64 for
          each 64-byte chunk.  NOTE(review): the caller's exact encoding of
          r6 is not visible in this file; see the remark just before the
          entry label and confirm against the movmem expander.
   r0 and the T bit are clobbered.  pr is pushed on entry, because the bsr
   below replaces it with the address of movmem_loop so that the ladder's
   rts re-enters the loop; the saved value is popped before the final rts.  */
701 .text
702 .balign 4
703 .global GLOBAL(movmem)
704 HIDDEN_FUNC(GLOBAL(movmem))
705 HIDDEN_ALIAS(movstr,movmem)
706 /* This would be a lot simpler if r6 contained the byte count
707 minus 64, and we wouldn't be called here for a byte count of 64. */
708 GLOBAL(movmem):
709 sts.l pr,@-r15 /* save the caller's return address */
710 shll2 r6 /* r6 *= 4 */
711 bsr GLOBAL(movmemSI52+2) /* enter the ladder one insn past movmemSI52; pr now points at movmem_loop */
712 mov.l @(48,r5),r0 /* delay slot: does the load of the skipped ladder insn */
713 .balign 4
714 LOCAL(movmem_loop): /* Reached with rts */
715 mov.l @(60,r5),r0 /* words 15..13 of the chunk are copied here, ... */
716 add #-64,r6
717 mov.l r0,@(60,r4)
718 tst r6,r6 /* T = (r6 == 0) */
719 mov.l @(56,r5),r0
720 bt LOCAL(movmem_done) /* nothing left after this chunk */
721 mov.l r0,@(56,r4)
722 cmp/pl r6 /* T = (r6 > 0) */
723 mov.l @(52,r5),r0
724 add #64,r5 /* advance both pointers to the next chunk */
725 mov.l r0,@(52,r4)
726 add #64,r4
727 bt GLOBAL(movmemSI52) /* ... words 12..0 of the next chunk by the ladder, whose rts returns to movmem_loop */
728 ! done all the large groups, do the remainder
729 ! jump to movmem+
730 mova GLOBAL(movmemSI4)+4,r0 /* r6 is negative here: step back 4 bytes of ladder code per remaining word */
731 add r6,r0
732 jmp @r0 /* the lds.l below is this jump's delay slot, so pr is restored on the way */
733 LOCAL(movmem_done): ! share slot insn, works out aligned.
734 lds.l @r15+,pr /* restore the caller's return address */
735 mov.l r0,@(56,r4) /* finish words 14 and 13 of the last chunk */
736 mov.l @(52,r5),r0
737 rts
738 mov.l r0,@(52,r4) /* delay slot */
739 .balign 4
740 ! ??? We need aliases movstr* for movmem* for the older libraries. These
741 ! aliases will be removed at some point in the future.
/* movmemSI<N> (N = 64, 60, ..., 4): copy N bytes as 32-bit words from the
   address in r5 to the address in r4, highest offset first.
   Each entry point is one load / store pair through r0 that falls through
   to the next smaller size; movmemSI4 ends the ladder with rts and performs
   the last store in the delay slot.  r0 is clobbered; r4 and r5 are not
   modified.
   Every step must remain exactly two 16-bit instructions: movmem enters the
   ladder at movmemSI52+2 and through a jump computed from movmemSI4+4.  */
742 .global GLOBAL(movmemSI64)
743 HIDDEN_FUNC(GLOBAL(movmemSI64))
744 HIDDEN_ALIAS(movstrSI64,movmemSI64)
745 GLOBAL(movmemSI64):
746 mov.l @(60,r5),r0
747 mov.l r0,@(60,r4)
748 .global GLOBAL(movmemSI60)
749 HIDDEN_FUNC(GLOBAL(movmemSI60))
750 HIDDEN_ALIAS(movstrSI60,movmemSI60)
751 GLOBAL(movmemSI60):
752 mov.l @(56,r5),r0
753 mov.l r0,@(56,r4)
754 .global GLOBAL(movmemSI56)
755 HIDDEN_FUNC(GLOBAL(movmemSI56))
756 HIDDEN_ALIAS(movstrSI56,movmemSI56)
757 GLOBAL(movmemSI56):
758 mov.l @(52,r5),r0
759 mov.l r0,@(52,r4)
760 .global GLOBAL(movmemSI52)
761 HIDDEN_FUNC(GLOBAL(movmemSI52))
762 HIDDEN_ALIAS(movstrSI52,movmemSI52)
763 GLOBAL(movmemSI52):
764 mov.l @(48,r5),r0
765 mov.l r0,@(48,r4) /* movmemSI52+2: movmem enters here with r0 already loaded */
766 .global GLOBAL(movmemSI48)
767 HIDDEN_FUNC(GLOBAL(movmemSI48))
768 HIDDEN_ALIAS(movstrSI48,movmemSI48)
769 GLOBAL(movmemSI48):
770 mov.l @(44,r5),r0
771 mov.l r0,@(44,r4)
772 .global GLOBAL(movmemSI44)
773 HIDDEN_FUNC(GLOBAL(movmemSI44))
774 HIDDEN_ALIAS(movstrSI44,movmemSI44)
775 GLOBAL(movmemSI44):
776 mov.l @(40,r5),r0
777 mov.l r0,@(40,r4)
778 .global GLOBAL(movmemSI40)
779 HIDDEN_FUNC(GLOBAL(movmemSI40))
780 HIDDEN_ALIAS(movstrSI40,movmemSI40)
781 GLOBAL(movmemSI40):
782 mov.l @(36,r5),r0
783 mov.l r0,@(36,r4)
784 .global GLOBAL(movmemSI36)
785 HIDDEN_FUNC(GLOBAL(movmemSI36))
786 HIDDEN_ALIAS(movstrSI36,movmemSI36)
787 GLOBAL(movmemSI36):
788 mov.l @(32,r5),r0
789 mov.l r0,@(32,r4)
790 .global GLOBAL(movmemSI32)
791 HIDDEN_FUNC(GLOBAL(movmemSI32))
792 HIDDEN_ALIAS(movstrSI32,movmemSI32)
793 GLOBAL(movmemSI32):
794 mov.l @(28,r5),r0
795 mov.l r0,@(28,r4)
796 .global GLOBAL(movmemSI28)
797 HIDDEN_FUNC(GLOBAL(movmemSI28))
798 HIDDEN_ALIAS(movstrSI28,movmemSI28)
799 GLOBAL(movmemSI28):
800 mov.l @(24,r5),r0
801 mov.l r0,@(24,r4)
802 .global GLOBAL(movmemSI24)
803 HIDDEN_FUNC(GLOBAL(movmemSI24))
804 HIDDEN_ALIAS(movstrSI24,movmemSI24)
805 GLOBAL(movmemSI24):
806 mov.l @(20,r5),r0
807 mov.l r0,@(20,r4)
808 .global GLOBAL(movmemSI20)
809 HIDDEN_FUNC(GLOBAL(movmemSI20))
810 HIDDEN_ALIAS(movstrSI20,movmemSI20)
811 GLOBAL(movmemSI20):
812 mov.l @(16,r5),r0
813 mov.l r0,@(16,r4)
814 .global GLOBAL(movmemSI16)
815 HIDDEN_FUNC(GLOBAL(movmemSI16))
816 HIDDEN_ALIAS(movstrSI16,movmemSI16)
817 GLOBAL(movmemSI16):
818 mov.l @(12,r5),r0
819 mov.l r0,@(12,r4)
820 .global GLOBAL(movmemSI12)
821 HIDDEN_FUNC(GLOBAL(movmemSI12))
822 HIDDEN_ALIAS(movstrSI12,movmemSI12)
823 GLOBAL(movmemSI12):
824 mov.l @(8,r5),r0
825 mov.l r0,@(8,r4)
826 .global GLOBAL(movmemSI8)
827 HIDDEN_FUNC(GLOBAL(movmemSI8))
828 HIDDEN_ALIAS(movstrSI8,movmemSI8)
829 GLOBAL(movmemSI8):
830 mov.l @(4,r5),r0
831 mov.l r0,@(4,r4)
832 .global GLOBAL(movmemSI4)
833 HIDDEN_FUNC(GLOBAL(movmemSI4))
834 HIDDEN_ALIAS(movstrSI4,movmemSI4)
835 GLOBAL(movmemSI4):
836 mov.l @(0,r5),r0
837 rts
838 mov.l r0,@(0,r4) /* delay slot: store of the lowest word */
839
840 ENDFUNC(GLOBAL(movmemSI64))
841 ENDFUNC(GLOBAL(movmemSI60))
842 ENDFUNC(GLOBAL(movmemSI56))
843 ENDFUNC(GLOBAL(movmemSI52))
844 ENDFUNC(GLOBAL(movmemSI48))
845 ENDFUNC(GLOBAL(movmemSI44))
846 ENDFUNC(GLOBAL(movmemSI40))
847 ENDFUNC(GLOBAL(movmemSI36))
848 ENDFUNC(GLOBAL(movmemSI32))
849 ENDFUNC(GLOBAL(movmemSI28))
850 ENDFUNC(GLOBAL(movmemSI24))
851 ENDFUNC(GLOBAL(movmemSI20))
852 ENDFUNC(GLOBAL(movmemSI16))
853 ENDFUNC(GLOBAL(movmemSI12))
854 ENDFUNC(GLOBAL(movmemSI8))
855 ENDFUNC(GLOBAL(movmemSI4))
856 ENDFUNC(GLOBAL(movmem))
857 #endif
858
859 #ifdef L_movmem_i4
/* movmem_i4_even / movmem_i4_odd: word copy with loads issued ahead of the
   matching stores.
     r4 = destination, r5 = source (advanced by the post-increment loads)
     r6 = loop counter, decremented with dt once per pair of words; the
          copy ends when it reaches zero.
   Tracing the code: entering at _even copies 2 * (r6 + 1) words, entering
   at _odd copies 2 * r6 + 3 words.  NOTE(review): how the caller derives r6
   from the block size is not visible here -- confirm against the expander.
   r0-r3 and the T bit are clobbered.
   movmemSI12_i4 is a separate straight-line copy of exactly three words.  */
860 .text
861 .global GLOBAL(movmem_i4_even)
862 .global GLOBAL(movmem_i4_odd)
863 .global GLOBAL(movmemSI12_i4)
864
865 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
866 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
867 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
868
869 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
870 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
871 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
872
873 .p2align 5
/* Loop exit taken from L_movmem_loop: r0 / r1 hold the last two words.  */
874 L_movmem_2mod4_end:
875 mov.l r0,@(16,r4)
876 rts
877 mov.l r1,@(20,r4) /* delay slot */
878
879 .p2align 2
880
881 GLOBAL(movmem_i4_even):
882 mov.l @r5+,r0
883 bra L_movmem_start_even /* join the loop at its second half */
884 mov.l @r5+,r1 /* delay slot */
885
886 GLOBAL(movmem_i4_odd):
887 mov.l @r5+,r1
888 add #-4,r4 /* bias r4 so the store offsets below line up with the loop's */
889 mov.l @r5+,r2
890 mov.l @r5+,r3
891 mov.l r1,@(4,r4)
892 mov.l r2,@(8,r4)
893
894 L_movmem_loop:
895 mov.l r3,@(12,r4)
896 dt r6 /* r6 -= 1; T = (r6 == 0) */
897 mov.l @r5+,r0
898 bt/s L_movmem_2mod4_end /* counter exhausted: store the last two words and return */
899 mov.l @r5+,r1 /* delay slot: executed whether or not the branch is taken */
900 add #16,r4
901 L_movmem_start_even:
902 mov.l @r5+,r2
903 mov.l @r5+,r3
904 mov.l r0,@r4
905 dt r6 /* r6 -= 1; T = (r6 == 0) */
906 mov.l r1,@(4,r4)
907 bf/s L_movmem_loop /* counter not yet zero: keep going */
908 mov.l r2,@(8,r4) /* delay slot: executed whether or not the branch is taken */
909 rts
910 mov.l r3,@(12,r4) /* delay slot */
911
912 ENDFUNC(GLOBAL(movmem_i4_even))
913 ENDFUNC(GLOBAL(movmem_i4_odd))
914
915 .p2align 4
/* movmemSI12_i4: copy three words from @r5 to @r4; clobbers r0-r2.  */
916 GLOBAL(movmemSI12_i4):
917 mov.l @r5,r0
918 mov.l @(4,r5),r1
919 mov.l @(8,r5),r2
920 mov.l r0,@r4
921 mov.l r1,@(4,r4)
922 rts
923 mov.l r2,@(8,r4) /* delay slot */
924
925 ENDFUNC(GLOBAL(movmemSI12_i4))
926 #endif
927
928 #ifdef L_mulsi3
929
930
931 .global GLOBAL(mulsi3)
932 HIDDEN_FUNC(GLOBAL(mulsi3))
933
934 ! r4 = aabb
935 ! r5 = ccdd
936 ! r0 = aabb*ccdd via partial products
937 !
938 ! if aa == 0 and cc == 0
939 ! r0 = bb*dd
940 !
941 ! else
942 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
943 !
944
/* mulsi3: 32 x 32 -> 32-bit multiply built from 16 x 16 mulu.w products.
     In:  r4, r5 = operands
     Out: r0 = low 32 bits of r4 * r5
   Clobbers r1, r2, r3, macl and the T bit.
   Fast path: when both upper halves are zero, the single low-half product
   is already the answer.  Otherwise the two cross products are summed,
   shifted left by 16 and added to the low product; the high x high product
   cannot reach the low 32 bits and is never computed.  */
945 GLOBAL(mulsi3):
946 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
947 mov r5,r3 ! r3 = ccdd
948 swap.w r4,r2 ! r2 = bbaa
949 xtrct r2,r3 ! r3 = aacc
950 tst r3,r3 ! msws zero ?
951 bf hiset
952 rts ! yes - then we have the answer
953 sts macl,r0
954
955 hiset: sts macl,r0 ! r0 = bb*dd
956 mulu.w r2,r5 ! brewing macl = aa*dd
957 sts macl,r1
958 mulu.w r3,r4 ! brewing macl = cc*bb
959 sts macl,r2
960 add r1,r2 /* r2 = aa*dd + cc*bb */
961 shll16 r2 /* scale the cross terms by 65536 */
962 rts
963 add r2,r0 /* delay slot: r0 = bb*dd + cross terms */
964
965 ENDFUNC(GLOBAL(mulsi3))
966 #endif
967 #endif /* ! __SH5__ */
968 #ifdef L_sdivsi3_i4
969 .title "SH DIVIDE"
970 !! 4 byte integer Divide code for the Renesas SH
971 #ifdef __SH4__
972 !! args in r4 and r5, result in fpul, clobber dr0, dr2
973
/* sdivsi3_i4 (__SH4__ variant): signed 32-bit division done in the FPU.
   Both operands are converted to double precision, divided, and the
   quotient is truncated back to an integer, which is left in fpul.
   This variant does not touch fpscr.  NOTE(review): it presumably relies
   on the caller already running in double-precision mode -- confirm.  */
974 .global GLOBAL(sdivsi3_i4)
975 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
976 GLOBAL(sdivsi3_i4):
977 lds r4,fpul
978 float fpul,dr0 /* dr0 = (double) dividend */
979 lds r5,fpul
980 float fpul,dr2 /* dr2 = (double) divisor */
981 fdiv dr2,dr0 /* dr0 = dr0 / dr2 */
982 rts
983 ftrc dr0,fpul /* delay slot: fpul = quotient truncated to integer */
984
985 ENDFUNC(GLOBAL(sdivsi3_i4))
986 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
987 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
988
989 #if ! __SH5__ || __SH5__ == 32
990 #if __SH5__
991 .mode SHcompact
992 #endif
/* sdivsi3_i4 (variant for targets whose default FP mode is not double):
   same FPU-based signed division as the __SH4__ variant, but fpscr is
   saved on the stack, replaced with 0x00080000 for the computation (per
   the SH-4 manual that is the PR / double-precision bit) and restored in
   the rts delay slot.  Result in fpul; clobbers r2, dr0, dr2.  */
993 .global GLOBAL(sdivsi3_i4)
994 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
995 GLOBAL(sdivsi3_i4):
996 sts.l fpscr,@-r15 /* save the caller's fpscr */
997 mov #8,r2
998 swap.w r2,r2 /* r2 = 8 << 16 = 0x00080000 */
999 lds r2,fpscr
1000 lds r4,fpul
1001 float fpul,dr0 /* dr0 = (double) dividend */
1002 lds r5,fpul
1003 float fpul,dr2 /* dr2 = (double) divisor */
1004 fdiv dr2,dr0
1005 ftrc dr0,fpul /* fpul = quotient truncated to integer */
1006 rts
1007 lds.l @r15+,fpscr /* delay slot: restore the caller's fpscr */
1008
1009 ENDFUNC(GLOBAL(sdivsi3_i4))
1010 #endif /* ! __SH5__ || __SH5__ == 32 */
1011 #endif /* ! __SH4__ */
1012 #endif
1013
1014 #ifdef L_sdivsi3
1015 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1016 sh2e/sh3e code. */
1017 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1018 !!
1019 !! Steve Chamberlain
1020 !! sac@cygnus.com
1021 !!
1022 !!
1023
1024 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1025
1026 .global GLOBAL(sdivsi3)
1027 #if __SHMEDIA__
1028 #if __SH5__ == 32
1029 .section .text..SHmedia32,"ax"
1030 #else
1031 .text
1032 #endif
1033 .align 2
1034 #if 0
1035 /* The assembly code that follows is a hand-optimized version of the C
1036 code that follows. Note that the registers that are modified are
1037 exactly those listed as clobbered in the patterns divsi3_i1 and
1038 divsi3_i1_media.
1039
1040 int __sdivsi3 (i, j)
1041 int i, j;
1042 {
1043 register unsigned long long r18 asm ("r18");
1044 register unsigned long long r19 asm ("r19");
1045 register unsigned long long r0 asm ("r0") = 0;
1046 register unsigned long long r1 asm ("r1") = 1;
1047 register int r2 asm ("r2") = i >> 31;
1048 register int r3 asm ("r3") = j >> 31;
1049
1050 r2 = r2 ? r2 : r1;
1051 r3 = r3 ? r3 : r1;
1052 r18 = i * r2;
1053 r19 = j * r3;
1054 r2 *= r3;
1055
1056 r19 <<= 31;
1057 r1 <<= 31;
1058 do
1059 if (r18 >= r19)
1060 r0 |= r1, r18 -= r19;
1061 while (r19 >>= 1, r1 >>= 1);
1062
1063 return r2 * (int)r0;
1064 }
1065 */
/* Disabled (#if 0) SHmedia reference implementation of sdivsi3; it follows
   the C code in the comment directly above step by step.
     In:  r4 = dividend, r5 = divisor, r18 = return address
     Out: r0 = quotient
   r2 / r3 hold +1 or -1 according to each operand's sign, r18 / r19 the
   magnitudes, and r1 the quotient bit tried in the current iteration.  */
1066 GLOBAL(sdivsi3):
1067 pt/l LOCAL(sdivsi3_dontadd), tr2
1068 pt/l LOCAL(sdivsi3_loop), tr1
1069 ptabs/l r18, tr0 /* tr0 = return address (r18 is reused below) */
1070 movi 0, r0
1071 movi 1, r1
1072 shari.l r4, 31, r2 /* r2 = i >> 31 */
1073 shari.l r5, 31, r3 /* r3 = j >> 31 */
1074 cmveq r2, r1, r2 /* r2 = r2 ? r2 : 1 */
1075 cmveq r3, r1, r3 /* r3 = r3 ? r3 : 1 */
1076 muls.l r4, r2, r18 /* r18 = i * r2 */
1077 muls.l r5, r3, r19 /* r19 = j * r3 */
1078 muls.l r2, r3, r2 /* r2 = sign of the result */
1079 shlli r19, 31, r19
1080 shlli r1, 31, r1
1081 LOCAL(sdivsi3_loop):
1082 bgtu r19, r18, tr2 /* skip when r18 < r19 */
1083 or r0, r1, r0
1084 sub r18, r19, r18
1085 LOCAL(sdivsi3_dontadd):
1086 shlri r1, 1, r1
1087 shlri r19, 1, r19
1088 bnei r1, 0, tr1 /* loop until the quotient bit has shifted out */
1089 muls.l r0, r2, r0 /* apply the sign */
1090 add.l r0, r63, r0
1091 blink tr0, r63
1092 #elif 0 /* ! 0 */
/* Disabled (#elif 0) alternative SHmedia implementation of sdivsi3.  It
   works on the absolute values of both operands (r20 = |r5|, r21 = |r4|)
   and flips the result with the sign mask in r18 at the end.  */
1093 // inputs: r4,r5
1094 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1095 // result in r0
1096 GLOBAL(sdivsi3):
1097 // can create absolute value without extra latency,
1098 // but dependent on proper sign extension of inputs:
1099 // shari.l r5,31,r2
1100 // xor r5,r2,r20
1101 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1102 shari.l r5,31,r2
1103 ori r2,1,r2
1104 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1105 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1106 shari.l r4,31,r3
1107 nsb r20,r0
1108 shlld r20,r0,r25
1109 shlri r25,48,r25
1110 sub r19,r25,r1
1111 mmulfx.w r1,r1,r2
1112 mshflo.w r1,r63,r1
1113 // If r4 was to be used in-place instead of r21, could use this sequence
1114 // to compute absolute:
1115 // sub r63,r4,r19 // compute absolute value of r4
1116 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1117 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1118 ori r3,1,r3
1119 mmulfx.w r25,r2,r2
1120 sub r19,r0,r0
1121 muls.l r4,r3,r21
1122 msub.w r1,r2,r2
1123 addi r2,-2,r1
1124 mulu.l r21,r1,r19
1125 mmulfx.w r2,r2,r2
1126 shlli r1,15,r1
1127 shlrd r19,r0,r19
1128 mulu.l r19,r20,r3
1129 mmacnfx.wl r25,r2,r1
1130 ptabs r18,tr0
1131 sub r21,r3,r25
1132
1133 mulu.l r25,r1,r2
1134 addi r0,14,r0
1135 xor r4,r5,r18
1136 shlrd r2,r0,r2
1137 mulu.l r2,r20,r3
1138 add r19,r2,r19
1139 shari.l r18,31,r18
1140 sub r25,r3,r25
1141
1142 mulu.l r25,r1,r2
1143 sub r25,r20,r25
1144 add r19,r18,r19
1145 shlrd r2,r0,r2
1146 mulu.l r2,r20,r3
1147 addi r25,1,r25
1148 add r19,r2,r19
1149
1150 cmpgt r25,r3,r25
1151 add.l r19,r25,r0
1152 xor r0,r18,r0
1153 blink tr0,r63
1154 #else /* ! 0 && ! 0 */
1155
/* sdivsi3 / sdivsi3_1 / sdivsi3_2 (SHmedia): signed 32-bit division that
   multiplies the dividend by an estimate of the divisor's reciprocal.  The
   estimate starts from a table lookup indexed by the top bits of the
   normalized divisor and is refined twice; the fixed-point format of every
   intermediate is noted on its line.
     In:  r4 = dividend, r5 = divisor, r18 = return address;
          sdivsi3_2 also expects the table address in r20
     Out: r0 = quotient
   The non-PIC entry points sdivsi3 / sdivsi3_1 load r20 with the address
   of div_table_internal and fall through into sdivsi3_2.  */
1156 // inputs: r4,r5
1157 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1158 // result in r0
1159 HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1160 #ifndef __pic__
1161 FUNC(GLOBAL(sdivsi3))
1162 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1163 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1164 // with the SHcompact implementation, which clobbers tr1 / tr2.
1165 .global GLOBAL(sdivsi3_1)
1166 GLOBAL(sdivsi3_1):
1167 .global GLOBAL(div_table_internal)
1168 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1169 shori GLOBAL(div_table_internal) & 65535, r20
1170 #endif
1171 .global GLOBAL(sdivsi3_2)
1172 // div_table in r20
1173 // clobbered: r1,r18,r19,r21,r25,tr0
1174 GLOBAL(sdivsi3_2):
1175 nsb r5, r1
1176 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1177 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1178 ldx.ub r20, r21, r19 // u0.8
1179 shari r25, 32, r25 // normalize to s2.30
1180 shlli r21, 1, r21
1181 muls.l r25, r19, r19 // s2.38
1182 ldx.w r20, r21, r21 // s2.14
1183 ptabs r18, tr0
1184 shari r19, 24, r19 // truncate to s2.14
1185 sub r21, r19, r19 // some 11 bit inverse in s1.14
1186 muls.l r19, r19, r21 // u0.28
1187 sub r63, r1, r1
1188 addi r1, 92, r1
1189 muls.l r25, r21, r18 // s2.58
1190 shlli r19, 45, r19 // multiply by two and convert to s2.58
1191 /* bubble */
1192 sub r19, r18, r18
1193 shari r18, 28, r18 // some 22 bit inverse in s1.30
1194 muls.l r18, r25, r0 // s2.60
1195 muls.l r18, r4, r25 // s32.30
1196 /* bubble */
1197 shari r0, 16, r19 // s-16.44
1198 muls.l r19, r18, r19 // s-16.74
1199 shari r25, 63, r0
1200 shari r4, 14, r18 // s19.-14
1201 shari r19, 30, r19 // s-16.44
1202 muls.l r19, r18, r19 // s15.30
1203 xor r21, r0, r21 // You could also use the constant 1 << 27.
1204 add r21, r25, r21
1205 sub r21, r19, r21
1206 shard r21, r1, r21
1207 sub r21, r0, r0
1208 blink tr0, r63
1209 #ifndef __pic__
1210 ENDFUNC(GLOBAL(sdivsi3))
1211 #endif
1212 ENDFUNC(GLOBAL(sdivsi3_2))
1213 #endif
1214 #elif defined __SHMEDIA__
/* sdivsi3 for SH5 compact without FPU, written in SHmedia: bit-serial
   signed division.
     In:  r4 = dividend, r5 = divisor, r18 = return address
     Out: r0 = quotient
   The operands are replaced by their absolute values.  r21 is set to
   (|divisor| << 32) - 1, so that "sub r20,r21,r20" subtracts the divisor
   from the upper half of r20 and at the same time adds 1 to the lower half,
   where the quotient bits accumulate.  The loop runs 32 times; the sign
   mask kept in r19 then negates the quotient when the operand signs
   differ.  */
1215 /* m5compact-nofpu */
1216 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1217 .mode SHmedia
1218 .section .text..SHmedia32,"ax"
1219 .align 2
1220 FUNC(GLOBAL(sdivsi3))
1221 GLOBAL(sdivsi3):
1222 pt/l LOCAL(sdivsi3_dontsub), tr0
1223 pt/l LOCAL(sdivsi3_loop), tr1
1224 ptabs/l r18,tr2 /* tr2 = return address (r18 is reused below) */
1225 shari.l r4,31,r18 /* r18 = sign mask of the dividend (0 or -1) */
1226 shari.l r5,31,r19 /* r19 = sign mask of the divisor */
1227 xor r4,r18,r20
1228 xor r5,r19,r21
1229 sub.l r20,r18,r20 /* r20 = |dividend| */
1230 sub.l r21,r19,r21 /* r21 = |divisor| */
1231 xor r18,r19,r19 /* r19 = sign mask of the quotient */
1232 shlli r21,32,r25
1233 addi r25,-1,r21 /* r21 = (|divisor| << 32) - 1 */
1234 addz.l r20,r63,r20 /* zero-extend |dividend| to 64 bits */
1235 LOCAL(sdivsi3_loop):
1236 shlli r20,1,r20
1237 bgeu/u r21,r20,tr0 /* divisor does not fit: leave this quotient bit 0 */
1238 sub r20,r21,r20
1239 LOCAL(sdivsi3_dontsub):
1240 addi.l r25,-1,r25 /* r25 doubles as the loop counter: -1, -2, ... */
1241 bnei r25,-32,tr1 /* stop after 32 iterations */
1242 xor r20,r19,r20
1243 sub.l r20,r19,r0 /* conditional negate; result taken from the low 32 bits */
1244 blink tr2,r63
1245 ENDFUNC(GLOBAL(sdivsi3))
1246 #else /* ! __SHMEDIA__ */
/* sdivsi3 (SHcompact): signed 32-bit division with the div0s / div1 step
   instructions, fully unrolled.
     In:  r4 = dividend, r5 = divisor
     Out: r0 = quotient; a zero divisor returns 0 instead of trapping
   Register use: r0 = divisor, r1 = dividend / quotient bits, r2 = constant
   zero, r3 = sign extension of the dividend and running partial remainder.
   The T bit carries the quotient bit from each div1 into the next rotcl.  */
1247 FUNC(GLOBAL(sdivsi3))
1248 GLOBAL(sdivsi3):
1249 mov r4,r1
1250 mov r5,r0
1251
1252 tst r0,r0 /* T = (divisor == 0) */
1253 bt div0
1254 mov #0,r2
1255 div0s r2,r1 /* T = sign of the dividend */
1256 subc r3,r3 /* r3 = -T: dividend sign extended into the high word */
1257 subc r2,r1 /* r1 -= borrow: a negative dividend is decremented by one */
1258 div0s r0,r3 /* set up the sign flags for the stepping division */
/* 32 x (rotcl r1 ; div1 r0,r3): one quotient bit per step.  */
1259 rotcl r1
1260 div1 r0,r3
1261 rotcl r1
1262 div1 r0,r3
1263 rotcl r1
1264 div1 r0,r3
1265 rotcl r1
1266 div1 r0,r3
1267 rotcl r1
1268 div1 r0,r3
1269 rotcl r1
1270 div1 r0,r3
1271 rotcl r1
1272 div1 r0,r3
1273 rotcl r1
1274 div1 r0,r3
1275 rotcl r1
1276 div1 r0,r3
1277 rotcl r1
1278 div1 r0,r3
1279 rotcl r1
1280 div1 r0,r3
1281 rotcl r1
1282 div1 r0,r3
1283 rotcl r1
1284 div1 r0,r3
1285 rotcl r1
1286 div1 r0,r3
1287 rotcl r1
1288 div1 r0,r3
1289 rotcl r1
1290 div1 r0,r3
1291 rotcl r1
1292 div1 r0,r3
1293 rotcl r1
1294 div1 r0,r3
1295 rotcl r1
1296 div1 r0,r3
1297 rotcl r1
1298 div1 r0,r3
1299 rotcl r1
1300 div1 r0,r3
1301 rotcl r1
1302 div1 r0,r3
1303 rotcl r1
1304 div1 r0,r3
1305 rotcl r1
1306 div1 r0,r3
1307 rotcl r1
1308 div1 r0,r3
1309 rotcl r1
1310 div1 r0,r3
1311 rotcl r1
1312 div1 r0,r3
1313 rotcl r1
1314 div1 r0,r3
1315 rotcl r1
1316 div1 r0,r3
1317 rotcl r1
1318 div1 r0,r3
1319 rotcl r1
1320 div1 r0,r3
1321 rotcl r1
1322 div1 r0,r3
1323 rotcl r1 /* shift in the last quotient bit */
1324 addc r2,r1 /* r1 += T (r2 is 0): final quotient correction */
1325 rts
1326 mov r1,r0 /* delay slot: return the quotient in r0 */
1327
1328
/* Division by zero: return 0.  */
1329 div0: rts
1330 mov #0,r0
1331
1332 ENDFUNC(GLOBAL(sdivsi3))
1333 #endif /* ! __SHMEDIA__ */
1334 #endif /* ! __SH4__ */
1335 #endif
1336 #ifdef L_udivsi3_i4
1337
1338 .title "SH DIVIDE"
1339 !! 4 byte integer Divide code for the Renesas SH
1340 #ifdef __SH4__
1341 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1342 !! and t bit
1343
/* __udivsi3_i4 (SH4): unsigned 32-bit division carried out in double
   precision floating point.
   In:  r4 = dividend, r5 = divisor.
   Out: fpul = quotient.
   Both operands have bit 31 flipped (r1 = 0x80000000) so that the signed
   int->double conversion can be used; adding 2147483648.0 afterwards
   restores the unsigned values.  A divisor of 0 or 1 takes the early exit
   and returns the dividend unchanged, so on the main path the divisor is
   at least 2 and the quotient is below 2^31.
   The branch target and the constant use LOCAL() labels, like the other
   routines in this file, to keep them out of the object's symbol table.  */
1344 .global GLOBAL(udivsi3_i4)
1345 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1346 GLOBAL(udivsi3_i4):
1347 mov #1,r1
1348 cmp/hi r1,r5 /* T = (divisor > 1), unsigned */
1349 bf LOCAL(trivial)
1350 rotr r1 /* r1 = 0x80000000 */
1351 xor r1,r4 /* flip bit 31 of the dividend */
1352 lds r4,fpul
1353 mova LOCAL(L1),r0 /* r0 = address of the 2147483648.0 constant */
1354 #ifdef FMOVD_WORKS
1355 fmov.d @r0+,dr4
1356 #else
1357 fmov.s @r0+,DR40
1358 fmov.s @r0,DR41
1359 #endif
1360 float fpul,dr0
1361 xor r1,r5 /* flip bit 31 of the divisor */
1362 lds r5,fpul
1363 float fpul,dr2
1364 fadd dr4,dr0 /* dr0 = dividend as an unsigned value */
1365 fadd dr4,dr2 /* dr2 = divisor as an unsigned value */
1366 fdiv dr2,dr0
1367 rts
1368 ftrc dr0,fpul /* delay slot: truncate the quotient into fpul */
1369
1370 LOCAL(trivial):
1371 rts
1372 lds r4,fpul /* delay slot: result = dividend */
1373
1374 .align 2
1375 #ifdef FMOVD_WORKS
1376 .align 3 ! make double below 8 byte aligned.
1377 #endif
1378 LOCAL(L1):
1379 .double 2147483648
1380
1381 ENDFUNC(GLOBAL(udivsi3_i4))
1382 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1383 #if ! __SH5__ || __SH5__ == 32
/* __udivsi3_i4 (SH5 with FPU): unsigned 32-bit division done as a
   double precision division of the zero-extended operands.
   r18 holds the return address.  */
1384 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1385 .mode SHmedia
1386 .global GLOBAL(udivsi3_i4)
1387 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1388 GLOBAL(udivsi3_i4):
1389 addz.l r4,r63,r20 /* r20 = dividend, zero-extended to 64 bits */
1390 addz.l r5,r63,r21 /* r21 = divisor, zero-extended to 64 bits */
1391 fmov.qd r20,dr0
1392 fmov.qd r21,dr32
1393 ptabs r18,tr0 /* tr0 = return target */
1394 float.qd dr0,dr0 /* 64-bit integer -> double */
1395 float.qd dr32,dr32
1396 fdiv.d dr0,dr32,dr0 /* dr0 = dividend / divisor */
1397 ftrc.dq dr0,dr32 /* truncate to a 64-bit integer in dr32 */
1398 fmov.s fr33,fr32 /* copy fr33 to fr32; presumably fr32 is what the caller reads as fpul - TODO confirm */
1399 blink tr0,r63
1400
1401 ENDFUNC(GLOBAL(udivsi3_i4))
1402 #endif /* ! __SH5__ || __SH5__ == 32 */
1403 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* __udivsi3_i4 for __SH4_SINGLE__ / __SH4_SINGLE_ONLY__: same algorithm
   as the SH4 version (flip bit 31, convert, add 2147483648.0, divide,
   truncate), but the caller's fpscr is saved on the stack, replaced by
   the first word of the constant block for the duration of the
   computation, and restored in the delay slot of the return.
   A divisor of 0 or 1 returns the dividend unchanged without touching
   fpscr or the stack.
   The branch target and the constant block use LOCAL() labels, like the
   other routines in this file, to keep them out of the object's symbol
   table.  */
1404 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1405
1406 .global GLOBAL(udivsi3_i4)
1407 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1408 GLOBAL(udivsi3_i4):
1409 mov #1,r1
1410 cmp/hi r1,r5 /* T = (divisor > 1), unsigned */
1411 bf LOCAL(trivial)
1412 sts.l fpscr,@-r15 /* save the caller's fpscr */
1413 mova LOCAL(L1),r0
1414 lds.l @r0+,fpscr /* load the fpscr value stored in front of the double constant */
1415 rotr r1 /* r1 = 0x80000000 */
1416 xor r1,r4
1417 lds r4,fpul
1418 #ifdef FMOVD_WORKS
1419 fmov.d @r0+,dr4
1420 #else
1421 fmov.s @r0+,DR40
1422 fmov.s @r0,DR41
1423 #endif
1424 float fpul,dr0
1425 xor r1,r5
1426 lds r5,fpul
1427 float fpul,dr2
1428 fadd dr4,dr0
1429 fadd dr4,dr2
1430 fdiv dr2,dr0
1431 ftrc dr0,fpul
1432 rts
1433 lds.l @r15+,fpscr /* delay slot: restore the caller's fpscr */
1434
1435 #ifdef FMOVD_WORKS
1436 .align 3 ! make double below 8 byte aligned.
1437 #endif
1438 LOCAL(trivial):
1439 rts
1440 lds r4,fpul /* delay slot: result = dividend */
1441
1442 .align 2
1443 LOCAL(L1):
1444 #ifndef FMOVD_WORKS
1445 .long 0x80000
1446 #else
1447 .long 0x180000
1448 #endif
1449 .double 2147483648
1450
1451 ENDFUNC(GLOBAL(udivsi3_i4))
1452 #endif /* ! __SH4__ */
1453 #endif
1454
1455 #ifdef L_udivsi3
1456 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1457 sh2e/sh3e code. */
1458 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1459
1460 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1461 .global GLOBAL(udivsi3)
1462 HIDDEN_FUNC(GLOBAL(udivsi3))
1463
1464 #if __SHMEDIA__
1465 #if __SH5__ == 32
1466 .section .text..SHmedia32,"ax"
1467 #else
1468 .text
1469 #endif
1470 .align 2
1471 #if 0
1472 /* The assembly code after this comment is a hand-optimized version of the C
1473 code shown below. Note that the registers that are modified are
1474 exactly those listed as clobbered in the patterns udivsi3_i1 and
1475 udivsi3_i1_media.
1476
1477 unsigned
1478 __udivsi3 (i, j)
1479 unsigned i, j;
1480 {
1481 register unsigned long long r0 asm ("r0") = 0;
1482 register unsigned long long r18 asm ("r18") = 1;
1483 register unsigned long long r4 asm ("r4") = i;
1484 register unsigned long long r19 asm ("r19") = j;
1485
1486 r19 <<= 31;
1487 r18 <<= 31;
1488 do
1489 if (r4 >= r19)
1490 r0 |= r18, r4 -= r19;
1491 while (r19 >>= 1, r18 >>= 1);
1492
1493 return r0;
1494 }
1495 */
/* Disabled (#if 0) restoring-division loop; register use matches the C
   code in the comment before it: r0 = quotient, r18 = current quotient
   bit, r19 = shifted divisor, r4 = running remainder.  */
1496 GLOBAL(udivsi3):
1497 pt/l LOCAL(udivsi3_dontadd), tr2
1498 pt/l LOCAL(udivsi3_loop), tr1
1499 ptabs/l r18, tr0 /* tr0 = return target; r18 is reused below */
1500 movi 0, r0
1501 movi 1, r18
1502 addz.l r5, r63, r19 /* zero-extend the divisor */
1503 addz.l r4, r63, r4 /* zero-extend the dividend */
1504 shlli r19, 31, r19
1505 shlli r18, 31, r18
1506 LOCAL(udivsi3_loop):
1507 bgtu r19, r4, tr2 /* skip when the shifted divisor exceeds the remainder */
1508 or r0, r18, r0 /* set this quotient bit */
1509 sub r4, r19, r4
1510 LOCAL(udivsi3_dontadd):
1511 shlri r18, 1, r18
1512 shlri r19, 1, r19
1513 bnei r18, 0, tr1 /* loop until the bit mask has been shifted out */
1514 blink tr0, r63
1515 #else
/* __udivsi3 (SHmedia): unsigned 32-bit division without a loop.
   The divisor is normalised (nsb/shlld), a fixed-point reciprocal
   estimate is built from its top 16 bits with the mmulfx.w / msub.w /
   mmacnfx.wl instructions, and the quotient is then accumulated in r18
   from three multiply / shift steps, each followed by an update of the
   remainder in r25.  The final cmpgt adds the last correction.
   r18 holds the return address on entry.
   The function is closed with ENDFUNC here because the ENDFUNC at the end
   of the udivsi3 section belongs to the non-SHmedia branch only.  */
1516 GLOBAL(udivsi3):
1517 // inputs: r4,r5
1518 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1519 // result in r0.
1520 addz.l r5,r63,r22 /* r22 = divisor, zero-extended */
1521 nsb r22,r0
1522 shlld r22,r0,r25 /* normalise the divisor */
1523 shlri r25,48,r25 /* keep its top 16 bits */
1524 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1525 sub r20,r25,r21
1526 mmulfx.w r21,r21,r19
1527 mshflo.w r21,r63,r21
1528 ptabs r18,tr0 /* tr0 = return target; r18 is reused below */
1529 mmulfx.w r25,r19,r19
1530 sub r20,r0,r0
1531 /* bubble */
1532 msub.w r21,r19,r19
1533 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1534 before the msub.w, but we need a different value for
1535 r19 to keep errors under control. */
1536 mulu.l r4,r21,r18
1537 mmulfx.w r19,r19,r19
1538 shlli r21,15,r21
1539 shlrd r18,r0,r18 /* r18 = first partial quotient */
1540 mulu.l r18,r22,r20
1541 mmacnfx.wl r25,r19,r21
1542 /* bubble */
1543 sub r4,r20,r25 /* r25 = dividend - partial quotient * divisor */
1544
1545 mulu.l r25,r21,r19
1546 addi r0,14,r0
1547 /* bubble */
1548 shlrd r19,r0,r19
1549 mulu.l r19,r22,r20
1550 add r18,r19,r18 /* accumulate the second partial quotient */
1551 /* bubble */
1552 sub.l r25,r20,r25
1553
1554 mulu.l r25,r21,r19
1555 addz.l r25,r63,r25
1556 sub r25,r22,r25
1557 shlrd r19,r0,r19
1558 mulu.l r19,r22,r20
1559 addi r25,1,r25
1560 add r18,r19,r18 /* accumulate the third partial quotient */
1561
1562 cmpgt r25,r20,r25 /* r25 = 1 when one more divisor still fits */
1563 add.l r18,r25,r0
1564 blink tr0,r63
ENDFUNC(GLOBAL(udivsi3))
1565 #endif
1566 #elif defined (__SHMEDIA__)
/* __udivsi3, SHmedia code for the m5compact-nofpu configuration:
   unsigned 32-bit division by a 32-step shift-and-subtract loop.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.
   The function is closed with ENDFUNC here because the ENDFUNC at the end
   of the udivsi3 section belongs to the non-SHmedia branch only.  */
1567 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1568 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1569 So use a short shmedia loop. */
1570 // clobbered: r20,r21,r25,tr0,tr1,tr2
1571 .mode SHmedia
1572 .section .text..SHmedia32,"ax"
1573 .align 2
1574 GLOBAL(udivsi3):
1575 pt/l LOCAL(udivsi3_dontsub), tr0
1576 pt/l LOCAL(udivsi3_loop), tr1
1577 ptabs/l r18,tr2 /* tr2 = return target */
1578 shlli r5,32,r25 /* r25 = divisor << 32; its low word serves as loop counter */
1579 addi r25,-1,r21 /* r21 = (divisor << 32) - 1 */
1580 addz.l r4,r63,r20 /* r20 = dividend, zero-extended */
1581 LOCAL(udivsi3_loop):
1582 shlli r20,1,r20 /* shift the remainder:quotient pair left by one bit */
1583 bgeu/u r21,r20,tr0 /* no subtraction when r21 >= r20 (unsigned) */
1584 sub r20,r21,r20 /* subtracts divisor << 32 and, through the -1, sets quotient bit 0 */
1585 LOCAL(udivsi3_dontsub):
1586 addi.l r25,-1,r25 /* 32-bit decrement of the counter */
1587 bnei r25,-32,tr1 /* loop until the counter reaches -32 */
1588 add.l r20,r63,r0 /* r0 = low 32 bits of r20: the quotient */
1589 blink tr2,r63
ENDFUNC(GLOBAL(udivsi3))
1590 #else /* ! defined (__SHMEDIA__) */
/* __udivsi3, non-SHmedia variant: unsigned 32-bit division with div1.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   pr is saved on the stack because the div1 runs are shared subroutines
   reached with bsr:
     div8  - eight div1 steps (falls into div7),
     div7  - seven div1 steps, the last one in the rts delay slot,
     divx4 - four div1 steps with a rotcl r0 after each of the first three.
   A divisor that equals its own low 16 bits takes the first path, which
   divides in two halves of sixteen div1 steps each; any other divisor
   goes to large_divisor.  */
1591 LOCAL(div8):
1592 div1 r5,r4
1593 LOCAL(div7):
1594 div1 r5,r4; div1 r5,r4; div1 r5,r4
1595 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

1597 LOCAL(divx4):
1598 div1 r5,r4; rotcl r0
1599 div1 r5,r4; rotcl r0
1600 div1 r5,r4; rotcl r0
1601 rts; div1 r5,r4

1603 GLOBAL(udivsi3):
1604 sts.l pr,@-r15 /* bsr below overwrites pr */
1605 extu.w r5,r0
1606 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
1607 #ifdef __sh1__
1608 bf LOCAL(large_divisor)
1609 #else
1610 bf/s LOCAL(large_divisor)
1611 #endif
1612 div0u /* delay slot of bf/s; with plain bf it only runs on the fall-through path */
1613 swap.w r4,r0
1614 shlr16 r4 /* r4 = upper half of the dividend */
1615 bsr LOCAL(div8)
1616 shll16 r5 /* delay slot: move the divisor into the upper half */
1617 bsr LOCAL(div7)
1618 div1 r5,r4 /* delay slot: together with div7 this makes eight steps */
1619 xtrct r4,r0
1620 xtrct r0,r4
1621 bsr LOCAL(div8)
1622 swap.w r4,r4
1623 bsr LOCAL(div7)
1624 div1 r5,r4
1625 lds.l @r15+,pr
1626 xtrct r4,r0
1627 swap.w r0,r0
1628 rotcl r0
1629 rts
1630 shlr16 r5 /* delay slot: undo the shll16 of the divisor */

1632 LOCAL(large_divisor):
1633 #ifdef __sh1__
1634 div0u
1635 #endif
1636 mov #0,r0
1637 xtrct r4,r0
1638 xtrct r0,r4
1639 bsr LOCAL(divx4)
1640 rotcl r0
1641 bsr LOCAL(divx4)
1642 rotcl r0
1643 bsr LOCAL(divx4)
1644 rotcl r0
1645 bsr LOCAL(divx4)
1646 rotcl r0
1647 lds.l @r15+,pr
1648 rts
1649 rotcl r0 /* delay slot: shift in the last quotient bit */

1651 ENDFUNC(GLOBAL(udivsi3))
1652 #endif /* ! __SHMEDIA__ */
1653 #endif /* __SH4__ */
1654 #endif /* L_udivsi3 */
1655
1656 #ifdef L_udivdi3
1657 #ifdef __SHMEDIA__
/* __udivdi3 (SHmedia): unsigned 64-bit division.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   The entry point is also exported under the hidden alias
   udivdi3_internal.
   Registers written besides r2: r0, r1, r4-r9, r19-r22, r25, tr0.
   A fixed-point reciprocal of the normalised divisor is built in r1 and
   the quotient is then assembled from partial quotients.  Which of the
   two code paths runs is decided by the bgt/u on r9 = 32 - r22, where
   r22 = nsb (divisor >> 1).  */
1658 .mode SHmedia
1659 .section .text..SHmedia32,"ax"
1660 .align 2
1661 .global GLOBAL(udivdi3)
1662 FUNC(GLOBAL(udivdi3))
1663 GLOBAL(udivdi3):
1664 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1665 shlri r3,1,r4
1666 nsb r4,r22
1667 shlld r3,r22,r6 /* r6 = normalised divisor */
1668 shlri r6,49,r5
1669 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1670 sub r21,r5,r1
1671 mmulfx.w r1,r1,r4
1672 mshflo.w r1,r63,r1
1673 sub r63,r22,r20 // r63 == 64 % 64
1674 mmulfx.w r5,r4,r4
1675 pta LOCAL(large_divisor),tr0
1676 addi r20,32,r9
1677 msub.w r1,r4,r1
1678 madd.w r1,r1,r1
1679 mmulfx.w r1,r1,r4
1680 shlri r6,32,r7 /* r7 = upper 32 bits of the normalised divisor */
1681 bgt/u r9,r63,tr0 // large_divisor
1682 mmulfx.w r5,r4,r4
1683 shlri r2,32+14,r19
1684 addi r22,-31,r0
1685 msub.w r1,r4,r1

1687 mulu.l r1,r7,r4
1688 addi r1,-3,r5
1689 mulu.l r5,r19,r5
1690 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1691 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1692 the case may be, %0000000000000000 000.11111111111, still */
1693 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1694 mulu.l r5,r3,r8
1695 mshalds.l r1,r21,r1
1696 shari r4,26,r4
1697 shlld r8,r0,r8
1698 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1699 sub r2,r8,r2
1700 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */

1702 shlri r2,22,r21
1703 mulu.l r21,r1,r21
1704 shlld r5,r0,r8 /* r8 = first partial quotient, shifted into place */
1705 addi r20,30-22,r0
1706 shlrd r21,r0,r21
1707 mulu.l r21,r3,r5
1708 add r8,r21,r8 /* add the second partial quotient */
1709 mcmpgt.l r21,r63,r21 // See Note 1
1710 addi r20,30,r0
1711 mshfhi.l r63,r21,r21
1712 sub r2,r5,r2
1713 andc r2,r21,r2

1715 /* small divisor: need a third divide step */
1716 mulu.l r2,r1,r7
1717 ptabs r18,tr0 /* tr0 = return target */
1718 addi r2,1,r2
1719 shlrd r7,r0,r7
1720 mulu.l r7,r3,r5
1721 add r8,r7,r8 /* add the third partial quotient */
1722 sub r2,r3,r2
1723 cmpgt r2,r5,r5 /* r5 = 1 when a final +1 correction is needed */
1724 add r8,r5,r2 /* r2 = quotient */
1725 /* could test r3 here to check for divide by zero. */
1726 blink tr0,r63

1728 LOCAL(large_divisor):
1729 mmulfx.w r5,r4,r4
1730 shlrd r2,r9,r25
1731 shlri r25,32,r8
1732 msub.w r1,r4,r1

1734 mulu.l r1,r7,r4
1735 addi r1,-3,r5
1736 mulu.l r5,r8,r5
1737 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1738 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1739 the case may be, %0000000000000000 000.11111111111, still */
1740 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1741 shlri r5,14-1,r8
1742 mulu.l r8,r7,r5
1743 mshalds.l r1,r21,r1
1744 shari r4,26,r4
1745 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1746 sub r25,r5,r25
1747 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */

1749 shlri r25,22,r21
1750 mulu.l r21,r1,r21
1751 pta LOCAL(no_lo_adj),tr0
1752 addi r22,32,r0
1753 shlri r21,40,r21
1754 mulu.l r21,r7,r5
1755 add r8,r21,r8
1756 shlld r2,r0,r2
1757 sub r25,r5,r25
1758 bgtu/u r7,r25,tr0 // no_lo_adj
1759 addi r8,1,r8
1760 sub r25,r7,r25
1761 LOCAL(no_lo_adj):
1762 mextr4 r2,r25,r2

1764 /* large_divisor: only needs a few adjustments. */
1765 mulu.l r8,r6,r5
1766 ptabs r18,tr0 /* tr0 = return target */
1767 /* bubble */
1768 cmpgtu r5,r2,r5 /* r5 = 1 when the quotient estimate in r8 is one too large */
1769 sub r8,r5,r2 /* r2 = quotient */
1770 blink tr0,r63
1771 ENDFUNC(GLOBAL(udivdi3))
1772 /* Note 1: To shift the result of the second divide stage so that the result
1773 always fits into 32 bits, yet we still reduce the rest sufficiently
1774 would require a lot of instructions to do the shifts just right. Using
1775 the full 64 bit shift result to multiply with the divisor would require
1776 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1777 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1778 know that the rest after taking this partial result into account will
1779 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1780 upper 32 bits of the partial result are nonzero. */
1781 #endif /* __SHMEDIA__ */
1782 #endif /* L_udivdi3 */
1783
1784 #ifdef L_divdi3
1785 #ifdef __SHMEDIA__
/* __divdi3 (SHmedia): signed 64-bit division on top of
   udivdi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Both operands are replaced by their absolute values.  When their signs
   agree the unsigned routine is entered by a plain branch and returns
   straight to the caller; otherwise it is called and its result is
   negated.  */
1786 .mode SHmedia
1787 .section .text..SHmedia32,"ax"
1788 .align 2
1789 .global GLOBAL(divdi3)
1790 FUNC(GLOBAL(divdi3))
1791 GLOBAL(divdi3):
1792 pta GLOBAL(udivdi3_internal),tr0
1793 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1794 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1795 xor r2,r22,r2
1796 xor r3,r23,r3
1797 sub r2,r22,r2 /* r2 = |dividend| */
1798 sub r3,r23,r3 /* r3 = |divisor| */
1799 beq/u r22,r23,tr0 /* equal signs: the unsigned quotient is the result */
1800 ptabs r18,tr1 /* keep the caller's return address in tr1 */
1801 blink tr0,r18 /* call udivdi3_internal; r18 receives the return address for it */
1802 sub r63,r2,r2 /* negate the quotient */
1803 blink tr1,r63
1804 ENDFUNC(GLOBAL(divdi3))
1805 #endif /* __SHMEDIA__ */
1806 #endif /* L_divdi3 */
1807
1808 #ifdef L_umoddi3
1809 #ifdef __SHMEDIA__
/* __umoddi3 (SHmedia): unsigned 64-bit remainder.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder.
   The entry point is also exported under the hidden alias
   umoddi3_internal.
   Registers written besides r2: r0, r1, r4-r9, r19-r22, r25, tr0.
   The reciprocal computation and the choice between the two code paths
   follow the same instruction sequence as udivdi3 in this file; here the
   partial quotients are only used to reduce the rest, which is what gets
   returned.  */
1810 .mode SHmedia
1811 .section .text..SHmedia32,"ax"
1812 .align 2
1813 .global GLOBAL(umoddi3)
1814 FUNC(GLOBAL(umoddi3))
1815 GLOBAL(umoddi3):
1816 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1817 shlri r3,1,r4
1818 nsb r4,r22
1819 shlld r3,r22,r6 /* r6 = normalised divisor */
1820 shlri r6,49,r5
1821 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1822 sub r21,r5,r1
1823 mmulfx.w r1,r1,r4
1824 mshflo.w r1,r63,r1
1825 sub r63,r22,r20 // r63 == 64 % 64
1826 mmulfx.w r5,r4,r4
1827 pta LOCAL(large_divisor),tr0
1828 addi r20,32,r9
1829 msub.w r1,r4,r1
1830 madd.w r1,r1,r1
1831 mmulfx.w r1,r1,r4
1832 shlri r6,32,r7 /* r7 = upper 32 bits of the normalised divisor */
1833 bgt/u r9,r63,tr0 // large_divisor
1834 mmulfx.w r5,r4,r4
1835 shlri r2,32+14,r19
1836 addi r22,-31,r0
1837 msub.w r1,r4,r1

1839 mulu.l r1,r7,r4
1840 addi r1,-3,r5
1841 mulu.l r5,r19,r5
1842 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1843 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1844 the case may be, %0000000000000000 000.11111111111, still */
1845 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1846 mulu.l r5,r3,r5
1847 mshalds.l r1,r21,r1
1848 shari r4,26,r4
1849 shlld r5,r0,r5
1850 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1851 sub r2,r5,r2
1852 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */

1854 shlri r2,22,r21
1855 mulu.l r21,r1,r21
1856 addi r20,30-22,r0
1857 /* bubble */ /* could test r3 here to check for divide by zero. */
1858 shlrd r21,r0,r21
1859 mulu.l r21,r3,r5
1860 mcmpgt.l r21,r63,r21 // See Note 1
1861 addi r20,30,r0
1862 mshfhi.l r63,r21,r21
1863 sub r2,r5,r2
1864 andc r2,r21,r2

1866 /* small divisor: need a third divide step */
1867 mulu.l r2,r1,r7
1868 ptabs r18,tr0 /* tr0 = return target */
1869 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1870 shlrd r7,r0,r7
1871 mulu.l r7,r3,r5
1872 /* bubble */
1873 addi r8,1,r7
1874 cmpgt r7,r5,r7 /* r7 = 1 when one more divisor has to come off */
1875 cmvne r7,r8,r2 /* if so, continue with rest - r3 */
1876 sub r2,r5,r2 /* r2 = remainder */
1877 blink tr0,r63

1879 LOCAL(large_divisor):
1880 mmulfx.w r5,r4,r4
1881 shlrd r2,r9,r25
1882 shlri r25,32,r8
1883 msub.w r1,r4,r1

1885 mulu.l r1,r7,r4
1886 addi r1,-3,r5
1887 mulu.l r5,r8,r5
1888 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1889 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1890 the case may be, %0000000000000000 000.11111111111, still */
1891 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1892 shlri r5,14-1,r8
1893 mulu.l r8,r7,r5
1894 mshalds.l r1,r21,r1
1895 shari r4,26,r4
1896 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1897 sub r25,r5,r25
1898 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */

1900 shlri r25,22,r21
1901 mulu.l r21,r1,r21
1902 pta LOCAL(no_lo_adj),tr0
1903 addi r22,32,r0
1904 shlri r21,40,r21
1905 mulu.l r21,r7,r5
1906 add r8,r21,r8
1907 shlld r2,r0,r2
1908 sub r25,r5,r25
1909 bgtu/u r7,r25,tr0 // no_lo_adj
1910 addi r8,1,r8
1911 sub r25,r7,r25
1912 LOCAL(no_lo_adj):
1913 mextr4 r2,r25,r2

1915 /* large_divisor: only needs a few adjustments. */
1916 mulu.l r8,r6,r5
1917 ptabs r18,tr0 /* tr0 = return target */
1918 add r2,r6,r7 /* r7 = rest + normalised divisor, used if the estimate was too large */
1919 cmpgtu r5,r2,r8
1920 cmvne r8,r7,r2
1921 sub r2,r5,r2
1922 shlrd r2,r22,r2 /* shift right by r22, the count used to normalise the divisor */
1923 blink tr0,r63
1924 ENDFUNC(GLOBAL(umoddi3))
1925 /* Note 1: To shift the result of the second divide stage so that the result
1926 always fits into 32 bits, yet we still reduce the rest sufficiently
1927 would require a lot of instructions to do the shifts just right. Using
1928 the full 64 bit shift result to multiply with the divisor would require
1929 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1930 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1931 know that the rest after taking this partial result into account will
1932 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1933 upper 32 bits of the partial result are nonzero. */
1934 #endif /* __SHMEDIA__ */
1935 #endif /* L_umoddi3 */
1936
1937 #ifdef L_moddi3
1938 #ifdef __SHMEDIA__
/* __moddi3 (SHmedia): signed 64-bit remainder on top of
   umoddi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder, negated when the dividend was negative.
   Both operands are replaced by their absolute values.  For a
   non-negative dividend the unsigned routine is entered by a plain branch
   and returns straight to the caller; otherwise it is called and its
   result is negated.  */
1939 .mode SHmedia
1940 .section .text..SHmedia32,"ax"
1941 .align 2
1942 .global GLOBAL(moddi3)
1943 FUNC(GLOBAL(moddi3))
1944 GLOBAL(moddi3):
1945 pta GLOBAL(umoddi3_internal),tr0
1946 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1947 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1948 xor r2,r22,r2
1949 xor r3,r23,r3
1950 sub r2,r22,r2 /* r2 = |dividend| */
1951 sub r3,r23,r3 /* r3 = |divisor| */
1952 beq/u r22,r63,tr0 /* dividend >= 0: the unsigned remainder is the result */
1953 ptabs r18,tr1 /* keep the caller's return address in tr1 */
1954 blink tr0,r18 /* call umoddi3_internal; r18 receives the return address for it */
1955 sub r63,r2,r2 /* negate the remainder */
1956 blink tr1,r63
1957 ENDFUNC(GLOBAL(moddi3))
1958 #endif /* __SHMEDIA__ */
1959 #endif /* L_moddi3 */
1960
1961 #ifdef L_set_fpscr
1962 #if !defined (__SH2A_NOFPU__)
1963 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1964 #ifdef __SH5__
1965 .mode SHcompact
1966 #endif
/* __set_fpscr: load fpscr from r4 and refresh the two-word
   fpscr_values array from the same value.
   In:  r4 = new fpscr contents.
   Two variants of r4 are stored.  They differ only in bits 19 and 20,
   which are reached as bits 3 and 4 after the swap.w (presumably the PR
   and SZ fields of fpscr - confirm against the CPU manual):
     - one with bit 19 set, and bit 20 set only when FMOVD_WORKS is
       defined;
     - one with both bits clear.
   For __SH4__ / __SH2A_DOUBLE__ the first variant goes to
   fpscr_values + 4 and the second to fpscr_values + 0; for the other
   configurations the two slots are exchanged.
   Under __PIC__ the address of fpscr_values is fetched through the GOT,
   with r12 saved and restored around the lookup.
   Scratch: r0, r1, r2, r3.  */
1967 .global GLOBAL(set_fpscr)
1968 HIDDEN_FUNC(GLOBAL(set_fpscr))
1969 GLOBAL(set_fpscr):
1970 lds r4,fpscr
1971 #ifdef __PIC__
1972 mov.l r12,@-r15
1973 #ifdef __vxworks
1974 mov.l LOCAL(set_fpscr_L0_base),r12
1975 mov.l LOCAL(set_fpscr_L0_index),r0
1976 mov.l @r12,r12
1977 mov.l @(r0,r12),r12
1978 #else
1979 mova LOCAL(set_fpscr_L0),r0
1980 mov.l LOCAL(set_fpscr_L0),r12
1981 add r0,r12 /* r12 = address of the literal + its contents */
1982 #endif
1983 mov.l LOCAL(set_fpscr_L1),r0
1984 mov.l @(r0,r12),r1 /* r1 = address of fpscr_values, read from the GOT */
1985 mov.l @r15+,r12
1986 #else
1987 mov.l LOCAL(set_fpscr_L1),r1
1988 #endif
1989 swap.w r4,r0 /* bring bits 16..31 of the new fpscr into the low half of r0 */
1990 or #24,r0 /* set bits 3 and 4, i.e. fpscr bits 19 and 20 */
1991 #ifndef FMOVD_WORKS
1992 xor #16,r0 /* clear bit 4 again */
1993 #endif
1994 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1995 swap.w r0,r3
1996 mov.l r3,@(4,r1)
1997 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1998 swap.w r0,r2
1999 mov.l r2,@r1
2000 #endif
2001 #ifndef FMOVD_WORKS
2002 xor #8,r0
2003 #else
2004 xor #24,r0
2005 #endif
2006 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2007 swap.w r0,r2
2008 rts
2009 mov.l r2,@r1
2010 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2011 swap.w r0,r3
2012 rts
2013 mov.l r3,@(4,r1)
2014 #endif
2015 .align 2
2016 #ifdef __PIC__
2017 #ifdef __vxworks
2018 LOCAL(set_fpscr_L0_base):
2019 .long ___GOTT_BASE__
2020 LOCAL(set_fpscr_L0_index):
2021 .long ___GOTT_INDEX__
2022 #else
2023 LOCAL(set_fpscr_L0):
2024 .long _GLOBAL_OFFSET_TABLE_
2025 #endif
2026 LOCAL(set_fpscr_L1):
2027 .long GLOBAL(fpscr_values@GOT)
2028 #else
2029 LOCAL(set_fpscr_L1):
2030 .long GLOBAL(fpscr_values)
2031 #endif

2033 ENDFUNC(GLOBAL(set_fpscr))
2034 #ifndef NO_FPSCR_VALUES
2035 #ifdef __ELF__
2036 .comm GLOBAL(fpscr_values),8,4
2037 #else
2038 .comm GLOBAL(fpscr_values),8
2039 #endif /* ELF */
2040 #endif /* NO_FPSCR_VALUES */
2041 #endif /* SH2E / SH3E / SH4 */
2042 #endif /* __SH2A_NOFPU__ */
2043 #endif /* L_set_fpscr */
2044 #ifdef L_ic_invalidate
2045 #if __SH5__ == 32
2046 .mode SHmedia
2047 .section .text..SHmedia32,"ax"
2048 .align 2
/* __init_trampoline (SH5, 32-bit): fill in a 16-byte trampoline at the
   address in r0, then fall through into __ic_invalidate to flush it.
   In:  r0 = trampoline address, r2 and r3 = the two words stored at
        offsets 8 and 12, r18 = return address.
   Offset 0 receives a 64-bit constant assembled in r20 from the 16-bit
   pieces 0xd002, 0xd101, 0x402b and 0x0009 - presumably SHcompact
   opcodes; confirm against sh.c.  The order of the shori steps depends
   on endianness so that the pieces end up in the same memory order.
   There is intentionally no return between the two functions.  */
2049 .global GLOBAL(init_trampoline)
2050 HIDDEN_FUNC(GLOBAL(init_trampoline))
2051 GLOBAL(init_trampoline):
2052 st.l r0,8,r2
2053 #ifdef __LITTLE_ENDIAN__
2054 movi 9,r20
2055 shori 0x402b,r20
2056 shori 0xd101,r20
2057 shori 0xd002,r20
2058 #else
2059 movi 0xffffffffffffd002,r20
2060 shori 0xd101,r20
2061 shori 0x402b,r20
2062 shori 9,r20
2063 #endif
2064 st.q r0,0,r20
2065 st.l r0,12,r3
2066 ENDFUNC(GLOBAL(init_trampoline))
2067 .global GLOBAL(ic_invalidate)
2068 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2069 GLOBAL(ic_invalidate):
2070 ocbwb r0,0 /* write back the operand cache block containing r0 */
2071 synco
2072 icbi r0, 0 /* invalidate the instruction cache block containing r0 */
2073 ptabs r18, tr0 /* tr0 = return target */
2074 synci
2075 blink tr0, r63
2076 ENDFUNC(GLOBAL(ic_invalidate))
2077 #elif defined(__SH4A__)
/* __ic_invalidate (SH4A): make code written at the address in r4
   visible to instruction fetch, using the cache instructions directly.
   In:  r4 = address inside the cache line to invalidate.  */
2078 .global GLOBAL(ic_invalidate)
2079 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2080 GLOBAL(ic_invalidate):
2081 ocbwb @r4 /* write back the operand cache block */
2082 synco
2083 icbi @r4 /* invalidate the instruction cache block */
2084 rts
2085 nop
2086 ENDFUNC(GLOBAL(ic_invalidate))
2087 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2088 /* For system code, we use ic_invalidate_line_i, but user code
2089 needs a different mechanism. A kernel call is generally not
2090 available, and it would also be slow. Different SH4 variants use
2091 different sizes and associativities of the Icache. We use a small
2092 bit of dispatch code that can be put hidden in every shared object,
2093 which calls the actual processor-specific invalidation code in a
2094 separate module.
2095 Or if you have operating system support, the OS could mmap the
2096 processor-specific code from a single page, since it is highly
2097 repetitive. */
/* __ic_invalidate (SH4 dispatcher).
   In:  r4 = address inside the cache line to invalidate.
   The routine writes back the operand cache block at r4 and then jumps
   into ic_invalidate_array at offset ((r4 - array) & mask), where the
   mask is the word stored at array + 8.  The word at array + 4 is loaded
   into r0 in the delay slot of the jump for use by the array code.
   On entry to the array r1 = array address and r4 = original r4 - r1.
   Under __pic__ the array address is fetched through the GOT.
   ENDFUNC is emitted after the literal pool for both the PIC and the
   non-PIC build, so the symbol gets its size in either configuration.  */
2098 .global GLOBAL(ic_invalidate)
2099 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2100 GLOBAL(ic_invalidate):
2101 #ifdef __pic__
2102 #ifdef __vxworks
2103 mov.l 1f,r1
2104 mov.l 2f,r0
2105 mov.l @r1,r1
2106 mov.l 0f,r2
2107 mov.l @(r0,r1),r0
2108 #else
2109 mov.l 1f,r1
2110 mova 1f,r0
2111 mov.l 0f,r2
2112 add r1,r0 /* r0 = address of the literal + its contents */
2113 #endif
2114 mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read from the GOT */
2115 #else
2116 mov.l 0f,r1 /* r1 = address of ic_invalidate_array */
2117 #endif
2118 ocbwb @r4 /* write back the operand cache block */
2119 mov.l @(8,r1),r0 /* r0 = offset mask stored in the array */
2120 sub r1,r4 /* r4 = offset of the target address from the array */
2121 and r4,r0
2122 add r1,r0 /* r0 = entry point inside the array */
2123 jmp @r0
2124 mov.l @(4,r1),r0 /* delay slot: r0 = word at array + 4 */
2125 .align 2
2126 #ifndef __pic__
2127 0: .long GLOBAL(ic_invalidate_array)
2128 #else /* __pic__ */
2129 .global GLOBAL(ic_invalidate_array)
2130 0: .long GLOBAL(ic_invalidate_array)@GOT
2131 #ifdef __vxworks
2132 1: .long ___GOTT_BASE__
2133 2: .long ___GOTT_INDEX__
2134 #else
2135 1: .long _GLOBAL_OFFSET_TABLE_
2136 #endif
2137 #endif /* __pic__ */
2138 ENDFUNC(GLOBAL(ic_invalidate))
2139 #endif /* SH4 */
2140 #endif /* L_ic_invalidate */
2141
2142 #ifdef L_ic_invalidate_array
2143 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* __ic_invalidate_array (SH4A): entry used by the SH4 __ic_invalidate
   dispatcher, which arrives with r1 = address of this array and
   r4 = target address - r1, and which has read
     - the word at offset 4 into r0 (not used here), and
     - the word at offset 8 as the mask that selects the entry offset.
   The mask must therefore be zero so that every call enters at offset 0.
   The code is laid out so that the zero word really sits at offset 8:
   the first two instructions branch over it.  The operations performed
   are add r1,r4 / synco / icbi @r4 / rts, in that order.  */
2145 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2146 .global GLOBAL(ic_invalidate_array)
2147 FUNC(GLOBAL(ic_invalidate_array))
.align 2
2148 GLOBAL(ic_invalidate_array):
2149 add r1,r4 /* offset 0: rebuild the target address */
bra LOCAL(ic_invalidate_array_sync) /* offset 2 */
nop /* offset 4: delay slot */
nop /* offset 6: padding up to the mask word */
.long 0 /* offset 8: mask read by the SH4 dispatcher */
LOCAL(ic_invalidate_array_sync):
2150 synco
2151 icbi @r4 /* invalidate the instruction cache block */
2152 rts
2153 nop
2156 ENDFUNC(GLOBAL(ic_invalidate_array))
2157 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* __ic_invalidate_array (SH4): a block of code as large as the
   instruction cache, built from 32-byte units with .rept, that the SH4
   __ic_invalidate dispatcher jumps into.
   WAYS and WAY_SIZE default to 4 and 0x4000 and may be predefined.
   Every 32-byte unit of the first two layouts carries, at offsets 4 and
   8, the two words the dispatcher reads from the start of the array:
   the value loaded into r0 and the offset mask WAY_SIZE - 32.
     WAYS == 1: each unit returns immediately.
     WAYS <= 6: the first braf, with r0 reduced by 8 in its delay slot,
                lands in the unit WAY_SIZE bytes further on; each
                following braf r0 hops another WAY_SIZE bytes until an
                rts is reached.
     WAYS > 6:  WAYS-1 ways of braf units followed by one way of units
                that return.  */
2158 .global GLOBAL(ic_invalidate_array)
2159 .p2align 5
2160 FUNC(GLOBAL(ic_invalidate_array))
2161 /* This must be aligned to the beginning of a cache line. */
2162 GLOBAL(ic_invalidate_array):
2163 #ifndef WAYS
2164 #define WAYS 4
2165 #define WAY_SIZE 0x4000
2166 #endif
2167 #if WAYS == 1
2168 .rept WAY_SIZE * WAYS / 32
2169 rts
2170 nop
2171 .rept 7
2172 .long WAY_SIZE - 32
2173 .endr
2174 .endr
2175 #elif WAYS <= 6
2176 .rept WAY_SIZE * WAYS / 32
2177 braf r0
2178 add #-8,r0
2179 .long WAY_SIZE + 8
2180 .long WAY_SIZE - 32
2181 .rept WAYS-2
2182 braf r0
2183 nop
2184 .endr
2185 .rept 7 - WAYS
2186 rts
2187 nop
2188 .endr
2189 .endr
2190 #else /* WAYS > 6 */
2191 /* This variant needs two different pages for mmap-ing. */
2192 .rept WAYS-1
2193 .rept WAY_SIZE / 32
2194 braf r0
2195 nop
2196 .long WAY_SIZE
2197 .rept 6
2198 .long WAY_SIZE - 32
2199 .endr
2200 .endr
2201 .endr
2202 .rept WAY_SIZE / 32
2203 rts
2204 .rept 15
2205 nop
2206 .endr
2207 .endr
2208 #endif /* WAYS */
2209 ENDFUNC(GLOBAL(ic_invalidate_array))
2210 #endif /* SH4 */
2211 #endif /* L_ic_invalidate_array */
2212
2213 #if defined (__SH5__) && __SH5__ == 32
2214 #ifdef L_shcompact_call_trampoline
/* Dispatch table of GCC_shcompact_call_trampoline: 33 16-bit entries,
   each the distance from ct_main_label to one handler.  The trampoline
   loads an entry with ldx.w, using twice the nsb result of the cookie in
   r1 as index relative to ct_main_table - 31 * 2, and branches to it
   with ptrel/blink.  */
2215 .section .rodata
2216 .align 1
2217 LOCAL(ct_main_table):
2218 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2248 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2249 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2250 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2251 .mode SHmedia
2252 .section .text..SHmedia32, "ax"
2253 .align 2
2254
2255 /* This function loads 64-bit general-purpose registers from the
2256 stack, from a memory address contained in them or from an FP
2257 register, according to a cookie passed in r1. Its execution
2258 time is linear on the number of registers that actually have
2259 to be copied. See sh.h for details on the actual bit pattern.
2260
2261 The function to be called is passed in r0. If a 32-bit return
2262 value is expected, the actual function will be tail-called,
2263 otherwise the return address will be stored in r10 (that the
2264 caller should expect to be clobbered) and the return value
2265 will be expanded into r2/r3 upon return. */
2266
2267 .global GLOBAL(GCC_shcompact_call_trampoline)
2268 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2269 GLOBAL(GCC_shcompact_call_trampoline):
2270 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2271 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2272 pt/l LOCAL(ct_loop), tr1
2273 addz.l r1, r63, r1
2274 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2275 LOCAL(ct_loop):
2276 nsb r1, r28
2277 shlli r28, 1, r29
2278 ldx.w r0, r29, r30
2279 LOCAL(ct_main_label):
2280 ptrel/l r30, tr2
2281 blink tr2, r63
2282 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2283 /* It must be dr0, so just do it. */
2284 fmov.dq dr0, r2
2285 movi 7, r30
2286 shlli r30, 29, r31
2287 andc r1, r31, r1
2288 blink tr1, r63
2289 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2290 /* It is either dr0 or dr2. */
2291 movi 7, r30
2292 shlri r1, 26, r32
2293 shlli r30, 26, r31
2294 andc r1, r31, r1
2295 fmov.dq dr0, r3
2296 beqi/l r32, 4, tr1
2297 fmov.dq dr2, r3
2298 blink tr1, r63
2299 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2300 shlri r1, 23 - 3, r34
2301 andi r34, 3 << 3, r33
2302 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2303 LOCAL(ct_r4_fp_base):
2304 ptrel/l r32, tr2
2305 movi 7, r30
2306 shlli r30, 23, r31
2307 andc r1, r31, r1
2308 blink tr2, r63
2309 LOCAL(ct_r4_fp_copy):
2310 fmov.dq dr0, r4
2311 blink tr1, r63
2312 fmov.dq dr2, r4
2313 blink tr1, r63
2314 fmov.dq dr4, r4
2315 blink tr1, r63
2316 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2317 shlri r1, 20 - 3, r34
2318 andi r34, 3 << 3, r33
2319 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2320 LOCAL(ct_r5_fp_base):
2321 ptrel/l r32, tr2
2322 movi 7, r30
2323 shlli r30, 20, r31
2324 andc r1, r31, r1
2325 blink tr2, r63
2326 LOCAL(ct_r5_fp_copy):
2327 fmov.dq dr0, r5
2328 blink tr1, r63
2329 fmov.dq dr2, r5
2330 blink tr1, r63
2331 fmov.dq dr4, r5
2332 blink tr1, r63
2333 fmov.dq dr6, r5
2334 blink tr1, r63
2335 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2336 /* It must be dr8. */
2337 fmov.dq dr8, r6
2338 movi 15, r30
2339 shlli r30, 16, r31
2340 andc r1, r31, r1
2341 blink tr1, r63
2342 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2343 shlri r1, 16 - 3, r34
2344 andi r34, 3 << 3, r33
2345 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2346 LOCAL(ct_r6_fp_base):
2347 ptrel/l r32, tr2
2348 movi 7, r30
2349 shlli r30, 16, r31
2350 andc r1, r31, r1
2351 blink tr2, r63
2352 LOCAL(ct_r6_fp_copy):
2353 fmov.dq dr0, r6
2354 blink tr1, r63
2355 fmov.dq dr2, r6
2356 blink tr1, r63
2357 fmov.dq dr4, r6
2358 blink tr1, r63
2359 fmov.dq dr6, r6
2360 blink tr1, r63
2361 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2362 /* It is either dr8 or dr10. */
2363 movi 15 << 12, r31
2364 shlri r1, 12, r32
2365 andc r1, r31, r1
2366 fmov.dq dr8, r7
2367 beqi/l r32, 8, tr1
2368 fmov.dq dr10, r7
2369 blink tr1, r63
2370 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2371 shlri r1, 12 - 3, r34
2372 andi r34, 3 << 3, r33
2373 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2374 LOCAL(ct_r7_fp_base):
2375 ptrel/l r32, tr2
2376 movi 7 << 12, r31
2377 andc r1, r31, r1
2378 blink tr2, r63
2379 LOCAL(ct_r7_fp_copy):
2380 fmov.dq dr0, r7
2381 blink tr1, r63
2382 fmov.dq dr2, r7
2383 blink tr1, r63
2384 fmov.dq dr4, r7
2385 blink tr1, r63
2386 fmov.dq dr6, r7
2387 blink tr1, r63
2388 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2389 /* It is either dr8 or dr10. */
2390 movi 15 << 8, r31
2391 andi r1, 1 << 8, r32
2392 andc r1, r31, r1
2393 fmov.dq dr8, r8
2394 beq/l r32, r63, tr1
2395 fmov.dq dr10, r8
2396 blink tr1, r63
2397 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2398 shlri r1, 8 - 3, r34
2399 andi r34, 3 << 3, r33
2400 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2401 LOCAL(ct_r8_fp_base):
2402 ptrel/l r32, tr2
2403 movi 7 << 8, r31
2404 andc r1, r31, r1
2405 blink tr2, r63
2406 LOCAL(ct_r8_fp_copy):
2407 fmov.dq dr0, r8
2408 blink tr1, r63
2409 fmov.dq dr2, r8
2410 blink tr1, r63
2411 fmov.dq dr4, r8
2412 blink tr1, r63
2413 fmov.dq dr6, r8
2414 blink tr1, r63
2415 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2416 /* It is either dr8 or dr10. */
2417 movi 15 << 4, r31
2418 andi r1, 1 << 4, r32
2419 andc r1, r31, r1
2420 fmov.dq dr8, r9
2421 beq/l r32, r63, tr1
2422 fmov.dq dr10, r9
2423 blink tr1, r63
2424 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2425 shlri r1, 4 - 3, r34
2426 andi r34, 3 << 3, r33
2427 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2428 LOCAL(ct_r9_fp_base):
2429 ptrel/l r32, tr2
2430 movi 7 << 4, r31
2431 andc r1, r31, r1
2432 blink tr2, r63
2433 LOCAL(ct_r9_fp_copy):
2434 fmov.dq dr0, r9
2435 blink tr1, r63
2436 fmov.dq dr2, r9
2437 blink tr1, r63
2438 fmov.dq dr4, r9
2439 blink tr1, r63
2440 fmov.dq dr6, r9
2441 blink tr1, r63
2442 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2443 pt/l LOCAL(ct_r2_load), tr2
2444 movi 3, r30
2445 shlli r30, 29, r31
2446 and r1, r31, r32
2447 andc r1, r31, r1
2448 beq/l r31, r32, tr2
2449 addi.l r2, 8, r3
2450 ldx.q r2, r63, r2
2451 /* Fall through. */
2452 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2453 pt/l LOCAL(ct_r3_load), tr2
2454 movi 3, r30
2455 shlli r30, 26, r31
2456 and r1, r31, r32
2457 andc r1, r31, r1
2458 beq/l r31, r32, tr2
2459 addi.l r3, 8, r4
2460 ldx.q r3, r63, r3
2461 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2462 pt/l LOCAL(ct_r4_load), tr2
2463 movi 3, r30
2464 shlli r30, 23, r31
2465 and r1, r31, r32
2466 andc r1, r31, r1
2467 beq/l r31, r32, tr2
2468 addi.l r4, 8, r5
2469 ldx.q r4, r63, r4
2470 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2471 pt/l LOCAL(ct_r5_load), tr2
2472 movi 3, r30
2473 shlli r30, 20, r31
2474 and r1, r31, r32
2475 andc r1, r31, r1
2476 beq/l r31, r32, tr2
2477 addi.l r5, 8, r6
2478 ldx.q r5, r63, r5
2479 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2480 pt/l LOCAL(ct_r6_load), tr2
2481 movi 3 << 16, r31
2482 and r1, r31, r32
2483 andc r1, r31, r1
2484 beq/l r31, r32, tr2
2485 addi.l r6, 8, r7
2486 ldx.q r6, r63, r6
2487 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2488 pt/l LOCAL(ct_r7_load), tr2
2489 movi 3 << 12, r31
2490 and r1, r31, r32
2491 andc r1, r31, r1
2492 beq/l r31, r32, tr2
2493 addi.l r7, 8, r8
2494 ldx.q r7, r63, r7
2495 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2496 pt/l LOCAL(ct_r8_load), tr2
2497 movi 3 << 8, r31
2498 and r1, r31, r32
2499 andc r1, r31, r1
2500 beq/l r31, r32, tr2
2501 addi.l r8, 8, r9
2502 ldx.q r8, r63, r8
2503 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2504 pt/l LOCAL(ct_check_tramp), tr2
2505 ldx.q r9, r63, r9
2506 blink tr2, r63
2507 LOCAL(ct_r2_load):
2508 ldx.q r2, r63, r2
2509 blink tr1, r63
2510 LOCAL(ct_r3_load):
2511 ldx.q r3, r63, r3
2512 blink tr1, r63
2513 LOCAL(ct_r4_load):
2514 ldx.q r4, r63, r4
2515 blink tr1, r63
2516 LOCAL(ct_r5_load):
2517 ldx.q r5, r63, r5
2518 blink tr1, r63
2519 LOCAL(ct_r6_load):
2520 ldx.q r6, r63, r6
2521 blink tr1, r63
2522 LOCAL(ct_r7_load):
2523 ldx.q r7, r63, r7
2524 blink tr1, r63
2525 LOCAL(ct_r8_load):
2526 ldx.q r8, r63, r8
2527 blink tr1, r63
2528 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2529 movi 1, r30
2530 ldx.q r15, r63, r2
2531 shlli r30, 29, r31
2532 addi.l r15, 8, r15
2533 andc r1, r31, r1
2534 blink tr1, r63
2535 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2536 movi 1, r30
2537 ldx.q r15, r63, r3
2538 shlli r30, 26, r31
2539 addi.l r15, 8, r15
2540 andc r1, r31, r1
2541 blink tr1, r63
2542 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2543 movi 1, r30
2544 ldx.q r15, r63, r4
2545 shlli r30, 23, r31
2546 addi.l r15, 8, r15
2547 andc r1, r31, r1
2548 blink tr1, r63
2549 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2550 movi 1, r30
2551 ldx.q r15, r63, r5
2552 shlli r30, 20, r31
2553 addi.l r15, 8, r15
2554 andc r1, r31, r1
2555 blink tr1, r63
2556 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2557 movi 1, r30
2558 ldx.q r15, r63, r6
2559 shlli r30, 16, r31
2560 addi.l r15, 8, r15
2561 andc r1, r31, r1
2562 blink tr1, r63
2563 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2564 ldx.q r15, r63, r7
2565 movi 1 << 12, r31
2566 addi.l r15, 8, r15
2567 andc r1, r31, r1
2568 blink tr1, r63
2569 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2570 ldx.q r15, r63, r8
2571 movi 1 << 8, r31
2572 addi.l r15, 8, r15
2573 andc r1, r31, r1
2574 blink tr1, r63
2575 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2576 andi r1, 7 << 1, r30
2577 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2578 shlli r30, 2, r31
2579 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2580 sub.l r32, r31, r33
2581 ptabs/l r33, tr2
2582 blink tr2, r63
2583 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2584 ldx.q r15, r63, r3
2585 addi.l r15, 8, r15
2586 ldx.q r15, r63, r4
2587 addi.l r15, 8, r15
2588 ldx.q r15, r63, r5
2589 addi.l r15, 8, r15
2590 ldx.q r15, r63, r6
2591 addi.l r15, 8, r15
2592 ldx.q r15, r63, r7
2593 addi.l r15, 8, r15
2594 ldx.q r15, r63, r8
2595 addi.l r15, 8, r15
2596 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2597 ldx.q r15, r63, r9
2598 addi.l r15, 8, r15
2599 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2600 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2601 pt/u LOCAL(ct_ret_wide), tr2
2602 andi r1, 1, r1
2603 bne/u r1, r63, tr2
2604 LOCAL(ct_call_func): /* Just branch to the function. */
2605 blink tr0, r63
2606 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2607 64-bit return value. */
2608 add.l r18, r63, r10
2609 blink tr0, r18
2610 ptabs r10, tr0
2611 #if __LITTLE_ENDIAN__
2612 shari r2, 32, r3
2613 add.l r2, r63, r2
2614 #else
2615 add.l r2, r63, r3
2616 shari r2, 32, r2
2617 #endif
2618 blink tr0, r63
2619
2620 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2621 #endif /* L_shcompact_call_trampoline */
2622
2623 #ifdef L_shcompact_return_trampoline
2624 /* This function does the converse of the code in `ret_wide'
2625 above. It is tail-called by SHcompact functions returning
2626 64-bit non-floating-point values, to pack the 32-bit values in
2627 r2 and r3 into r2. */
/* In:   r2, r3 = the two 32-bit halves of the value; r18 = address to
         continue at (copied to tr0 and branched to at the end).
   Out:  r2 = the packed 64-bit value.
   Also written: r3 (used as a temporary) and tr0.
   Which input register ends up in the upper 32 bits is selected by
   __LITTLE_ENDIAN__: r3 when it is non-zero, r2 otherwise.  */
2628
2629 .mode SHmedia
2630 .section .text..SHmedia32, "ax"
2631 .align 2
2632 .global GLOBAL(GCC_shcompact_return_trampoline)
2633 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2634 GLOBAL(GCC_shcompact_return_trampoline):
/* Set up the branch target for the final blink from r18.  */
2635 ptabs/l r18, tr0
2636 #if __LITTLE_ENDIAN__
/* r2 supplies the low half: keep its low 32 bits, zero-extended (r63 is
   used as a zero operand).  r3 supplies the high half: shift it up.  */
2637 addz.l r2, r63, r2
2638 shlli r3, 32, r3
2639 #else
/* Mirror image: r3 supplies the low half, r2 the high half.  */
2640 addz.l r3, r63, r3
2641 shlli r2, 32, r2
2642 #endif
/* Merge the two halves into r2 and branch to the address taken from r18;
   r63 as the blink destination means no link value is kept.  */
2643 or r3, r2, r2
2644 blink tr0, r63
2645
2646 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2647 #endif /* L_shcompact_return_trampoline */
2648
2649 #ifdef L_shcompact_incoming_args
2650 .section .rodata
2651 .align 1
/* Dispatch table used by the loop in GCC_shcompact_incoming_args.
   It holds 33 16-bit entries; each is the offset of a handler relative to
   ia_main_label.  The loop indexes it with 2 * nsb (cookie) from a base
   that is biased by -31 * 2, so the first entry is selected when
   nsb == 31 and the last one when nsb == 63.  In terms of the cookie that
   means: first entry = bit 31 is the highest set bit, each following
   entry = the next lower bit, last entry = no bit set at all.
   The bit numbers in the notes below follow from that and agree with the
   masks the individual handlers clear.  */
2652 LOCAL(ia_main_table):
/* Bit 31.  */
2653 .word 1 /* Invalid, just loop */
/* Bits 30 and 29: r2.  */
2654 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2655 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
/* Bit 28, then bits 27 and 26: r3.  */
2656 .word 1 /* Invalid, just loop */
2657 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2658 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
/* Bit 25, then bits 24 and 23: r4.  */
2659 .word 1 /* Invalid, just loop */
2660 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2661 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
/* Bit 22, then bits 21 and 20: r5.  */
2662 .word 1 /* Invalid, just loop */
2663 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2664 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* Bits 19 and 18, then bits 17 and 16: r6.  */
2665 .word 1 /* Invalid, just loop */
2666 .word 1 /* Invalid, just loop */
2667 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2668 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
/* Bits 15 and 14, then bits 13 and 12: r7.  */
2669 .word 1 /* Invalid, just loop */
2670 .word 1 /* Invalid, just loop */
2671 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2672 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
/* Bits 11 and 10, then bits 9 and 8: r8.  */
2673 .word 1 /* Invalid, just loop */
2674 .word 1 /* Invalid, just loop */
2675 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2676 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
/* Bits 7 and 6, then bits 5 and 4: r9.  */
2677 .word 1 /* Invalid, just loop */
2678 .word 1 /* Invalid, just loop */
2679 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2680 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Bits 3 and 2: push a run of registers whose length is read from cookie
   bits 3..1.  Bit 1 alone: that run is just r9.  */
2681 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2682 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2683 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Bit 0 alone, or an empty cookie: nothing left to do.  */
2684 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2685 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2686 .mode SHmedia
2687 .section .text..SHmedia32, "ax"
2688 .align 2
2689
2690 /* This function stores 64-bit general-purpose registers back in
2691 the stack, and loads the address in which each register
2692 was stored into itself. The lower 32 bits of r17 hold the address
2693 to begin storing, and the upper 32 bits of r17 hold the cookie.
2694 Its execution time is linear on the
2695 number of registers that actually have to be copied, and it is
2696 optimized for structures larger than 64 bits, as opposed to
2697 individual `long long' arguments. See sh.h for details on the
2698 actual bit pattern. */
/* Register use inside this function:
     r0       remaining cookie bits; each handler clears the bits it
              has dealt with
     r17      address of the next 8-byte slot to store to
     r43      biased address of ia_main_table
     r36-r41  temporaries
     tr0      branch target for returning (taken from r18)
     tr1      ia_loop
     tr2      computed handler address
   r2-r9 are only modified by the *_ld handlers, which replace the
   register by the address its old value was stored at.  */
2699
2700 .global GLOBAL(GCC_shcompact_incoming_args)
2701 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2702 GLOBAL(GCC_shcompact_incoming_args):
2703 ptabs/l r18, tr0 /* Prepare to return. */
2704 shlri r17, 32, r0 /* Load the cookie. */
2705 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2706 pt/l LOCAL(ia_loop), tr1
/* Reduce r17 to its lower 32 bits (the store address); the cookie from the
   upper half is already in r0.  */
2707 add.l r17, r63, r17
2708 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
/* Main loop: find the highest bit still set in the cookie, fetch the
   matching offset from the table and branch to that handler.  */
2709 LOCAL(ia_loop):
2710 nsb r0, r36
2711 shlli r36, 1, r37
2712 ldx.w r43, r37, r38
2713 LOCAL(ia_main_label):
2714 ptrel/l r38, tr2
2715 blink tr2, r63
/* The *_ld handlers all follow one pattern: build the two-bit mask of the
   register's cookie field (r39), remember which of the two bits were set
   (r40), clear the field, store the register at r17, replace the register
   by that address and advance r17 by 8.  If both bits of the field were
   set, control goes back to ia_loop; otherwise it falls through into the
   handler of the next register.  */
2716 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2717 movi 3, r38
2718 shlli r38, 29, r39
2719 and r0, r39, r40
2720 andc r0, r39, r0
2721 stx.q r17, r63, r2
2722 add.l r17, r63, r2
2723 addi.l r17, 8, r17
2724 beq/u r39, r40, tr1
2725 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2726 movi 3, r38
2727 shlli r38, 26, r39
2728 and r0, r39, r40
2729 andc r0, r39, r0
2730 stx.q r17, r63, r3
2731 add.l r17, r63, r3
2732 addi.l r17, 8, r17
2733 beq/u r39, r40, tr1
2734 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2735 movi 3, r38
2736 shlli r38, 23, r39
2737 and r0, r39, r40
2738 andc r0, r39, r0
2739 stx.q r17, r63, r4
2740 add.l r17, r63, r4
2741 addi.l r17, 8, r17
2742 beq/u r39, r40, tr1
2743 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2744 movi 3, r38
2745 shlli r38, 20, r39
2746 and r0, r39, r40
2747 andc r0, r39, r0
2748 stx.q r17, r63, r5
2749 add.l r17, r63, r5
2750 addi.l r17, 8, r17
2751 beq/u r39, r40, tr1
2752 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2753 movi 3, r38
2754 shlli r38, 16, r39
2755 and r0, r39, r40
2756 andc r0, r39, r0
2757 stx.q r17, r63, r6
2758 add.l r17, r63, r6
2759 addi.l r17, 8, r17
2760 beq/u r39, r40, tr1
2761 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2762 movi 3 << 12, r39
2763 and r0, r39, r40
2764 andc r0, r39, r0
2765 stx.q r17, r63, r7
2766 add.l r17, r63, r7
2767 addi.l r17, 8, r17
2768 beq/u r39, r40, tr1
2769 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2770 movi 3 << 8, r39
2771 and r0, r39, r40
2772 andc r0, r39, r0
2773 stx.q r17, r63, r8
2774 add.l r17, r63, r8
2775 addi.l r17, 8, r17
2776 beq/u r39, r40, tr1
/* r9 is the last register handled, so this handler neither touches the
   cookie nor advances r17; it returns directly.  */
2777 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2778 stx.q r17, r63, r9
2779 add.l r17, r63, r9
2780 blink tr0, r63
/* The *_push handlers clear the single cookie bit that selected them,
   store the register at r17 without changing the register, advance r17 by
   8 and go back to ia_loop.  */
2781 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2782 movi 1, r38
2783 shlli r38, 29, r39
2784 andc r0, r39, r0
2785 stx.q r17, r63, r2
2786 addi.l r17, 8, r17
2787 blink tr1, r63
2788 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2789 movi 1, r38
2790 shlli r38, 26, r39
2791 andc r0, r39, r0
2792 stx.q r17, r63, r3
2793 addi.l r17, 8, r17
2794 blink tr1, r63
2795 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2796 movi 1, r38
2797 shlli r38, 23, r39
2798 andc r0, r39, r0
2799 stx.q r17, r63, r4
2800 addi.l r17, 8, r17
2801 blink tr1, r63
2802 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2803 movi 1, r38
2804 shlli r38, 20, r39
2805 andc r0, r39, r0
2806 stx.q r17, r63, r5
2807 addi.l r17, 8, r17
2808 blink tr1, r63
2809 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2810 movi 1, r38
2811 shlli r38, 16, r39
2812 andc r0, r39, r0
2813 stx.q r17, r63, r6
2814 addi.l r17, 8, r17
2815 blink tr1, r63
2816 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2817 movi 1 << 12, r39
2818 andc r0, r39, r0
2819 stx.q r17, r63, r7
2820 addi.l r17, 8, r17
2821 blink tr1, r63
2822 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2823 movi 1 << 8, r39
2824 andc r0, r39, r0
2825 stx.q r17, r63, r8
2826 addi.l r17, 8, r17
2827 blink tr1, r63
/* Computed entry into the store sequence below.  r38 = cookie bits 3..1
   (twice the count), r39 = 8 * count, and the branch goes to
   ia_end_of_push_seq - 8 * count.  Every stx.q/addi.l pair occupies 8
   bytes, and so does the final stx.q of r9 together with the blink, so
   this stores the last `count' registers of r3..r9 and then returns.
   The sequence must therefore keep exactly this instruction layout.  */
2828 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2829 andi r0, 7 << 1, r38
2830 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2831 shlli r38, 2, r39
2832 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2833 sub.l r40, r39, r41
2834 ptabs/l r41, tr2
2835 blink tr2, r63
2836 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2837 stx.q r17, r63, r3
2838 addi.l r17, 8, r17
2839 stx.q r17, r63, r4
2840 addi.l r17, 8, r17
2841 stx.q r17, r63, r5
2842 addi.l r17, 8, r17
2843 stx.q r17, r63, r6
2844 addi.l r17, 8, r17
2845 stx.q r17, r63, r7
2846 addi.l r17, 8, r17
2847 stx.q r17, r63, r8
2848 addi.l r17, 8, r17
2849 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2850 stx.q r17, r63, r9
2851 LOCAL(ia_return): /* Return. */
2852 blink tr0, r63
2853 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2854 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2855 #endif /* L_shcompact_incoming_args */
2856 #endif
2857 #if __SH5__
2858 #ifdef L_nested_trampoline
2859 #if __SH5__ == 32
2860 .section .text..SHmedia32,"ax"
2861 #else
2862 .text
2863 #endif
2864 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
/* Trampoline code template.  It consists of six 4-byte SHmedia
   instructions (24 bytes) and reads two values located relative to the
   address it obtains for itself:
     offset 24        address to branch to (loaded into tr1)
     offset 28 / 32   a second value, left in r1 for the branch target
   The wide (ld.q, offset 32) form is used when __SH5__ == 64, the ld.l
   (offset 28) form otherwise.  r0, r1, tr0 and tr1 are overwritten.
   NOTE(review): the two values are not defined in this file; presumably
   whoever copies the template appends the target function address and the
   static chain value - confirm against the trampoline setup code.  */
2865 .global GLOBAL(GCC_nested_trampoline)
2866 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2867 GLOBAL(GCC_nested_trampoline):
2868 .mode SHmedia
/* PC-relative target with a zero offset (r63), read back into r0 as a base
   address for the loads below.  Presumably this is the address of the
   trampoline's first instruction - TODO confirm the exact PC value and low
   bit that ptrel produces here.  */
2869 ptrel/u r63, tr0
2870 gettr tr0, r0
2871 #if __SH5__ == 64
2872 ld.q r0, 24, r1
2873 #else
2874 ld.l r0, 24, r1
2875 #endif
/* First value: the address to branch to.  */
2876 ptabs/l r1, tr1
/* Second value: stays in r1 across the branch.  */
2877 #if __SH5__ == 64
2878 ld.q r0, 32, r1
2879 #else
2880 ld.l r0, 28, r1
2881 #endif
2882 blink tr1, r63
2883
2884 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2885 #endif /* L_nested_trampoline */
2886 #endif /* __SH5__ */
2887 #if __SH5__ == 32
2888 #ifdef L_push_pop_shmedia_regs
2889 .section .text..SHmedia32,"ax"
2890 .mode SHmedia
2891 .align 2
/* GCC_push_shmedia_regs (built unless __SH4_NOFPU__ is defined) and
   GCC_push_shmedia_regs_nofpu (built when it is defined).

   Both grow the stack downwards through r15 and store a block of
   registers.  Resulting layout, as offsets from the final r15:
     0*8 ..  7*8    r28 .. r35
     8*8 .. 23*8    r44 .. r59
    24*8 .. 26*8    tr5, tr6, tr7
    27*8 .. 40*8    dr36, dr38, ..., dr62   (FPU variant only)
   The FPU variant therefore allocates 14*8 + 27*8 = 41*8 bytes, the nofpu
   variant 27*8 bytes.

   r60-r62 are used to read tr5-tr7 and are overwritten without being
   saved.  tr0 is loaded from r18 and used for the final branch.  */
2892 #ifndef __SH4_NOFPU__
2893 .global GLOBAL(GCC_push_shmedia_regs)
2894 FUNC(GLOBAL(GCC_push_shmedia_regs))
2895 GLOBAL(GCC_push_shmedia_regs):
/* FPU part: reserve 14 slots and store the even-numbered double registers
   dr36..dr62, then fall through into the common integer part.  */
2896 addi.l r15, -14*8, r15
2897 fst.d r15, 13*8, dr62
2898 fst.d r15, 12*8, dr60
2899 fst.d r15, 11*8, dr58
2900 fst.d r15, 10*8, dr56
2901 fst.d r15, 9*8, dr54
2902 fst.d r15, 8*8, dr52
2903 fst.d r15, 7*8, dr50
2904 fst.d r15, 6*8, dr48
2905 fst.d r15, 5*8, dr46
2906 fst.d r15, 4*8, dr44
2907 fst.d r15, 3*8, dr42
2908 fst.d r15, 2*8, dr40
2909 fst.d r15, 1*8, dr38
2910 fst.d r15, 0*8, dr36
2911 #else /* ! __SH4_NOFPU__ */
2912 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2913 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2914 GLOBAL(GCC_push_shmedia_regs_nofpu):
2915 #endif /* ! __SH4_NOFPU__ */
/* Common part: set up the return branch, reserve 27 slots.  */
2916 ptabs/l r18, tr0
2917 addi.l r15, -27*8, r15
/* Target registers cannot be stored directly; copy tr5-tr7 into r60-r62
   first.  The previous contents of r60-r62 are lost.  */
2918 gettr tr7, r62
2919 gettr tr6, r61
2920 gettr tr5, r60
2921 st.q r15, 26*8, r62
2922 st.q r15, 25*8, r61
2923 st.q r15, 24*8, r60
2924 st.q r15, 23*8, r59
2925 st.q r15, 22*8, r58
2926 st.q r15, 21*8, r57
2927 st.q r15, 20*8, r56
2928 st.q r15, 19*8, r55
2929 st.q r15, 18*8, r54
2930 st.q r15, 17*8, r53
2931 st.q r15, 16*8, r52
2932 st.q r15, 15*8, r51
2933 st.q r15, 14*8, r50
2934 st.q r15, 13*8, r49
2935 st.q r15, 12*8, r48
2936 st.q r15, 11*8, r47
2937 st.q r15, 10*8, r46
2938 st.q r15, 9*8, r45
2939 st.q r15, 8*8, r44
/* Note the gap: r36-r43 are not stored by this routine.  */
2940 st.q r15, 7*8, r35
2941 st.q r15, 6*8, r34
2942 st.q r15, 5*8, r33
2943 st.q r15, 4*8, r32
2944 st.q r15, 3*8, r31
2945 st.q r15, 2*8, r30
2946 st.q r15, 1*8, r29
2947 st.q r15, 0*8, r28
2948 blink tr0, r63
2949 #ifndef __SH4_NOFPU__
2950 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2951 #else
2952 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2953 #endif
/* GCC_pop_shmedia_regs (built unless __SH4_NOFPU__ is defined) and
   GCC_pop_shmedia_regs_nofpu (built when it is defined).

   Both reload registers from a block addressed by r15 and then add the
   block size to r15.  Offsets read, relative to r15 on entry:
     0*8 ..  7*8    r28 .. r35
     8*8 .. 23*8    r44 .. r59
    24*8 .. 26*8    values for tr5, tr6, tr7 (loaded through r60-r62)
    27*8 .. 40*8    dr36, dr38, ..., dr62   (FPU variant only)
   r0 carries the number of bytes to release (41*8 or 27*8) and is
   overwritten, as are r60-r62, tr0 (loaded from r18) and, in the FPU
   variant, tr1.  */
2954 #ifndef __SH4_NOFPU__
2955 .global GLOBAL(GCC_pop_shmedia_regs)
2956 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2957 GLOBAL(GCC_pop_shmedia_regs):
/* FPU entry: r0 = size of the whole block; reload dr36..dr62, then branch
   to .L0 so that the `movi 27*8, r0' of the nofpu entry is skipped.  */
2958 pt .L0, tr1
2959 movi 41*8, r0
2960 fld.d r15, 40*8, dr62
2961 fld.d r15, 39*8, dr60
2962 fld.d r15, 38*8, dr58
2963 fld.d r15, 37*8, dr56
2964 fld.d r15, 36*8, dr54
2965 fld.d r15, 35*8, dr52
2966 fld.d r15, 34*8, dr50
2967 fld.d r15, 33*8, dr48
2968 fld.d r15, 32*8, dr46
2969 fld.d r15, 31*8, dr44
2970 fld.d r15, 30*8, dr42
2971 fld.d r15, 29*8, dr40
2972 fld.d r15, 28*8, dr38
2973 fld.d r15, 27*8, dr36
2974 blink tr1, r63
2975 #else /* ! __SH4_NOFPU__ */
2976 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2977 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2978 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2979 #endif /* ! __SH4_NOFPU__ */
/* Without the FPU part only the 27 integer slots are released.  In the FPU
   build this instruction is never reached (see the blink above).  */
2980 movi 27*8, r0
/* Common part.  */
2981 .L0:
2982 ptabs r18, tr0
/* Fetch the saved target-register values into r60-r62 and move them into
   tr7-tr5; r60-r62 keep those values afterwards.  */
2983 ld.q r15, 26*8, r62
2984 ld.q r15, 25*8, r61
2985 ld.q r15, 24*8, r60
2986 ptabs r62, tr7
2987 ptabs r61, tr6
2988 ptabs r60, tr5
2989 ld.q r15, 23*8, r59
2990 ld.q r15, 22*8, r58
2991 ld.q r15, 21*8, r57
2992 ld.q r15, 20*8, r56
2993 ld.q r15, 19*8, r55
2994 ld.q r15, 18*8, r54
2995 ld.q r15, 17*8, r53
2996 ld.q r15, 16*8, r52
2997 ld.q r15, 15*8, r51
2998 ld.q r15, 14*8, r50
2999 ld.q r15, 13*8, r49
3000 ld.q r15, 12*8, r48
3001 ld.q r15, 11*8, r47
3002 ld.q r15, 10*8, r46
3003 ld.q r15, 9*8, r45
3004 ld.q r15, 8*8, r44
3005 ld.q r15, 7*8, r35
3006 ld.q r15, 6*8, r34
3007 ld.q r15, 5*8, r33
3008 ld.q r15, 4*8, r32
3009 ld.q r15, 3*8, r31
3010 ld.q r15, 2*8, r30
3011 ld.q r15, 1*8, r29
3012 ld.q r15, 0*8, r28
/* Release the block (size in r0) and return.  */
3013 add.l r15, r0, r15
3014 blink tr0, r63
3015
3016 #ifndef __SH4_NOFPU__
3017 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3018 #else
3019 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3020 #endif
3021 #endif /* L_push_pop_shmedia_regs */
3022 #endif /* __SH5__ == 32 */
3023
3024 #ifdef L_div_table
3025 #if __SH5__
3026 #if defined(__pic__) && defined(__SHMEDIA__)
3027 .global GLOBAL(sdivsi3)
3028 FUNC(GLOBAL(sdivsi3))
3029 #if __SH5__ == 32
3030 .section .text..SHmedia32,"ax"
3031 #else
3032 .text
3033 #endif
3034 #if 0
3035 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3036 in a text section does not work (at least for shared libraries):
3037 the linker sets the LSB of the address as if this was SHmedia code. */
3038 #define TEXT_DATA_BUG
3039 #endif
3040 .align 2
/* Division by multiplication with a table-derived approximate inverse of
   the divisor.  r4 is multiplied by the inverse (the dividend), r5 is the
   value that gets normalized and looked up (the divisor).

   Register roles in the code below:
     r1        first nsb (r5), the normalization shift; later 92 - nsb (r5),
               the final right-shift count
     r25       normalized divisor; later the product of the refined inverse
               and r4
     r21       signed table index taken from the top bits of the normalized
               divisor; later various temporaries
     r20       address of the division table (its middle label)
     r19, r18  successive approximations of the inverse / temporaries
     tr0       first the table address, then the return address
   The fixed-point annotations on the individual lines (sN.M / uN.M) are
   the original author's.  */
3041 // inputs: r4,r5
3042 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3043 // result in r0
3044 .global GLOBAL(sdivsi3)
3045 GLOBAL(sdivsi3):
/* Get the table address through a target register; it is read back into
   r20 with gettr below.  */
3046 #ifdef TEXT_DATA_BUG
3047 ptb datalabel Local_div_table,tr0
3048 #else
3049 ptb GLOBAL(div_table_internal),tr0
3050 #endif
3051 nsb r5, r1
3052 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3053 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3054 /* bubble */
3055 gettr tr0,r20
/* The index is signed, so negative divisors read the entries stored
   before the table label and positive ones the entries after it.  The
   byte factor is read at table + index.  */
3056 ldx.ub r20, r21, r19 // u0.8
3057 shari r25, 32, r25 // normalize to s2.30
/* The 16-bit constant is read at table + 2 * index.  */
3058 shlli r21, 1, r21
3059 muls.l r25, r19, r19 // s2.38
3060 ldx.w r20, r21, r21 // s2.14
/* Capture the return address now: tr0 is free again after the gettr
   above, and r18 is reused as a temporary further down.  */
3061 ptabs r18, tr0
3062 shari r19, 24, r19 // truncate to s2.14
3063 sub r21, r19, r19 // some 11 bit inverse in s1.14
3064 muls.l r19, r19, r21 // u0.28
/* r1 = 92 - nsb (r5): shift count for the final shard.  */
3065 sub r63, r1, r1
3066 addi r1, 92, r1
3067 muls.l r25, r21, r18 // s2.58
3068 shlli r19, 45, r19 // multiply by two and convert to s2.58
3069 /* bubble */
3070 sub r19, r18, r18
3071 shari r18, 28, r18 // some 22 bit inverse in s1.30
3072 muls.l r18, r25, r0 // s2.60
3073 muls.l r18, r4, r25 // s32.30
3074 /* bubble */
3075 shari r0, 16, r19 // s-16.44
3076 muls.l r19, r18, r19 // s-16.74
/* r0 = 0 or -1 according to the sign of r25, the product of the inverse
   and the dividend.  It is XORed into r21 here and subtracted at the very
   end.  */
3077 shari r25, 63, r0
3078 shari r4, 14, r18 // s19.-14
3079 shari r19, 30, r19 // s-16.44
3080 muls.l r19, r18, r19 // s15.30
3081 xor r21, r0, r21 // You could also use the constant 1 << 27.
3082 add r21, r25, r21
3083 sub r21, r19, r21
3084 shard r21, r1, r21
3085 sub r21, r0, r0
3086 blink tr0, r63
3087 ENDFUNC(GLOBAL(sdivsi3))
3088 /* This table has been generated by divtab.c .
3089 Defects for bias -330:
3090 Max defect: 6.081536e-07 at -1.000000e+00
3091 Min defect: 2.849516e-08 at 1.030651e+00
3092 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3093 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3094 Defect at 1: 1.238659e-07
3095 Defect at -2: 1.061708e-07 */
3096 #else /* ! __pic__ || ! __SHMEDIA__ */
3097 .section .rodata
3098 #endif /* __pic__ */
3099 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3100 .balign 2
3101 .type Local_div_table,@object
3102 .size Local_div_table,128
3103 /* negative division constants */
3104 .word -16638
3105 .word -17135
3106 .word -17737
3107 .word -18433
3108 .word -19103
3109 .word -19751
3110 .word -20583
3111 .word -21383
3112 .word -22343
3113 .word -23353
3114 .word -24407
3115 .word -25582
3116 .word -26863
3117 .word -28382
3118 .word -29965
3119 .word -31800
3120 /* negative division factors */
3121 .byte 66
3122 .byte 70
3123 .byte 75
3124 .byte 81
3125 .byte 87
3126 .byte 93
3127 .byte 101
3128 .byte 109
3129 .byte 119
3130 .byte 130
3131 .byte 142
3132 .byte 156
3133 .byte 172
3134 .byte 192
3135 .byte 214
3136 .byte 241
3137 .skip 16
3138 Local_div_table:
3139 .skip 16
3140 /* positive division factors */
3141 .byte 241
3142 .byte 214
3143 .byte 192
3144 .byte 172
3145 .byte 156
3146 .byte 142
3147 .byte 130
3148 .byte 119
3149 .byte 109
3150 .byte 101
3151 .byte 93
3152 .byte 87
3153 .byte 81
3154 .byte 75
3155 .byte 70
3156 .byte 66
3157 /* positive division constants */
3158 .word 31801
3159 .word 29966
3160 .word 28383
3161 .word 26864
3162 .word 25583
3163 .word 24408
3164 .word 23354
3165 .word 22344
3166 .word 21384
3167 .word 20584
3168 .word 19752
3169 .word 19104
3170 .word 18434
3171 .word 17738
3172 .word 17136
3173 .word 16639
3174 .section .rodata
3175 #endif /* TEXT_DATA_BUG */
3176 .balign 2
3177 .type GLOBAL(div_table),@object
3178 .size GLOBAL(div_table),128
3179 /* negative division constants */
3180 .word -16638
3181 .word -17135
3182 .word -17737
3183 .word -18433
3184 .word -19103
3185 .word -19751
3186 .word -20583
3187 .word -21383
3188 .word -22343
3189 .word -23353
3190 .word -24407
3191 .word -25582
3192 .word -26863
3193 .word -28382
3194 .word -29965
3195 .word -31800
3196 /* negative division factors */
3197 .byte 66
3198 .byte 70
3199 .byte 75
3200 .byte 81
3201 .byte 87
3202 .byte 93
3203 .byte 101
3204 .byte 109
3205 .byte 119
3206 .byte 130
3207 .byte 142
3208 .byte 156
3209 .byte 172
3210 .byte 192
3211 .byte 214
3212 .byte 241
3213 .skip 16
3214 .global GLOBAL(div_table)
3215 GLOBAL(div_table):
3216 HIDDEN_ALIAS(div_table_internal,div_table)
3217 .skip 16
3218 /* positive division factors */
3219 .byte 241
3220 .byte 214
3221 .byte 192
3222 .byte 172
3223 .byte 156
3224 .byte 142
3225 .byte 130
3226 .byte 119
3227 .byte 109
3228 .byte 101
3229 .byte 93
3230 .byte 87
3231 .byte 81
3232 .byte 75
3233 .byte 70
3234 .byte 66
3235 /* positive division constants */
3236 .word 31801
3237 .word 29966
3238 .word 28383
3239 .word 26864
3240 .word 25583
3241 .word 24408
3242 .word 23354
3243 .word 22344
3244 .word 21384
3245 .word 20584
3246 .word 19752
3247 .word 19104
3248 .word 18434
3249 .word 17738
3250 .word 17136
3251 .word 16639
3252
3253 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3254 /* This code uses shld, thus is not suitable for SH1 / SH2. */
3255
3256 /* Signed / unsigned division without use of FPU, optimized for SH4.
3257 Uses a lookup table for divisors in the range -128 .. +128, and
3258 div1 with case distinction for larger divisors in three more ranges.
3259 The code is lumped together with the table to allow the use of mova. */
3260 #ifdef __LITTLE_ENDIAN__
3261 #define L_LSB 0
3262 #define L_LSWMSB 1
3263 #define L_MSWLSB 2
3264 #else
3265 #define L_LSB 3
3266 #define L_LSWMSB 2
3267 #define L_MSWLSB 1
3268 #endif
3269
3270 .balign 4
/* Unsigned division.  The divisor is r5 (it is the div1 divisor and the
   table index), the dividend is r4, and the quotient is left in r0.
   r4 and r5 are pushed on the stack and popped again on every path to an
   rts; r1 is scratch.  The T/M/Q flags are modified, and so are mach/macl
   on the table path.

   Several labels in this function (div_le128, div_by_1_neg, div_ge64k,
   div_r8) are entry points used by sdivsi3_i4i, which arrives with r4 and
   r5 already pushed; conversely this function continues at udiv_25 and
   div_ge64k_end, which are located inside sdivsi3_i4i.

   The instruction following bra, rts, bt/s and bf/s is a delay slot and is
   executed before control arrives at the branch target.  */
3271 .global GLOBAL(udivsi3_i4i)
3272 FUNC(GLOBAL(udivsi3_i4i))
3273 GLOBAL(udivsi3_i4i):
3274 mov.w LOCAL(c128_w), r1
3275 div0u
/* r0 = dividend >> 8.  */
3276 mov r4,r0
3277 shlr8 r0
/* T = divisor > 128 (unsigned); r1 = divisor truncated to 16 bits.  */
3278 cmp/hi r1,r5
3279 extu.w r5,r1
/* Divisor <= 128: use the lookup tables.  */
3280 bf LOCAL(udiv_le128)
/* T = divisor fits in 16 bits; if not, the divisor is >= 64k.  */
3281 cmp/eq r5,r1
3282 bf LOCAL(udiv_ge64k)
/* Remaining case, divisor in 129 .. 65535: align the divisor to the upper
   half word, save the original r4 and r5 (r1 holds the unshifted divisor)
   and start the div1 steps; they continue at udiv_25.  */
3283 shlr r0
3284 mov r5,r1
3285 shll16 r5
3286 mov.l r4,@-r15
3287 div1 r5,r0
3288 mov.l r1,@-r15
3289 div1 r5,r0
3290 div1 r5,r0
3291 bra LOCAL(udiv_25)
3292 div1 r5,r0
3293
/* Entry from sdivsi3_i4i for a small divisor and a non-negative result;
   r4 and r5 are already on the stack.  r1 = div_table_ix[divisor].  */
3294 LOCAL(div_le128):
3295 mova LOCAL(div_table_ix),r0
3296 bra LOCAL(div_le128_2)
3297 mov.b @(r0,r5),r1
/* Unsigned small divisor: same lookup, but r4 and r5 are saved here.  */
3298 LOCAL(udiv_le128):
3299 mov.l r4,@-r15
3300 mova LOCAL(div_table_ix),r0
3301 mov.b @(r0,r5),r1
3302 mov.l r5,@-r15
/* Common table path.  r1 is a byte offset relative to div_table_inv and
   selects the 32-bit inverse; mach:macl = inverse * dividend.  */
3303 LOCAL(div_le128_2):
3304 mova LOCAL(div_table_inv),r0
3305 mov.l @(r0,r1),r1
/* T = (divisor & 0xfe) == 0, i.e. the divisor is 0 or 1.  */
3306 mov r5,r0
3307 tst #0xfe,r0
3308 mova LOCAL(div_table_clz),r0
3309 dmulu.l r1,r4
/* r1 = div_table_clz[divisor], the shift count for the shld below.  */
3310 mov.b @(r0,r5),r1
3311 bt/s LOCAL(div_by_1)
3312 mov r4,r0
3313 mov.l @r15+,r5
/* Upper word of the product plus the dividend itself; the carry is
   rotated back in at the top by rotcr.  T is known to be clear here
   because the bt/s above was not taken.  */
3314 sts mach,r0
3315 /* clrt */
3316 addc r4,r0
3317 mov.l @r15+,r4
3318 rotcr r0
3319 rts
3320 shld r1,r0
3321
/* Entry from sdivsi3_i4i: result is the negated dividend.  */
3322 LOCAL(div_by_1_neg):
3323 neg r4,r0
/* r0 already holds the result; restore r5 and r4 and return.  */
3324 LOCAL(div_by_1):
3325 mov.l @r15+,r5
3326 rts
3327 mov.l @r15+,r4
3328
/* Entry from sdivsi3_i4i for a divisor >= 64k and a non-negative result.
   T was set by the caller's cmp/hi r0,r5 (divisor > dividend >> 8).  */
3329 LOCAL(div_ge64k):
3330 bt/s LOCAL(div_r8)
3331 div0u
3332 shll8 r5
3333 bra LOCAL(div_ge64k_2)
3334 div1 r5,r0
/* Unsigned divisor >= 64k.  If the divisor exceeds dividend >> 8, go to
   udiv_r8; otherwise save r4 and the original r5 and run div1 steps with
   the divisor shifted left by 8.  */
3335 LOCAL(udiv_ge64k):
3336 cmp/hi r0,r5
3337 mov r5,r1
3338 bt LOCAL(udiv_r8)
3339 shll8 r5
3340 mov.l r4,@-r15
3341 div1 r5,r0
3342 mov.l r1,@-r15
3343 LOCAL(div_ge64k_2):
3344 div1 r5,r0
/* r1 = 0; it is pushed below as a scratch word that receives a byte of the
   intermediate result and is popped again at div_ge64k_end.  */
3345 mov.l LOCAL(zero_l),r1
3346 .rept 4
3347 div1 r5,r0
3348 .endr
3349 mov.l r1,@-r15
3350 div1 r5,r0
/* mov.w sign-extends, so r1 = 0xffffff00.  */
3351 mov.w LOCAL(m256_w),r1
3352 div1 r5,r0
3353 mov.b r0,@(L_LSWMSB,r15)
/* xor / and / xor: keep the upper 24 bits of r0 and replace its low byte
   by the low byte of r4.  */
3354 xor r4,r0
3355 and r1,r0
3356 bra LOCAL(div_ge64k_end)
3357 xor r4,r0
3358
/* Entry from sdivsi3_i4i; r4 and r5 are already on the stack.  */
3359 LOCAL(div_r8):
3360 shll16 r4
3361 bra LOCAL(div_r8_2)
3362 shll8 r4
/* Unsigned variant: saves r4 and r5 itself.  In both variants r4 is
   shifted left by 24 in total before div_r8_2.  */
3363 LOCAL(udiv_r8):
3364 mov.l r4,@-r15
3365 shll16 r4
3366 clrt
3367 shll8 r4
3368 mov.l r5,@-r15
/* Eight div1 steps on r1 (which starts as r0), interleaved with rotcl on
   r0.  r4 takes over the divisor value for the last step so that r5 can be
   restored early.  */
3369 LOCAL(div_r8_2):
3370 rotcl r4
3371 mov r0,r1
3372 div1 r5,r1
3373 mov r4,r0
3374 rotcl r0
3375 mov r5,r4
3376 div1 r5,r1
3377 .rept 5
3378 rotcl r0; div1 r5,r1
3379 .endr
3380 rotcl r0
3381 mov.l @r15+,r5
3382 div1 r4,r1
3383 mov.l @r15+,r4
3384 rts
3385 rotcl r0
3386
3387 ENDFUNC(GLOBAL(udivsi3_i4i))
3388
3389 .global GLOBAL(sdivsi3_i4i)
3390 FUNC(GLOBAL(sdivsi3_i4i))
3391 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3392 but we effectively clobber only r1. */
/* Signed division: dividend in r4, divisor in r5, quotient in r0.
   The original r4 and r5 are pushed at the start and popped again on
   every path to an rts.  The code works on the absolute values of both
   operands and is split by the sign of the result:
     pos_result  both operands have the same sign
     neg_result  the signs differ; the quotient is negated at the end
   Each half then dispatches on the magnitude of the divisor in the same
   way as udivsi3_i4i: <= 128 (lookup tables), >= 64k, or in between.
   The non-negative half shares div_le128, div_ge64k and div_r8 with
   udivsi3_i4i; udivsi3_i4i in turn continues at udiv_25 and
   div_ge64k_end below.

   The instruction following bra, rts, bt/s and bf/s is a delay slot and is
   executed before control arrives at the branch target.  */
3393 GLOBAL(sdivsi3_i4i):
3394 mov.l r4,@-r15
/* T = divisor >= 0.  */
3395 cmp/pz r5
3396 mov.w LOCAL(c128_w), r1
3397 bt/s LOCAL(pos_divisor)
/* Delay slot: T = dividend >= 0.  */
3398 cmp/pz r4
/* Negative divisor: save it and continue with its absolute value.  */
3399 mov.l r5,@-r15
3400 neg r5,r5
/* Dividend >= 0 with a negative divisor gives a negative result.  */
3401 bt/s LOCAL(neg_result)
/* Delay slot: T = |divisor| > 128, tested at pos_result / neg_result.  */
3402 cmp/hi r1,r5
/* Both operands negative: use |dividend|; the result is non-negative.  */
3403 neg r4,r4
3404 LOCAL(pos_result):
3405 extu.w r5,r0
3406 bf LOCAL(div_le128)
/* T = divisor fits in 16 bits.  */
3407 cmp/eq r5,r0
3408 mov r4,r0
3409 shlr8 r0
3410 bf/s LOCAL(div_ge64k)
/* Delay slot: T = divisor > dividend >> 8, used at div_ge64k.  */
3411 cmp/hi r0,r5
/* Divisor in 129 .. 65535.  */
3412 div0u
3413 shll16 r5
3414 div1 r5,r0
3415 div1 r5,r0
3416 div1 r5,r0
/* udivsi3_i4i joins here.  r1 = 0 is pushed as a scratch word; bytes of
   the intermediate result are stored into it and it is popped into r4 at
   the end.  */
3417 LOCAL(udiv_25):
3418 mov.l LOCAL(zero_l),r1
3419 div1 r5,r0
3420 div1 r5,r0
3421 mov.l r1,@-r15
3422 .rept 3
3423 div1 r5,r0
3424 .endr
3425 mov.b r0,@(L_MSWLSB,r15)
3426 xtrct r4,r0
3427 swap.w r0,r0
3428 .rept 8
3429 div1 r5,r0
3430 .endr
3431 mov.b r0,@(L_LSWMSB,r15)
/* Common tail of the non-negative paths (also reached from
   div_ge64k_2): eight more div1 steps, then combine the low byte of r0
   with the scratch word and restore r5 and r4.  */
3432 LOCAL(div_ge64k_end):
3433 .rept 8
3434 div1 r5,r0
3435 .endr
3436 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3437 extu.b r0,r0
3438 mov.l @r15+,r5
3439 or r4,r0
3440 mov.l @r15+,r4
3441 rts
3442 rotcl r0
3443
/* Small divisor, negative result.  Entered with r0 = divisor (low 16
   bits), so tst checks for a divisor of 0 or 1.  Otherwise the same table
   computation as div_le128_2 in udivsi3_i4i, followed by a negation.  */
3444 LOCAL(div_le128_neg):
3445 tst #0xfe,r0
3446 mova LOCAL(div_table_ix),r0
3447 mov.b @(r0,r5),r1
3448 mova LOCAL(div_table_inv),r0
3449 bt/s LOCAL(div_by_1_neg)
3450 mov.l @(r0,r1),r1
3451 mova LOCAL(div_table_clz),r0
3452 dmulu.l r1,r4
3453 mov.b @(r0,r5),r1
3454 mov.l @r15+,r5
3455 sts mach,r0
3456 /* clrt */
3457 addc r4,r0
3458 mov.l @r15+,r4
3459 rotcr r0
3460 shld r1,r0
3461 rts
3462 neg r0,r0
3463
/* Non-negative divisor: save it.  T still holds `dividend >= 0' from the
   delay slot at the function entry.  */
3464 LOCAL(pos_divisor):
3465 mov.l r5,@-r15
3466 bt/s LOCAL(pos_result)
/* Delay slot: T = divisor > 128.  */
3467 cmp/hi r1,r5
/* Negative dividend, non-negative divisor: use |dividend|.  */
3468 neg r4,r4
3469 LOCAL(neg_result):
3470 extu.w r5,r0
3471 bf LOCAL(div_le128_neg)
3472 cmp/eq r5,r0
3473 mov r4,r0
3474 shlr8 r0
3475 bf/s LOCAL(div_ge64k_neg)
/* Delay slot: T = divisor > dividend >> 8, used at div_ge64k_neg.  */
3476 cmp/hi r0,r5
/* Divisor in 129 .. 65535, negative result.  */
3477 div0u
3478 mov.l LOCAL(zero_l),r1
3479 shll16 r5
3480 div1 r5,r0
3481 mov.l r1,@-r15
3482 .rept 7
3483 div1 r5,r0
3484 .endr
3485 mov.b r0,@(L_MSWLSB,r15)
3486 xtrct r4,r0
3487 swap.w r0,r0
3488 .rept 8
3489 div1 r5,r0
3490 .endr
3491 mov.b r0,@(L_LSWMSB,r15)
/* Common tail of the negative paths: as div_ge64k_end, but the value is
   assembled in r1 and negated into r0 in the delay slot of the rts.  */
3492 LOCAL(div_ge64k_neg_end):
3493 .rept 8
3494 div1 r5,r0
3495 .endr
3496 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3497 extu.b r0,r1
3498 mov.l @r15+,r5
3499 or r4,r1
3500 LOCAL(div_r8_neg_end):
3501 mov.l @r15+,r4
3502 rotcl r1
3503 rts
3504 neg r1,r0
3505
/* Divisor >= 64k, negative result.  */
3506 LOCAL(div_ge64k_neg):
3507 bt/s LOCAL(div_r8_neg)
3508 div0u
3509 shll8 r5
3510 mov.l LOCAL(zero_l),r1
3511 .rept 6
3512 div1 r5,r0
3513 .endr
3514 mov.l r1,@-r15
3515 div1 r5,r0
/* mov.w sign-extends, so r1 = 0xffffff00.  */
3516 mov.w LOCAL(m256_w),r1
3517 div1 r5,r0
3518 mov.b r0,@(L_LSWMSB,r15)
/* xor / and / xor: keep the upper 24 bits of r0 and replace its low byte
   by the low byte of r4.  */
3519 xor r4,r0
3520 and r1,r0
3521 bra LOCAL(div_ge64k_neg_end)
3522 xor r4,r0
3523
/* Literal read with mov.w by both division functions.  It follows an
   unconditional branch and its delay slot, so it is never executed.  */
3524 LOCAL(c128_w):
3525 .word 128
3526
/* Negative-result counterpart of div_r8_2 in udivsi3_i4i: r1 receives the
   dividend shifted left by 24 and is rotated while div1 steps run on r0.
   r4 takes over the divisor value for the last step so that r5 can be
   restored early.  */
3527 LOCAL(div_r8_neg):
3528 clrt
3529 shll16 r4
3530 mov r4,r1
3531 shll8 r1
3532 mov r5,r4
3533 .rept 7
3534 rotcl r1; div1 r5,r0
3535 .endr
3536 mov.l @r15+,r5
3537 rotcl r1
3538 bra LOCAL(div_r8_neg_end)
3539 div1 r4,r0
3540
/* Literal for the byte-merge mask; loaded with a sign-extending mov.w.  */
3541 LOCAL(m256_w):
3542 .word 0xff00
3543 /* This table has been generated by divtab-sh4.c. */
3544 .balign 4
3545 LOCAL(div_table_clz):
3546 .byte 0
3547 .byte 1
3548 .byte 0
3549 .byte -1
3550 .byte -1
3551 .byte -2
3552 .byte -2
3553 .byte -2
3554 .byte -2
3555 .byte -3
3556 .byte -3
3557 .byte -3
3558 .byte -3
3559 .byte -3
3560 .byte -3
3561 .byte -3
3562 .byte -3
3563 .byte -4
3564 .byte -4
3565 .byte -4
3566 .byte -4
3567 .byte -4
3568 .byte -4
3569 .byte -4
3570 .byte -4
3571 .byte -4
3572 .byte -4
3573 .byte -4
3574 .byte -4
3575 .byte -4
3576 .byte -4
3577 .byte -4
3578 .byte -4
3579 .byte -5
3580 .byte -5
3581 .byte -5
3582 .byte -5
3583 .byte -5
3584 .byte -5
3585 .byte -5
3586 .byte -5
3587 .byte -5
3588 .byte -5
3589 .byte -5
3590 .byte -5
3591 .byte -5
3592 .byte -5
3593 .byte -5
3594 .byte -5
3595 .byte -5
3596 .byte -5
3597 .byte -5
3598 .byte -5
3599 .byte -5
3600 .byte -5
3601 .byte -5
3602 .byte -5
3603 .byte -5
3604 .byte -5
3605 .byte -5
3606 .byte -5
3607 .byte -5
3608 .byte -5
3609 .byte -5
3610 .byte -5
3611 .byte -6
3612 .byte -6
3613 .byte -6
3614 .byte -6
3615 .byte -6
3616 .byte -6
3617 .byte -6
3618 .byte -6
3619 .byte -6
3620 .byte -6
3621 .byte -6
3622 .byte -6
3623 .byte -6
3624 .byte -6
3625 .byte -6
3626 .byte -6
3627 .byte -6
3628 .byte -6
3629 .byte -6
3630 .byte -6
3631 .byte -6
3632 .byte -6
3633 .byte -6
3634 .byte -6
3635 .byte -6
3636 .byte -6
3637 .byte -6
3638 .byte -6
3639 .byte -6
3640 .byte -6
3641 .byte -6
3642 .byte -6
3643 .byte -6
3644 .byte -6
3645 .byte -6
3646 .byte -6
3647 .byte -6
3648 .byte -6
3649 .byte -6
3650 .byte -6
3651 .byte -6
3652 .byte -6
3653 .byte -6
3654 .byte -6
3655 .byte -6
3656 .byte -6
3657 .byte -6
3658 .byte -6
3659 .byte -6
3660 .byte -6
3661 .byte -6
3662 .byte -6
3663 .byte -6
3664 .byte -6
3665 .byte -6
3666 .byte -6
3667 .byte -6
3668 .byte -6
3669 .byte -6
3670 .byte -6
3671 .byte -6
3672 .byte -6
3673 .byte -6
3674 /* Lookup table translating positive divisor to index into table of
3675 normalized inverse. N.B. the '0' entry is also the last entry of the
3676 previous table, and causes an unaligned access for division by zero. */
3677 LOCAL(div_table_ix):
3678 .byte -6
3679 .byte -128
3680 .byte -128
3681 .byte 0
3682 .byte -128
3683 .byte -64
3684 .byte 0
3685 .byte 64
3686 .byte -128
3687 .byte -96
3688 .byte -64
3689 .byte -32
3690 .byte 0
3691 .byte 32
3692 .byte 64
3693 .byte 96
3694 .byte -128
3695 .byte -112
3696 .byte -96
3697 .byte -80
3698 .byte -64
3699 .byte -48
3700 .byte -32
3701 .byte -16
3702 .byte 0
3703 .byte 16
3704 .byte 32
3705 .byte 48
3706 .byte 64
3707 .byte 80
3708 .byte 96
3709 .byte 112
3710 .byte -128
3711 .byte -120
3712 .byte -112
3713 .byte -104
3714 .byte -96
3715 .byte -88
3716 .byte -80
3717 .byte -72
3718 .byte -64
3719 .byte -56
3720 .byte -48
3721 .byte -40
3722 .byte -32
3723 .byte -24
3724 .byte -16
3725 .byte -8
3726 .byte 0
3727 .byte 8
3728 .byte 16
3729 .byte 24
3730 .byte 32
3731 .byte 40
3732 .byte 48
3733 .byte 56
3734 .byte 64
3735 .byte 72
3736 .byte 80
3737 .byte 88
3738 .byte 96
3739 .byte 104
3740 .byte 112
3741 .byte 120
3742 .byte -128
3743 .byte -124
3744 .byte -120
3745 .byte -116
3746 .byte -112
3747 .byte -108
3748 .byte -104
3749 .byte -100
3750 .byte -96
3751 .byte -92
3752 .byte -88
3753 .byte -84
3754 .byte -80
3755 .byte -76
3756 .byte -72
3757 .byte -68
3758 .byte -64
3759 .byte -60
3760 .byte -56
3761 .byte -52
3762 .byte -48
3763 .byte -44
3764 .byte -40
3765 .byte -36
3766 .byte -32
3767 .byte -28
3768 .byte -24
3769 .byte -20
3770 .byte -16
3771 .byte -12
3772 .byte -8
3773 .byte -4
3774 .byte 0
3775 .byte 4
3776 .byte 8
3777 .byte 12
3778 .byte 16
3779 .byte 20
3780 .byte 24
3781 .byte 28
3782 .byte 32
3783 .byte 36
3784 .byte 40
3785 .byte 44
3786 .byte 48
3787 .byte 52
3788 .byte 56
3789 .byte 60
3790 .byte 64
3791 .byte 68
3792 .byte 72
3793 .byte 76
3794 .byte 80
3795 .byte 84
3796 .byte 88
3797 .byte 92
3798 .byte 96
3799 .byte 100
3800 .byte 104
3801 .byte 108
3802 .byte 112
3803 .byte 116
3804 .byte 120
3805 .byte 124
3806 .byte -128
3807 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3808 .balign 4
3809 LOCAL(zero_l):
3810 .long 0x0
3811 .long 0xF81F81F9
3812 .long 0xF07C1F08
3813 .long 0xE9131AC0
3814 .long 0xE1E1E1E2
3815 .long 0xDAE6076C
3816 .long 0xD41D41D5
3817 .long 0xCD856891
3818 .long 0xC71C71C8
3819 .long 0xC0E07039
3820 .long 0xBACF914D
3821 .long 0xB4E81B4F
3822 .long 0xAF286BCB
3823 .long 0xA98EF607
3824 .long 0xA41A41A5
3825 .long 0x9EC8E952
3826 .long 0x9999999A
3827 .long 0x948B0FCE
3828 .long 0x8F9C18FA
3829 .long 0x8ACB90F7
3830 .long 0x86186187
3831 .long 0x81818182
3832 .long 0x7D05F418
3833 .long 0x78A4C818
3834 .long 0x745D1746
3835 .long 0x702E05C1
3836 .long 0x6C16C16D
3837 .long 0x68168169
3838 .long 0x642C8591
3839 .long 0x60581606
3840 .long 0x5C9882BA
3841 .long 0x58ED2309
3842 LOCAL(div_table_inv):
3843 .long 0x55555556
3844 .long 0x51D07EAF
3845 .long 0x4E5E0A73
3846 .long 0x4AFD6A06
3847 .long 0x47AE147B
3848 .long 0x446F8657
3849 .long 0x41414142
3850 .long 0x3E22CBCF
3851 .long 0x3B13B13C
3852 .long 0x38138139
3853 .long 0x3521CFB3
3854 .long 0x323E34A3
3855 .long 0x2F684BDB
3856 .long 0x2C9FB4D9
3857 .long 0x29E4129F
3858 .long 0x27350B89
3859 .long 0x24924925
3860 .long 0x21FB7813
3861 .long 0x1F7047DD
3862 .long 0x1CF06ADB
3863 .long 0x1A7B9612
3864 .long 0x18118119
3865 .long 0x15B1E5F8
3866 .long 0x135C8114
3867 .long 0x11111112
3868 .long 0xECF56BF
3869 .long 0xC9714FC
3870 .long 0xA6810A7
3871 .long 0x8421085
3872 .long 0x624DD30
3873 .long 0x4104105
3874 .long 0x2040811
3875 /* maximum error: 0.987342 scaled: 0.921875*/
3876
3877 ENDFUNC(GLOBAL(sdivsi3_i4i))
3878 #endif /* SH3 / SH4 */
3879
3880 #endif /* L_div_table */
3881
3882 #ifdef L_udiv_qrnnd_16
3883 #if !__SHMEDIA__
/* udiv_qrnnd_16: computes a 16-bit quotient digit qn and the matching
   remainder rn from n1, n0, d and d1.
   NOTE(review): the register roles below are taken from the original
   one-line register comment that follows; the callers are not visible
   here, so the exact meaning of d1 (presumably the high part of d,
   positioned for div1) should be confirmed against them.

   In:      r0 = n1, r4 = n0 (only its low 16 bits are used),
            r5 = d, r6 = d1.  Precondition: n1 < d.
   Out:     r0 = rn, r1 = qn.
   Scratch: r2 (__m, the 16x16 product read back from MACL).
   Also overwritten: MACL and the T, M and Q status bits.  */
3884 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3885 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3886 /* n1 < d, but n1 might be larger than d1. */
3887 .global GLOBAL(udiv_qrnnd_16)
3888 .balign 8
3889 GLOBAL(udiv_qrnnd_16):
3890 div0u /* clear M, Q and T: set up unsigned step division */
3891 cmp/hi r6,r0 /* T = (n1 > d1), unsigned */
3892 bt .Lots /* n1 > d1 is handled separately at .Lots */
3893 .rept 16 /* emit 16 single-bit divide steps */
3894 div1 r6,r0 /* one divide step of r0 by r6; the new result bit is left in T */
3895 .endr
3896 extu.w r0,r1 /* r1 = low 16 bits of r0 (result bits rotated in so far); T unchanged */
3897 bt 0f /* last result bit was 1: r0 needs no adjustment */
3898 add r6,r0 /* last result bit was 0: add d1 back into r0 (T unchanged) */
3899 0: rotcl r1 /* shift the last result bit (T) into the bottom of r1 */
3900 mulu.w r1,r5 /* MACL = low16(r1) * low16(d), unsigned */
3901 xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
3902 swap.w r0,r0 /* swap halves: high half of r0 restored, low half = low16(n0) */
3903 sts macl,r2 /* r2 = __m, the product started above */
3904 cmp/hs r2,r0 /* T = (r0 >= __m), i.e. the sub below does not borrow */
3905 sub r2,r0 /* r0 -= __m (T unchanged) */
3906 bt 0f /* no borrow: r0/r1 are final, return via the last 0: label */
3907 addc r5,r0 /* borrow (T was 0): r0 += d, T = carry out */
3908 add #-1,r1 /* ... and decrement the quotient digit to match */
3909 bt 0f /* carry out: one correction was enough, return */
3910 1: add #-1,r1 /* otherwise decrement the quotient digit once more */
3911 rts
3912 add r5,r0 /* rts delay slot: add d to r0 once more */
3913 .balign 8
3914 .Lots: /* reached when n1 > d1 */
3915 sub r5,r0 /* r0 = n1 - d */
3916 swap.w r4,r1 /* r1 = n0 with its 16-bit halves swapped */
3917 xtrct r0,r1 /* r1 = (r0 << 16) | low16(n0) */
3918 clrt /* T = 0 so that the addc below adds no carry-in */
3919 mov r1,r0
3920 addc r5,r0 /* r0 += d, T = carry out */
3921 mov #-1,r1 /* r1 = 0xffffffff; reduced to 0xffff by the shlr16 below */
/* SL1 is defined in lib1funcs.h, which is not visible here.  Presumably it
   emits a bf branch to 1b (taken when the addc above produced no carry)
   and executes shlr16 r1 on both the taken and the fall-through path,
   either in the delay slot or ahead of the branch -- TODO confirm against
   the macro definition.  */
3922 SL1(bf, 1b,
3923 shlr16 r1)
3924 0: rts /* common return for the paths that need no further correction */
3925 nop /* rts delay slot */
3926 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3927 #endif /* !__SHMEDIA__ */
3928 #endif /* L_udiv_qrnnd_16 */