/* -*- Mode: Asm -*- */
;; Copyright (C) 2012-2013
;; Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;;                Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combined
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

.section .text.libgcc.fixed, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractqqsf)
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr r22
    mov r23, r24
    ;; Sign-extend
    lsl r24
    sbc r24, r24
    mov r25, r24
    XJMP __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr r22
    mov r23, r24
    ;; Zero-extend
    clr r24
    clr r25
    XJMP __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov 22, 24
    ;; Sign-extend
    lsl r25
    sbc r24, r24
    mov r25, r24
    XJMP __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov 22, 24
    ;; Zero-extend
    clr r24
    clr r25
    XJMP __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr r22
    mov r23, r24
    mov r24, r25
    ;; Sign-extend
    lsl r25
    sbc r25, r25
    XJMP __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr r22
    mov r23, r24
    mov r24, r25
    ;; Zero-extend
    clr r25
    XJMP __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */
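
;; The six routines above only move their operand into the SA resp. USA
;; register slot and tail-call __fractsasf resp. __fractusasf: widening a
;; narrower fixed-point format is just a byte move plus sign- or
;; zero-extension.  A hedged C model of the QQ -> SA step (the function name
;; is illustrative, not a libgcc interface):
;;
;;     #include <stdint.h>
;;
;;     /* QQ has 7 fractional bits, SA has 15, so widening shifts the
;;        sign-extended bits left by 15 - 7 = 8.  */
;;     static int32_t qq_bits_to_sa_bits (int8_t qq)
;;     {
;;         return (int32_t) qq << 8;
;;     }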


#if defined (L_fractsqsf)
DEFUN __fractsqsf
    XCALL __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; binary point into place
    tst r25
    breq 0f
    subi r24, exp_lo (31)
    sbci r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
DEFUN __fractusqsf
    XCALL __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; binary point into place
    cpse r25, __zero_reg__
    subi r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
DEFUN __fractsasf
    XCALL __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; binary point into place
    tst r25
    breq 0f
    subi r24, exp_lo (15)
    sbci r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
DEFUN __fractusasf
    XCALL __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; binary point into place
    cpse r25, __zero_reg__
    subi r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */
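
;; The four routines above convert the raw integer bits with __floatsisf
;; resp. __floatunsisf and then divide a non-zero result by 2^n simply by
;; subtracting n from the biased exponent; exp_lo()/exp_hi() (defined
;; elsewhere in libgcc) supply the two bytes of that subtrahend as it lands
;; in r24/r25.  A hedged C model of the scaling step (the name and the lack
;; of underflow handling are assumptions for illustration):
;;
;;     #include <stdint.h>
;;     #include <string.h>
;;
;;     static float scale_down_pow2 (float x, int n)   /* x != 0 */
;;     {
;;         uint32_t bits;
;;         memcpy (&bits, &x, sizeof (bits));
;;         bits -= (uint32_t) n << 23;   /* exponent field starts at bit 23 */
;;         memcpy (&x, &bits, sizeof (x));
;;         return x;
;;     }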

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractsfqq)
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi r24, exp_lo (-31)
    sbci r25, exp_hi (-31)
    XCALL __fixsfsi
    mov r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi r25, exp_hi (-32)
    XCALL __fixunssfsi
    mov r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi r24, exp_lo (-23)
    sbci r25, exp_hi (-23)
    XJMP __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi r25, exp_hi (-24)
    XJMP __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi r24, exp_lo (-31)
    sbci r25, exp_hi (-31)
    XJMP __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi r25, exp_hi (-32)
    XJMP __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi r24, exp_lo (-15)
    sbci r25, exp_hi (-15)
    XJMP __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi r25, exp_hi (-16)
    XJMP __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */
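
;; The float -> fixed conversions above all follow one pattern: scale the
;; input by 2^n (n = number of fractional bits of the target format) by
;; bumping the exponent, then let __fixsfsi resp. __fixunssfsi truncate to a
;; 32-bit integer whose relevant bytes are the fixed-point result.  A hedged
;; C model for SF -> HQ (1.15); the name is illustrative and range checking
;; is omitted:
;;
;;     #include <stdint.h>
;;
;;     static int16_t sf_to_hq_bits (float x)   /* assumes |x| < 1 */
;;     {
;;         int32_t scaled = (int32_t) (x * 2147483648.0f);   /* x * 2^31 */
;;         return (int16_t) (scaled >> 16);   /* HQ is the high 16 bits */
;;     }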


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.

/*******************************************************
    Fractional Multiplication 8 x 8 without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding: ???
DEFUN __mulqq3
    XCALL __fmuls
    ;; TR 18037 requires that (-1) * (-1) does not overflow
    ;; The only input that can produce -1 is (-1)^2.
    dec r23
    brvs 0f
    inc r23
0:  ret
ENDF __mulqq3
#endif  /* L_mulqq3 && ! HAVE_MUL */
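
;; TR 18037 fract multiplication must saturate (-1) * (-1) to the largest
;; representable value instead of wrapping back to -1; in QQ that is the only
;; product that can overflow, and the dec/brvs/inc sequence above clamps
;; exactly that case.  A hedged C model of the fix-up on the raw result byte
;; (the function name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static int8_t qq_saturate_product (int8_t raw)
;;     {
;;         /* raw == -128 (0x80) can only come from (-1) * (-1).  */
;;         return raw == INT8_MIN ? INT8_MAX : raw;
;;     }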

/*******************************************************
    Fractional Multiply .16 x .16 with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulhq3
    XCALL __mulhisi3
    ;; Shift result into place
    lsl r23
    rol r24
    rol r25
    brvs 1f
    ;; Round
    sbrc r23, 7
    adiw r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires (-1)^2 not to overflow
    ldi r24, lo8 (0x7fff)
    ldi r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif  /* defined (L_mulhq3) */
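
;; A 1.15 x 1.15 product computed as a 32-bit integer multiply carries 30
;; fractional bits; the code keeps the 16 bits above the rounding bit and
;; clamps the sole overflowing case (-1) * (-1).  A hedged C model (the name
;; is illustrative; an arithmetic right shift is assumed):
;;
;;     #include <stdint.h>
;;
;;     static int16_t hq_mul (int16_t a, int16_t b)
;;     {
;;         int32_t p = (int32_t) a * b;              /* value * 2^30 */
;;         int32_t r = (p + (1L << 14)) >> 15;       /* round to 1.15 */
;;         return r > INT16_MAX ? INT16_MAX : (int16_t) r;
;;     }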

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    XCALL __umulhisi3
    ;; Round
    sbrc r23, 7
    adiw r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */


/*******************************************************
    Fixed Multiply 8.8 x 8.8 with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    XCALL __mulhisi3
    lsl r22
    rol r23
    rol r24
    XJMP __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    XCALL __umulhisi3
    XJMP __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
DEFUN __muluha3_round
    ;; Shift result into place
    mov r25, r24
    mov r24, r23
    ;; Round
    sbrc r22, 7
    adiw r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */
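
;; An 8.8 x 8.8 product computed in 32 bits is a 16.16 number; the routines
;; above return its middle 16 bits, rounded with the byte below them.  A
;; hedged C model of the unsigned case (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static uint16_t uha_mul (uint16_t a, uint16_t b)
;;     {
;;         uint32_t p = (uint32_t) a * b;         /* value * 2^16, i.e. 16.16 */
;;         return (uint16_t) ((p + 0x80) >> 8);   /* round to 8.8 */
;;     }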


/*******************************************************
    Fixed Multiplication 16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0 16
#define A1 A0+1
#define A2 A1+1
#define A3 A2+1

;; Multiplicand
#define B0 20
#define B1 B0+1
#define B2 B1+1
#define B3 B2+1

;; Result
#define C0 24
#define C1 C0+1
#define C2 C1+1
#define C3 C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0  $  movw C2, r0

    mul A1, B0  $  add C3, r0  $  adc C0, r1
    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store the guard bits outside the result; the signed
    ;; version (function below) uses them for rounding and for its left shift.
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg C2

    mul A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg C3

    mul A1, B3  $  add C2, r0  $  adc C3, r1
    mul A2, B2  $  add C2, r0  $  adc C3, r1
    mul A3, B1  $  add C2, r0  $  adc C3, r1

    mul A2, B3  $  add C3, r0
    mul A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop GUARD
    clr __zero_reg__
    ret
ENDF __mulusa3_round
#endif  /* L_mulusa3 */
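
;; The full 16.16 x 16.16 product is a 64-bit 32.32 number; __mulusa3_round
;; returns its middle 32 bits and keeps the byte just below them in GUARD so
;; that the signed version can round after its final left shift.  A hedged
;; 64-bit C model of the unsigned T = 1 path (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static uint32_t usa_mul (uint32_t a, uint32_t b)
;;     {
;;         uint64_t p = (uint64_t) a * b;            /* 32.32 */
;;         return (uint32_t) ((p + 0x8000) >> 16);   /* round to 16.16 */
;;     }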

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    clt
    XCALL __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst B3
    brpl 1f
    sub C2, A0
    sbc C3, A1
1:  sbrs A3, 7
    rjmp 2f
    sub C2, B0
    sbc C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl GUARD
    rol C0
    rol C1
    rol C2
    rol C3
    ;; Round last digit
    lsl GUARD
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */
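
;; __mulsa3 reuses the unsigned product and corrects it a posteriori: the
;; unsigned 32 x 32 product exceeds the signed one by B * 2^32 when A is
;; negative and by A * 2^32 when B is negative, and of those terms only the
;; bits that fall into C3:C2 matter.  The final left shift and guard-bit
;; rounding account for SA having 15 rather than 16 fractional bits.  A
;; hedged C model of the value computed (name illustrative; arithmetic right
;; shift assumed, overflow wrap-around ignored):
;;
;;     #include <stdint.h>
;;
;;     static int32_t sa_mul (int32_t a, int32_t b)    /* raw s16.15 operands */
;;     {
;;         int64_t p = (int64_t) a * b;                /* value * 2^30 */
;;         return (int32_t) ((p + (1L << 14)) >> 15);  /* round to s16.15 */
;;     }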

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

;; __tmp_reg__
#define CC0 0
;; __zero_reg__
#define CC1 1
#define CC2 16
#define CC3 17

#define AA0 26
#define AA1 AA0+1
#define AA2 30
#define AA3 AA2+1

#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    push B0
    push B1
    push B3
    clt
    XCALL __mulusa3_round
    pop r30
    ;; sign-extend B
    bst r30, 7
    brtc 1f
    ;; A1, A0 survived in R27:R26
    sub C2, AA0
    sbc C3, AA1
1:
    pop AA1  ;; B1
    pop AA0  ;; B0

    ;; sign-extend A.  A3 survived in R31
    bst AA3, 7
    brtc 2f
    sub C2, AA0
    sbc C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl GUARD
    rol C0
    rol C1
    rol C2
    rol C3
    ;; Round last digit
    lsl GUARD
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    push CC2
    push CC3
    ; clear result
    clr __tmp_reg__
    wmov CC2, CC0
    ; save multiplicand
    wmov AA0, A0
    wmov AA2, A2
    rjmp 3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3

2:  ;; A <<= 1
    lsl A0  $  rol A1  $  rol A2  $  rol A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr B3
    ror B2
    brcs 1b
    sbci B3, 0
    brne 2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov A0, AA0
    wmov A2, AA2
    rjmp 5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl B0
    rol B1
    brcs 4b
    sbci B0, 0
    brne 5b

    ;; Save guard bits and set carry for rounding
    push B3
    lsl B3
    ;; Move result into place
    wmov C2, CC2
    wmov C0, CC0
    clr __zero_reg__
    brtc 6f
    ;; Round iff T = 1
    adc C0, __zero_reg__
    adc C1, __zero_reg__
    adc C2, __zero_reg__
    adc C3, __zero_reg__
6:
    pop GUARD
    ;; Epilogue
    pop CC3
    pop CC2
    ret
ENDF __mulusa3_round
#endif  /* L_mulusa3 */
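
;; Without a hardware multiplier the product is built by a classic
;; shift-and-add loop: B's integral bits select left-shifted copies of A,
;; its fractional bits select right-shifted copies (with one guard byte),
;; and the guard byte rounds the result when T = 1.  A hedged C model of the
;; value computed (name illustrative; it uses 16 guard bits, whereas the
;; code above keeps only one guard byte, hence its looser error bounds):
;;
;;     #include <stdint.h>
;;
;;     static uint32_t usa_mul_by_loop (uint32_t a, uint32_t b)
;;     {
;;         uint64_t p = 0;                           /* 32.32 accumulator */
;;         for (int n = 0; n < 32; n++)              /* one step per bit of B */
;;             if (b & ((uint32_t) 1 << n))
;;                 p += (uint64_t) a << n;
;;         return (uint32_t) ((p + 0x8000) >> 16);   /* keep 16.16, round */
;;     }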

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif  /* __AVR_HAVE_MUL__ */

#undef GUARD

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
DEFUN __divqq3
    mov r_sign, r_divd
    eor r_sign, r_div
    sbrc r_div, 7
    neg r_div
    sbrc r_divd, 7
    neg r_divd
    XCALL __divqq_helper
    lsr r_quo
    sbrc r_sign, 7      ; negate result if needed
    neg r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
DEFUN __udivuqq3
    cp r_divd, r_div
    brsh 0f
    XJMP __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
DEFUN __divqq_helper
    clr r_quo           ; clear quotient
    inc __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl r_divd          ; shift dividend
    brcs 0f             ; dividend overflow
    cp r_divd,r_div     ; compare dividend & divisor
    brcc 0f             ; dividend >= divisor
    rol r_quo           ; shift quotient (with CARRY)
    rjmp __udivuqq3_cont
0:
    sub r_divd,r_div    ; restore dividend
    lsl r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl __zero_reg__    ; shift loop-counter bit
    brne __udivuqq3_loop
    com r_quo           ; complement result
                        ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */
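
;; __divqq_helper is a restoring division that produces one quotient bit per
;; step for 8 fractional bits; the bits are collected complemented, hence the
;; final com.  A hedged C model of the unsigned core, valid for
;; dividend < divisor (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static uint8_t uqq_div (uint8_t dividend, uint8_t divisor)
;;     {
;;         uint8_t q = 0;
;;         for (int i = 0; i < 8; i++)
;;         {
;;             uint16_t d = (uint16_t) dividend << 1;   /* next bit in */
;;             q <<= 1;
;;             if (d >= divisor)                        /* covers overflow */
;;             {
;;                 d -= divisor;
;;                 q |= 1;
;;             }
;;             dividend = (uint8_t) d;
;;         }
;;         return q;   /* raw .8 quotient; __divqq3 shifts it right for 1.7 */
;;     }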

#undef r_divd
#undef r_quo
#undef r_div
#undef r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/
#define r_divdL 26      /* dividend Low */
#define r_divdH 27      /* dividend High */
#define r_quoL  24      /* quotient Low */
#define r_quoH  25      /* quotient High */
#define r_divL  22      /* divisor */
#define r_divH  23      /* divisor */
#define r_cnt   21

#if defined (L_divhq3)
DEFUN __divhq3
    mov r0, r_divdH
    eor r0, r_divH
    sbrs r_divH, 7
    rjmp 1f
    NEG2 r_divL
1:
    sbrs r_divdH, 7
    rjmp 2f
    NEG2 r_divdL
2:
    cp r_divdL, r_divL
    cpc r_divdH, r_divH
    breq __divhq3_minus1    ; if equal return -1
    XCALL __udivuhq3
    lsr r_quoH
    ror r_quoL
    brpl 9f
    ;; negate result if needed
    NEG2 r_quoL
9:
    ret
__divhq3_minus1:
    ldi r_quoH, 0x80
    clr r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
DEFUN __udivuhq3
    sub r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

DEFUN __udivuha3_common
    clr r_quoL          ; clear quotient
    ldi r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol r_divdL         ; shift dividend (with CARRY)
    rol r_divdH
    brcs __udivuhq3_ep  ; dividend overflow
    cp r_divdL,r_divL   ; compare dividend & divisor
    cpc r_divdH,r_divH
    brcc __udivuhq3_ep  ; dividend >= divisor
    rol r_quoL          ; shift quotient (with CARRY)
    rjmp __udivuhq3_cont
__udivuhq3_ep:
    sub r_divdL,r_divL  ; restore dividend
    sbc r_divdH,r_divH
    lsl r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol r_quoH          ; shift quotient
    dec r_cnt           ; decrement loop counter
    brne __udivuhq3_loop
    com r_quoL          ; complement result
    com r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */
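
;; The same restoring loop, run 16 times, yields the .16 quotient of
;; __udivuhq3; __udivuha3 below merely repositions the 8.8 operand bytes so
;; the identical loop produces an 8.8 quotient.  A hedged C model of the
;; value computed while the quotient fits (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static uint16_t uhq_div (uint16_t a, uint16_t b)    /* a < b */
;;     {
;;         return (uint16_t) (((uint32_t) a << 16) / b);   /* .16 quotient */
;;     }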

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
DEFUN __divha3
    mov r0, r_divdH
    eor r0, r_divH
    sbrs r_divH, 7
    rjmp 1f
    NEG2 r_divL
1:
    sbrs r_divdH, 7
    rjmp 2f
    NEG2 r_divdL
2:
    XCALL __udivuha3
    lsr r_quoH          ; adjust to 7 fractional bits
    ror r_quoL
    sbrs r0, 7          ; negate result if needed
    ret
    NEG2 r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
DEFUN __udivuha3
    mov r_quoH, r_divdL
    mov r_divdL, r_divdH
    clr r_divdH
    lsl r_quoH              ; shift quotient into carry
    XJMP __udivuha3_common  ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */

#undef r_divdL
#undef r_divdH
#undef r_quoL
#undef r_quoH
#undef r_divL
#undef r_divH
#undef r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24     /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26     /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31     /* dividend High */
#define r_quoL   22     /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25     /* quotient High */
#define r_divL   18     /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21     /* divisor High */
#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
DEFUN __divsa3
    mov r0, r_arg1HH
    eor r0, r_divHH
    sbrs r_divHH, 7
    rjmp 1f
    NEG4 r_divL
1:
    sbrs r_arg1HH, 7
    rjmp 2f
    NEG4 r_arg1L
2:
    XCALL __udivusa3
    lsr r_quoHH         ; adjust to 15 fractional bits
    ror r_quoHL
    ror r_quoH
    ror r_quoL
    sbrs r0, 7          ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
DEFUN __udivusa3
    ldi r_divdHL, 32    ; init loop counter
    mov r_cnt, r_divdHL
    clr r_divdHL
    clr r_divdHH
    wmov r_quoL, r_divdHL
    lsl r_quoHL         ; shift quotient into carry
    rol r_quoHH
__udivusa3_loop:
    rol r_divdL         ; shift dividend (with CARRY)
    rol r_divdH
    rol r_divdHL
    rol r_divdHH
    brcs __udivusa3_ep  ; dividend overflow
    cp r_divdL,r_divL   ; compare dividend & divisor
    cpc r_divdH,r_divH
    cpc r_divdHL,r_divHL
    cpc r_divdHH,r_divHH
    brcc __udivusa3_ep  ; dividend >= divisor
    rol r_quoL          ; shift quotient (with CARRY)
    rjmp __udivusa3_cont
__udivusa3_ep:
    sub r_divdL,r_divL  ; restore dividend
    sbc r_divdH,r_divH
    sbc r_divdHL,r_divHL
    sbc r_divdHH,r_divHH
    lsl r_quoL          ; shift quotient (without CARRY)
__udivusa3_cont:
    rol r_quoH          ; shift quotient
    rol r_quoHL
    rol r_quoHH
    dec r_cnt           ; decrement loop counter
    brne __udivusa3_loop
    com r_quoL          ; complement result
    com r_quoH          ; because C flag was complemented in loop
    com r_quoHL
    com r_quoHH
    ret
ENDF __udivusa3
#endif  /* defined (L_udivusa3) */

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
#undef r_divdL
#undef r_divdH
#undef r_divdHL
#undef r_divdHH
#undef r_quoL
#undef r_quoH
#undef r_quoHL
#undef r_quoHH
#undef r_divL
#undef r_divH
#undef r_divHL
#undef r_divHH
#undef r_cnt

\f
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 24
#define A1 A0+1

#if defined (L_ssneg_2)
DEFUN __ssneg_2
    NEG2 A0
    brvc 0f
    sbiw A0, 1
0:  ret
ENDF __ssneg_2
#endif  /* L_ssneg_2 */

#if defined (L_ssabs_2)
DEFUN __ssabs_2
    sbrs A1, 7
    ret
    XJMP __ssneg_2
ENDF __ssabs_2
#endif  /* L_ssabs_2 */
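
;; Negating the most negative 16-bit value 0x8000 wraps to itself and sets V;
;; the sbiw fix-up above turns that wrapped 0x8000 into the saturated 0x7fff.
;; A hedged C model (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static int16_t ssneg16 (int16_t a)
;;     {
;;         return a == INT16_MIN ? INT16_MAX : (int16_t) -a;
;;     }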

#undef A0
#undef A1


\f
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#if defined (L_ssneg_4)
DEFUN __ssneg_4
    XCALL __negsi2
    brvc 0f
    ldi A3, 0x7f
    ldi A2, 0xff
    ldi A1, 0xff
    ldi A0, 0xff
0:  ret
ENDF __ssneg_4
#endif  /* L_ssneg_4 */

#if defined (L_ssabs_4)
DEFUN __ssabs_4
    sbrs A3, 7
    ret
    XJMP __ssneg_4
ENDF __ssabs_4
#endif  /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3


\f
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub A7, A7
    ;; FALLTHRU
ENDF __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
DEFUN __sbc_8
    sbc A7, A7
    sbc A6, A6
    wmov A4, A6
    wmov A2, A6
    wmov A0, A6
    ret
ENDF __sbc_8
#endif  /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

DEFUN __ssneg_8
    XCALL __negdi2
    brvc 0f
    ;; A[] = 0x7fffffffffffffff
    sec
    XCALL __sbc_8
    ldi A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif  /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

DEFUN __ssabs_8
    sbrs A7, 7
    ret
    XJMP __ssneg_8
ENDF __ssabs_8
#endif  /* L_ssabs_8 */

;; Second Argument
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

DEFUN __usadd_8
    XCALL __adddi3
    brcs 0f
    ret
0:  ;; A[] = 0xffffffffffffffff
    XJMP __sbc_8
ENDF __usadd_8
#endif  /* L_usadd_8 */

#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

DEFUN __ussub_8
    XCALL __subdi3
    brcs 0f
    ret
0:  ;; A[] = 0
    XJMP __clr_8
ENDF __ussub_8
#endif  /* L_ussub_8 */

#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

DEFUN __ssadd_8
    XCALL __adddi3
    brvc 0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    cpi B7, 0x80
    XCALL __sbc_8
    subi A7, 0x80
0:  ret
ENDF __ssadd_8
#endif  /* L_ssadd_8 */
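
;; On signed overflow the sum must become INT64_MAX when B is non-negative
;; and INT64_MIN otherwise.  The constant is built from the carry of
;; "cpi B7, 0x80": carry set (B >= 0) makes __sbc_8 produce 0xff...ff, and
;; "subi A7, 0x80" then turns the top byte into 0x7f, i.e. INT64_MAX; carry
;; clear gives 0x00...00 and a top byte of 0x80, i.e. INT64_MIN.  A hedged C
;; model of the selection (the name is illustrative):
;;
;;     #include <stdint.h>
;;
;;     static int64_t ssadd8_saturation_value (int64_t b)
;;     {
;;         return b >= 0 ? INT64_MAX : INT64_MIN;
;;     }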

#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

DEFUN __sssub_8
    XCALL __subdi3
    brvc 0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi A7, 0x7f
    cp A7, B7
    XCALL __sbc_8
    subi A7, 0x80
0:  ret
ENDF __sssub_8
#endif  /* L_sssub_8 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7