4 // Unsigned 32-bit divide using the UNR recurrence (q = a / b):
5 // look for z such that 2^32 - b <= b * z < 2^32
6 // then q - 1 <= (a * z) / 2^32 <= q
8 // INPUT: $r0: dividend, $r1: divisor
9 // OUTPUT: $r0: result (quotient q), $r1: modulus (remainder)
10 // CLOBBER: $r2 - $r3, $p0 - $p1 (NOTE(review): only $p0 is written by the instructions visible here - confirm $p1)
11 // SIZE: 22 / 14 * 8 bytes
13 sched 0x28282804280428 // scheduling control word, presumably for the instruction group that follows - TODO confirm encoding before reordering anything
17 shl b32 $r2 $r3 clamp $r2 // z = $r3 << $r2 (clamped shift): starting estimate. NOTE(review): no visible line initializes $r2/$r3 (listing lines 14-16 are absent) - confirm the setup
18 cvt u32 $r1 neg u32 $r1 // $r1 = -b; the divisor stays negated until the remainder is formed
19 mul $r3 u32 $r1 u32 $r2 // refinement 1 of 5: $r3 = low32(-b * z)
20 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
21 sched 0x28282828282828 // next scheduling control word
22 mul $r3 u32 $r1 u32 $r2 // refinement 2 of 5: $r3 = low32(-b * z)
23 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
24 mul $r3 u32 $r1 u32 $r2 // refinement 3 of 5
25 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
26 mul $r3 u32 $r1 u32 $r2 // refinement 4 of 5
27 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
28 mul $r3 u32 $r1 u32 $r2 // refinement 5 of 5 (its add follows the sched word below)
29 sched 0x042c2828042804 // next scheduling control word
30 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3): final z
32 mul high $r0 u32 $r0 u32 $r2 // q = high32(a * z): quotient estimate, overwrites the dividend in $r0
33 cvt u32 $r2 neg u32 $r1 // $r2 = b (negating -b again); z is no longer needed
34 add $r1 (mul u32 $r1 u32 $r0) $r3 // $r1 = $r3 - b * q. NOTE(review): this is the remainder only if $r3 == a, but the last visible write to $r3 is the multiply above (listing line 31 is absent) - confirm
35 set $p0 0x1 ge u32 $r1 $r2 // $p0 = (remainder >= b), unsigned compare
36 $p0 sub b32 $r1 $r1 $r2 // correction 1: remainder -= b ...
37 sched 0x20282e20042c28 // next scheduling control word
38 $p0 add b32 $r0 $r0 0x1 // ... and q += 1
39 $p0 set $p0 0x1 ge u32 $r1 $r2 // re-test only when the first correction fired
40 $p0 sub b32 $r1 $r1 $r2 // correction 2: remainder -= b ...
41 $p0 add b32 $r0 $r0 0x1 // ... and q += 1
44 // DIV S32: signed 32-bit divide; like DIV U32 after taking ABS(inputs), with the signs re-applied at the end
46 // INPUT: $r0: dividend, $r1: divisor
47 // OUTPUT: $r0: result (quotient, negated when the operand signs differ), $r1: modulus (remainder, takes the dividend's sign)
48 // CLOBBER: $r2 - $r3, $p0 - $p3 (NOTE(review): only $p0, $p2 and $p3 are written by the instructions visible here - confirm $p1)
50 set $p2 0x1 lt s32 $r0 0x0 // $p2 = (dividend < 0); later decides the remainder's sign
51 set $p3 0x1 lt s32 $r1 0x0 xor $p2 // $p3 = (divisor < 0) xor $p2; later decides the quotient's sign
52 sched 0x28042804282820 // scheduling control word, presumably for the instruction group that follows - TODO confirm encoding before reordering anything
53 cvt s32 $r0 abs s32 $r0 // a = |dividend|
54 cvt s32 $r1 abs s32 $r1 // b = |divisor|
58 shl b32 $r2 $r3 clamp $r2 // z = $r3 << $r2 (clamped shift): starting estimate. NOTE(review): no visible line initializes $r2/$r3 (listing lines 55-57 are absent) - confirm the setup
59 cvt u32 $r1 neg u32 $r1 // $r1 = -b; the divisor stays negated until the remainder is formed
60 sched 0x28282828282828 // next scheduling control word
61 mul $r3 u32 $r1 u32 $r2 // refinement 1 of 5: $r3 = low32(-b * z)
62 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
63 mul $r3 u32 $r1 u32 $r2 // refinement 2 of 5
64 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
65 mul $r3 u32 $r1 u32 $r2 // refinement 3 of 5
66 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
67 mul $r3 u32 $r1 u32 $r2 // refinement 4 of 5 (its add follows the sched word below)
68 sched 0x28280428042828 // next scheduling control word
69 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3)
70 mul $r3 u32 $r1 u32 $r2 // refinement 5 of 5
71 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += high32(z * $r3): final z
73 mul high $r0 u32 $r0 u32 $r2 // q = high32(a * z): quotient estimate, overwrites |dividend| in $r0
74 cvt u32 $r2 neg u32 $r1 // $r2 = b (negating -b again); z is no longer needed
75 add $r1 (mul u32 $r1 u32 $r0) $r3 // $r1 = $r3 - b * q. NOTE(review): this is the remainder only if $r3 == a, but the last visible write to $r3 is the multiply above (listing line 72 is absent) - confirm
76 sched 0x2028042c28042c // next scheduling control word
77 set $p0 0x1 ge u32 $r1 $r2 // $p0 = (remainder >= b), unsigned compare
78 $p0 sub b32 $r1 $r1 $r2 // correction 1: remainder -= b ...
79 $p0 add b32 $r0 $r0 0x1 // ... and q += 1
80 $p0 set $p0 0x1 ge u32 $r1 $r2 // re-test only when the first correction fired
81 $p0 sub b32 $r1 $r1 $r2 // correction 2: remainder -= b ...
82 $p0 add b32 $r0 $r0 0x1 // ... and q += 1
83 $p3 cvt s32 $r0 neg s32 $r0 // negate the quotient when exactly one input was negative
84 sched 0x2c200428042e04 // next scheduling control word
85 $p2 cvt s32 $r1 neg s32 $r1 // negate the remainder when the dividend was negative