a0c5497524a64f423c2243d32abd377086719414
[mesa.git] / src / gallium / drivers / nouveau / codegen / target_lib_nvf0.asm
1 //
2 // DIV U32
3 //
4 // UNR recurrence (q = a / b):
5 // look for z such that 2^32 - b <= b * z < 2^32
6 // then q - 1 <= (a * z) / 2^32 <= q
7 //
8 // INPUT: $r0: dividend, $r1: divisor
9 // OUTPUT: $r0: result, $r1: modulus
10 // CLOBBER: $r2 - $r3, $p0 - $p1
11 // SIZE: 22 / 14 * 8 bytes
12 //
// Body of DIV U32. On entry $r0 = dividend (a), $r1 = divisor (b).
// Outline: build a reciprocal estimate z in $r2, refine it five times,
// take q = hi32(a * z) and r = a - q * b, then correct q and r at most twice.
// The only predicate written by the instructions below is $p0.
//
// Each `sched` line in this routine is followed by seven instruction lines
// (five for the last one). Presumably it carries scheduling data for that
// group, so instruction count and order must stay in step with it - TODO
// confirm against the assembler before moving anything.
13 sched 0x28282804280428
// $r2 = 1 << (bfind(b) ^ 0x1f): the initial estimate z0. This assumes bfind
// yields the index of the highest set bit of b, i.e. z0 = 2^(31 - msb(b)) -
// TODO confirm bfind/clamp semantics for b == 0.
14 bfind u32 $r2 $r1
15 xor b32 $r2 $r2 0x1f
16 mov b32 $r3 0x1
17 shl b32 $r2 $r3 clamp $r2
// $r1 = -b; the divisor stays negated throughout the refinement.
18 cvt u32 $r1 neg u32 $r1
// Refinement step 1 of 5: $r3 = lo32(-b * z), then z += hi32(z * $r3).
19 mul $r3 u32 $r1 u32 $r2
20 add $r2 (mul high u32 $r2 u32 $r3) $r2
21 sched 0x28282828282828
// Refinement steps 2 to 4, same mul/add pair as above.
22 mul $r3 u32 $r1 u32 $r2
23 add $r2 (mul high u32 $r2 u32 $r3) $r2
24 mul $r3 u32 $r1 u32 $r2
25 add $r2 (mul high u32 $r2 u32 $r3) $r2
26 mul $r3 u32 $r1 u32 $r2
27 add $r2 (mul high u32 $r2 u32 $r3) $r2
// Refinement step 5: the mul is here, its add follows the next `sched`.
28 mul $r3 u32 $r1 u32 $r2
29 sched 0x042c2828042804
30 add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep a copy of the dividend in $r3 because $r0 is overwritten next.
31 mov b32 $r3 $r0
// $r0 = hi32(a * z): quotient estimate q.
32 mul high $r0 u32 $r0 u32 $r2
// $r2 = -(-b) = b: z is no longer needed, so $r2 now holds the divisor.
33 cvt u32 $r2 neg u32 $r1
// $r1 = (-b) * q + a = a - q * b: remainder estimate r.
34 add $r1 (mul u32 $r1 u32 $r0) $r3
// First correction: if r >= b then r -= b and q += 1.
35 set $p0 0x1 ge u32 $r1 $r2
36 $p0 sub b32 $r1 $r1 $r2
37 sched 0x20282e20042c28
38 $p0 add b32 $r0 $r0 0x1
// Second correction: the compare is itself predicated on $p0, so it is only
// re-evaluated when the first correction was applied.
39 $p0 set $p0 0x1 ge u32 $r1 $r2
40 $p0 sub b32 $r1 $r1 $r2
41 $p0 add b32 $r0 $r0 0x1
// Return with $r0 = quotient, $r1 = remainder.
42 ret
43 //
44 // DIV S32, like DIV U32 after taking ABS(inputs)
45 //
46 // INPUT: $r0: dividend, $r1: divisor
47 // OUTPUT: $r0: result, $r1: modulus
48 // CLOBBER: $r2 - $r3, $p0 - $p3
49 //
// Body of DIV S32. On entry $r0 = dividend (a), $r1 = divisor (b).
// The signs are recorded in $p2/$p3, both inputs are replaced by their
// absolute values, the same unsigned sequence as DIV U32 is run, and the
// signs are applied to the results at the end.
// The predicates written by the instructions below are $p0, $p2 and $p3.
//
// NOTE(review): the next two instructions come before this routine's first
// `sched` line. With the seven-instructions-per-`sched` layout used in this
// file they appear to complete the group opened by the last `sched` of the
// preceding routine (which is followed by only five instructions) - confirm
// before adding or removing instructions in either routine.
//
// $p2 = (a < 0): the remainder takes the sign of the dividend.
50 set $p2 0x1 lt s32 $r0 0x0
// $p3 = (b < 0) xor $p2: true when the operand signs differ, i.e. when the
// quotient must be negated.
51 set $p3 0x1 lt s32 $r1 0x0 xor $p2
52 sched 0x28042804282820
// Work on |a| and |b| from here on.
53 cvt s32 $r0 abs s32 $r0
54 cvt s32 $r1 abs s32 $r1
// $r2 = 1 << (bfind(|b|) ^ 0x1f): initial reciprocal estimate z0. This
// assumes bfind yields the index of the highest set bit - TODO confirm.
55 bfind u32 $r2 $r1
56 xor b32 $r2 $r2 0x1f
57 mov b32 $r3 0x1
58 shl b32 $r2 $r3 clamp $r2
// $r1 = -|b|; the divisor stays negated throughout the refinement.
59 cvt u32 $r1 neg u32 $r1
60 sched 0x28282828282828
// Refinement steps 1 to 3 of 5: $r3 = lo32(-|b| * z), then z += hi32(z * $r3).
61 mul $r3 u32 $r1 u32 $r2
62 add $r2 (mul high u32 $r2 u32 $r3) $r2
63 mul $r3 u32 $r1 u32 $r2
64 add $r2 (mul high u32 $r2 u32 $r3) $r2
65 mul $r3 u32 $r1 u32 $r2
66 add $r2 (mul high u32 $r2 u32 $r3) $r2
// Refinement step 4: the mul is here, its add follows the next `sched`.
67 mul $r3 u32 $r1 u32 $r2
68 sched 0x28280428042828
69 add $r2 (mul high u32 $r2 u32 $r3) $r2
// Refinement step 5.
70 mul $r3 u32 $r1 u32 $r2
71 add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep a copy of |a| in $r3 because $r0 is overwritten next.
72 mov b32 $r3 $r0
// $r0 = hi32(|a| * z): quotient estimate q.
73 mul high $r0 u32 $r0 u32 $r2
// $r2 = -(-|b|) = |b|: z is no longer needed, so $r2 now holds the divisor.
74 cvt u32 $r2 neg u32 $r1
// $r1 = (-|b|) * q + |a| = |a| - q * |b|: remainder estimate r.
75 add $r1 (mul u32 $r1 u32 $r0) $r3
76 sched 0x2028042c28042c
// First correction: if r >= |b| then r -= |b| and q += 1.
77 set $p0 0x1 ge u32 $r1 $r2
78 $p0 sub b32 $r1 $r1 $r2
79 $p0 add b32 $r0 $r0 0x1
// Second correction: the compare is itself predicated on $p0, so it is only
// re-evaluated when the first correction was applied.
80 $p0 set $p0 0x1 ge u32 $r1 $r2
81 $p0 sub b32 $r1 $r1 $r2
82 $p0 add b32 $r0 $r0 0x1
// Apply the signs: negate the quotient when the operand signs differed...
83 $p3 cvt s32 $r0 neg s32 $r0
84 sched 0x2c200428042e04
// ...and negate the remainder when the dividend was negative.
85 $p2 cvt s32 $r1 neg s32 $r1
// Return with $r0 = quotient, $r1 = remainder.
86 ret