2ed8c0af8713c6b6a925d9e09c29c3a203476640
[mesa.git] / src / gallium / drivers / nvc0 / codegen / target_lib_nve4.asm
1 //
2 // DIV U32: unsigned 32-bit division with remainder, by reciprocal refinement
3 //
4 // UNR recurrence (q = a / b):
5 // look for z such that 2^32 - b <= b * z < 2^32
6 // then q - 1 <= (a * z) / 2^32 <= q
7 //
8 // INPUT: $r0: dividend, $r1: divisor
9 // OUTPUT: $r0: result, $r1: modulus
10 // CLOBBER: $r2 - $r3, $p0 - $p1 (only $p0 is actually written by this listing)
11 // SIZE: 30 * 8 bytes counting the 4 sched words (26 instructions); the older "22 / 14" figure does not match this listing
12 //
13 sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28
14 bfind u32 $r2 $r1 // $r2 = index of the highest set bit of b (bfind semantics assumed)
15 long xor b32 $r2 $r2 0x1f // $r2 = 31 - index (index is within 0..31)
16 long mov b32 $r3 0x1
17 shl b32 $r2 $r3 clamp $r2 // z = 1 << (31 - index): initial estimate of 2^32 / b
18 long cvt u32 $r1 neg u32 $r1 // $r1 = -b, so the multiplies below yield -(b * z)
19 long mul $r3 u32 $r1 u32 $r2 // e = (-b * z) mod 2^32 = 2^32 - b * z
20 add $r2 (mul high u32 $r2 u32 $r3) $r2 // z += hi32(z * e), refinement step 1 of 5
21 sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
22 mul $r3 u32 $r1 u32 $r2 // step 2: e
23 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 2: z += hi32(z * e)
24 mul $r3 u32 $r1 u32 $r2 // step 3: e
25 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 3: z += hi32(z * e)
26 mul $r3 u32 $r1 u32 $r2 // step 4: e
27 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 4: z += hi32(z * e)
28 mul $r3 u32 $r1 u32 $r2 // step 5: e
29 sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4
30 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 5: final z
31 mov b32 $r3 $r0 // keep the dividend a, $r0 is about to hold the quotient
32 mul high $r0 u32 $r0 u32 $r2 // q = hi32(a * z)
33 long cvt u32 $r2 neg u32 $r1 // $r2 = b again ($r1 still holds -b)
34 long add $r1 (mul u32 $r1 u32 $r0) $r3 // r = a - b * q
35 set $p0 0x1 ge u32 $r1 $r2 // $p0 = (r >= b): quotient estimate was too small
36 $p0 sub b32 $r1 $r1 $r2 // first correction: r -= b
37 sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20
38 $p0 add b32 $r0 $r0 0x1 // first correction: q += 1
39 $p0 set $p0 0x1 ge u32 $r1 $r2 // re-test only if the first correction was applied
40 $p0 sub b32 $r1 $r1 $r2 // second correction: r -= b
41 $p0 add b32 $r0 $r0 0x1 // second correction: q += 1
42 long ret
43 //
44 // DIV S32, like DIV U32 after taking ABS(inputs); signs are re-applied at the end
45 //
46 // INPUT: $r0: dividend, $r1: divisor
47 // OUTPUT: $r0: result, $r1: modulus
48 // CLOBBER: $r2 - $r3, $p0 - $p3
49 //
50 set $p2 0x1 lt s32 $r0 0x0 // $p2 = dividend is negative
51 set $p3 0x1 lt s32 $r1 0x0 xor $p2 // $p3 = (divisor is negative) xor $p2: operand signs differ
52 sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28
53 long cvt s32 $r0 abs s32 $r0 // a = |dividend|
54 long cvt s32 $r1 abs s32 $r1 // b = |divisor|
55 bfind u32 $r2 $r1 // $r2 = index of the highest set bit of b (bfind semantics assumed)
56 long xor b32 $r2 $r2 0x1f // $r2 = 31 - index
57 long mov b32 $r3 0x1
58 shl b32 $r2 $r3 clamp $r2 // z = 1 << (31 - index): initial estimate of 2^32 / b
59 cvt u32 $r1 neg u32 $r1 // $r1 = -b
60 sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
61 mul $r3 u32 $r1 u32 $r2 // step 1: e = (-b * z) mod 2^32
62 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 1: z += hi32(z * e)
63 mul $r3 u32 $r1 u32 $r2 // step 2: e
64 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 2: z += hi32(z * e)
65 mul $r3 u32 $r1 u32 $r2 // step 3: e
66 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 3: z += hi32(z * e)
67 mul $r3 u32 $r1 u32 $r2 // step 4: e
68 sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28
69 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 4: z += hi32(z * e)
70 mul $r3 u32 $r1 u32 $r2 // step 5: e
71 add $r2 (mul high u32 $r2 u32 $r3) $r2 // step 5: final z
72 mov b32 $r3 $r0 // keep a, $r0 is about to hold the quotient
73 mul high $r0 u32 $r0 u32 $r2 // q = hi32(a * z)
74 long cvt u32 $r2 neg u32 $r1 // $r2 = b again ($r1 still holds -b)
75 long add $r1 (mul u32 $r1 u32 $r0) $r3 // r = a - b * q
76 sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
77 set $p0 0x1 ge u32 $r1 $r2 // $p0 = (r >= b)
78 $p0 sub b32 $r1 $r1 $r2 // first correction: r -= b
79 $p0 add b32 $r0 $r0 0x1 // first correction: q += 1
80 $p0 set $p0 0x1 ge u32 $r1 $r2 // re-test only if the first correction was applied
81 $p0 sub b32 $r1 $r1 $r2 // second correction: r -= b
82 long $p0 add b32 $r0 $r0 0x1 // second correction: q += 1
83 long $p3 cvt s32 $r0 neg s32 $r0 // negate the quotient when the operand signs differed
84 sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
85 $p2 cvt s32 $r1 neg s32 $r1 // the modulus takes the sign of the dividend
86 long ret
87 //
88 // SULDP [for each format]
89 // $r4d: address
90 // $r2: surface info (format)
91 // $p0: access predicate
92 // $p1, $p2: caching predicate, written as $p2$p1 (00: cv, 01: ca, 10: cg)
93 //
94 // RGBA32: 128-bit texel loaded into $r0q ($r0..$r3), returned without conversion
95 $p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
96 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
97 $p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
98 $p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
99 long ret
100 // RGBA16_UNORM: 64-bit texel loaded into $r0d; four u16 channels converted to f32 in [0, 1] in $r0..$r3
101 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
102 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set; 64-bit load like the other RGBA16 formats
103 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
104 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
105 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
106 cvt rn f32 $r3 u16 1 $r1 // $r3 = float(half-word 1 of $r1)
107 cvt rn f32 $r2 u16 0 $r1 // $r2 = float(half-word 0 of $r1)
108 mul f32 $r3 $r3 0x37800080 // scale by 1/65535 (nearest f32, so 65535 maps to exactly 1.0)
109 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
110 cvt rn f32 $r1 u16 1 $r0 // $r1 = float(half-word 1 of $r0)
111 mul f32 $r2 $r2 0x37800080 // scale by 1/65535
112 cvt rn f32 $r0 u16 0 $r0 // $r0 converted last because it is still a source above
113 mul f32 $r1 $r1 0x37800080 // scale by 1/65535
114 mul f32 $r0 $r0 0x37800080 // scale by 1/65535
115 long ret
116 // RGBA16_SNORM: 64-bit texel loaded into $r0d; four s16 channels converted to f32 and scaled by 1/32767 into $r0..$r3
117 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
118 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
119 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
120 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
121 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
122 cvt rn f32 $r3 s16 1 $r1 // $r3 = float(signed half-word 1 of $r1)
123 cvt rn f32 $r2 s16 0 $r1 // $r2 = float(signed half-word 0 of $r1)
124 mul f32 $r3 $r3 0x38000100 // scale by 1/32767 (nearest f32, so 32767 maps to exactly 1.0)
125 cvt rn f32 $r1 s16 1 $r0 // $r1 = float(signed half-word 1 of $r0)
126 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
127 mul f32 $r2 $r2 0x38000100 // scale by 1/32767
128 cvt rn f32 $r0 s16 0 $r0 // $r0 converted last because it is still a source above
129 mul f32 $r1 $r1 0x38000100 // scale by 1/32767; NOTE(review): -32768 is not clamped to -1.0 here
130 mul f32 $r0 $r0 0x38000100 // scale by 1/32767
131 long ret
132 // RGBA16_SINT: 64-bit texel loaded into $r0d; four s16 channels widened to s32 in $r0..$r3
133 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
134 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
135 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
136 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
137 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
138 cvt s32 $r3 s16 1 $r1 // $r3 = signed half-word 1 of $r1
139 cvt s32 $r2 s16 0 $r1 // $r2 = signed half-word 0 of $r1
140 cvt s32 $r1 s16 1 $r0 // $r1 = signed half-word 1 of $r0
141 cvt s32 $r0 s16 0 $r0 // $r0 converted last because it is still a source above
142 long ret
143 // RGBA16_UINT: 64-bit texel loaded into $r0d; four u16 channels widened to u32 in $r0..$r3
144 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
145 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
146 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
147 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
148 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
149 cvt u32 $r3 u16 1 $r1 // $r3 = half-word 1 of $r1
150 cvt u32 $r2 u16 0 $r1 // $r2 = half-word 0 of $r1
151 cvt u32 $r1 u16 1 $r0 // $r1 = half-word 1 of $r0
152 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
153 cvt u32 $r0 u16 0 $r0 // $r0 converted last because it is still a source above
154 long ret
155 // RGBA16_FLOAT: 64-bit texel loaded into $r0d; four f16 channels widened to f32 in $r0..$r3
156 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
157 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
158 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
159 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
160 cvt f32 $r3 f16 $r1 1 // $r3 = f16 half-word 1 of $r1 (selector follows the register in this form)
161 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
162 cvt f32 $r2 f16 $r1 0 // $r2 = f16 half-word 0 of $r1
163 cvt f32 $r1 f16 $r0 1 // $r1 = f16 half-word 1 of $r0
164 cvt f32 $r0 f16 $r0 0 // $r0 converted last because it is still a source above
165 long ret
166 // RG32_FLOAT: 64-bit texel loaded into $r0d unconverted; $r2 and $r3 are filled with 0.0 and 1.0
167 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
168 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
169 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
170 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
171 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
172 long mov b32 $r2 0x00000000 // third component = 0.0
173 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
174 long ret
175 // RG32_xINT: 64-bit texel loaded into $r0d unconverted; $r2 and $r3 are filled with integer 0 and 1
176 $p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
177 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
178 $p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
179 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
180 $p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
181 long mov b32 $r2 0x00000000 // third component = 0
182 long mov b32 $r3 0x00000001 // fourth component = integer 1
183 long ret
184 // RGB10A2_UNORM: 32-bit texel loaded into $r0; three 10-bit fields scaled to [0, 1] in $r0..$r2, $r3 forced to 1.0
185 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
186 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
187 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
188 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
189 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
190 ext u32 $r1 $r0 0x0a0a // $r1 = 10-bit field at bit 10 (0x0a0a presumably encodes width 10, offset 10)
191 long mov b32 $r3 0x3f800000 // $r3 = 1.0f; NOTE(review): the 2-bit field at bits 30..31 is not extracted
192 ext u32 $r2 $r0 0x0a14 // $r2 = 10-bit field at bit 20
193 long and b32 $r0 $r0 0x3ff // $r0 = low 10 bits, masked last because $r0 is the source above
194 cvt rn f32 $r2 u16 0 $r2 // field values fit in the low half-word
195 cvt rn f32 $r1 u16 0 $r1
196 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
197 mul f32 $r2 $r2 0x3a802008 // scale by 1/1023 (nearest f32, so 1023 maps to exactly 1.0)
198 cvt rn f32 $r0 u16 0 $r0
199 mul f32 $r1 $r1 0x3a802008 // scale by 1/1023
200 mul f32 $r0 $r0 0x3a802008 // scale by 1/1023
201 long ret
202 // RGB10A2_UINT: 32-bit texel loaded into $r0; three 10-bit fields returned as integers in $r0..$r2, $r3 forced to 1
203 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
204 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
205 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
206 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
207 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
208 ext u32 $r1 $r0 0x0a0a // $r1 = 10-bit field at bit 10 (0x0a0a presumably encodes width 10, offset 10)
209 long mov b32 $r3 0x00000001 // $r3 = integer 1; NOTE(review): the 2-bit field at bits 30..31 is not extracted
210 ext u32 $r2 $r0 0x0a14 // $r2 = 10-bit field at bit 20
211 long and b32 $r0 $r0 0x3ff // $r0 = low 10 bits, masked last because $r0 is the source above
212 long ret
213 // RGBA8_UNORM: 32-bit texel loaded into $r0; four u8 channels converted to f32 in [0, 1] in $r0..$r3
214 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
215 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
216 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
217 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
218 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
219 cvt rn f32 $r3 u8 3 $r0 // $r3 = float(byte 3 of $r0)
220 cvt rn f32 $r2 u8 2 $r0 // $r2 = float(byte 2 of $r0)
221 mul f32 $r3 $r3 0x3b808081 // scale by 1/255
222 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
223 cvt rn f32 $r1 u8 1 $r0 // $r1 = float(byte 1 of $r0)
224 mul f32 $r2 $r2 0x3b808081 // scale by 1/255
225 cvt rn f32 $r0 u8 0 $r0 // $r0 converted last because it is the source of all four
226 mul f32 $r1 $r1 0x3b808081 // scale by 1/255
227 mul f32 $r0 $r0 0x3b808081 // scale by 1/255
228 long ret
229 // RGBA8_SNORM: 32-bit texel loaded into $r0; four s8 channels converted to f32 and scaled by 1/127 into $r0..$r3
230 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
231 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
232 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
233 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
234 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
235 cvt rn f32 $r3 s8 3 $r0 // $r3 = float(signed byte 3 of $r0)
236 cvt rn f32 $r2 s8 2 $r0 // $r2 = float(signed byte 2 of $r0)
237 mul f32 $r3 $r3 0x3c010204 // scale by 1/127; NOTE(review): -128 is not clamped to -1.0 here
238 cvt rn f32 $r1 s8 1 $r0 // $r1 = float(signed byte 1 of $r0)
239 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
240 mul f32 $r2 $r2 0x3c010204 // scale by 1/127
241 cvt rn f32 $r0 s8 0 $r0 // $r0 converted last because it is the source of all four
242 mul f32 $r1 $r1 0x3c010204 // scale by 1/127
243 mul f32 $r0 $r0 0x3c010204 // scale by 1/127
244 long ret
245 // RGBA8_SINT: 32-bit texel loaded into $r0; four s8 channels widened to s32 in $r0..$r3
246 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
247 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
248 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
249 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
250 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
251 cvt s32 $r3 s8 3 $r0 // $r3 = signed byte 3 of $r0
252 cvt s32 $r2 s8 2 $r0 // $r2 = signed byte 2 of $r0
253 cvt s32 $r1 s8 1 $r0 // $r1 = signed byte 1 of $r0
254 cvt s32 $r0 s8 0 $r0 // $r0 converted last because it is the source of all four
255 long ret
256 // RGBA8_UINT: 32-bit texel loaded into $r0; four u8 channels widened to u32 in $r0..$r3
257 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
258 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
259 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
260 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
261 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
262 cvt u32 $r3 u8 3 $r0 // $r3 = byte 3 of $r0
263 cvt u32 $r2 u8 2 $r0 // $r2 = byte 2 of $r0
264 cvt u32 $r1 u8 1 $r0 // $r1 = byte 1 of $r0
265 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
266 cvt u32 $r0 u8 0 $r0 // $r0 converted last because it is the source of all four
267 long ret
268 // R5G6B5_UNORM: 16-bit texel loaded into $r0; 5/6/5-bit fields scaled to [0, 1] in $r0..$r2, $r3 forced to 1.0
269 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
270 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
271 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
272 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
273 ext u32 $r1 $r0 0x0605 // $r1 = 6-bit field at bit 5 (0x0605 presumably encodes width 6, offset 5)
274 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
275 long mov b32 $r3 0x3f800000 // $r3 = 1.0f
276 ext u32 $r2 $r0 0x050b // $r2 = 5-bit field at bit 11
277 long and b32 $r0 $r0 0x1f // $r0 = low 5 bits, masked last because $r0 is the source above
278 cvt rn f32 $r2 u8 0 $r2 // field values fit in the low byte
279 cvt rn f32 $r1 u8 0 $r1
280 mul f32 $r2 $r2 0x3d042108 // 5-bit field: scale by 1/31
281 cvt rn f32 $r0 u8 0 $r0
282 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
283 mul f32 $r1 $r1 0x3c820821 // 6-bit field: scale by 1/63
284 mul f32 $r0 $r0 0x3d042108 // 5-bit field: scale by 1/31
285 long ret
286 // R5G5B5X1_UNORM: 16-bit texel loaded into $r0; three 5-bit fields scaled to [0, 1] in $r0..$r2, $r3 forced to 1.0
287 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
288 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
289 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
290 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
291 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
292 ext u32 $r1 $r0 0x0505 // $r1 = 5-bit field at bit 5 (0x0505 presumably encodes width 5, offset 5)
293 ext u32 $r2 $r0 0x050a // $r2 = 5-bit field at bit 10
294 long and b32 $r0 $r0 0x1f // $r0 = low 5 bits, masked last because $r0 is the source above
295 long mov b32 $r3 0x3f800000 // $r3 = 1.0f; the top bit (X1) is ignored
296 cvt rn f32 $r2 u8 0 $r2 // field values fit in the low byte
297 cvt rn f32 $r1 u8 0 $r1
298 cvt rn f32 $r0 u8 0 $r0
299 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
300 mul f32 $r2 $r2 0x3d042108 // scale by 1/31
301 mul f32 $r1 $r1 0x3d042108 // scale by 1/31
302 mul f32 $r0 $r0 0x3d042108 // scale by 1/31
303 long ret
304 // RG16_UNORM: 32-bit texel loaded into $r0; two u16 channels converted to f32 in [0, 1] in $r0..$r1, $r2 = 0.0, $r3 = 1.0
305 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
306 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
307 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
308 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
309 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
310 cvt rn f32 $r1 u16 1 $r0 // $r1 = float(half-word 1 of $r0)
311 cvt rn f32 $r0 u16 0 $r0 // $r0 converted last because it is still a source above
312 mul f32 $r1 $r1 0x37800080 // scale by 1/65535 (nearest f32, so 65535 maps to exactly 1.0)
313 mul f32 $r0 $r0 0x37800080 // scale by 1/65535
314 long mov b32 $r2 0x00000000 // third component = 0.0
315 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
316 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
317 long ret
318 // RG16_SNORM: 32-bit texel loaded into $r0; two s16 channels converted to f32 and scaled by 1/32767 into $r0..$r1, $r2 = 0.0, $r3 = 1.0
319 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
320 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
321 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
322 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
323 mov b32 $r3 0x3f800000 // fourth component = 1.0f
324 cvt rn f32 $r1 s16 1 $r0 // $r1 = float(signed half-word 1 of $r0)
325 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
326 mov b32 $r2 0x00000000 // third component = 0.0
327 cvt rn f32 $r0 s16 0 $r0 // $r0 converted last because it is still a source above
328 mul f32 $r1 $r1 0x38000100 // scale by 1/32767 (nearest f32, so 32767 maps to exactly 1.0)
329 mul f32 $r0 $r0 0x38000100 // scale by 1/32767; NOTE(review): -32768 is not clamped to -1.0 here
330 long ret
331 // RG16_SINT: 32-bit texel loaded into $r0; two s16 channels widened to s32 in $r0..$r1, $r2 = 0, $r3 = 1
332 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
333 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
334 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
335 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
336 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
337 mov b32 $r3 0x00000001 // fourth component = integer 1
338 cvt s32 $r1 s16 1 $r0 // $r1 = signed half-word 1 of $r0
339 mov b32 $r2 0x00000000 // third component = 0
340 cvt s32 $r0 s16 0 $r0 // $r0 converted last because it is still a source above
341 long ret
342 // RG16_UINT: 32-bit texel loaded into $r0; two u16 channels widened to u32 in $r0..$r1, $r2 = 0, $r3 = 1
343 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
344 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
345 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
346 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
347 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
348 mov b32 $r3 0x00000001 // fourth component = integer 1
349 cvt u32 $r1 u16 1 $r0 // $r1 = half-word 1 of $r0
350 mov b32 $r2 0x00000000 // third component = 0
351 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
352 cvt u32 $r0 u16 0 $r0 // $r0 converted last because it is still a source above
353 long ret
354 // RG16_FLOAT: 32-bit texel loaded into $r0; two f16 channels widened to f32 in $r0..$r1, $r2 = 0.0, $r3 = 1.0
355 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
356 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
357 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
358 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
359 mov b32 $r3 0x3f800000 // fourth component = 1.0f
360 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
361 cvt f32 $r1 f16 $r0 1 // $r1 = f16 half-word 1 of $r0
362 mov b32 $r2 0x00000000 // third component = 0.0
363 cvt f32 $r0 f16 $r0 0 // $r0 converted last because it is still a source above
364 long ret
365 // R32_FLOAT: 32-bit texel loaded into $r0 unconverted; $r1 = $r2 = 0.0, $r3 = 1.0
366 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
367 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
368 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
369 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
370 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
371 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
372 long mov b32 $r2 0x00000000 // third component = 0.0
373 long mov b32 $r1 0x00000000 // second component = 0.0
374 long ret
375 // R32_xINT: 32-bit texel loaded into $r0 unconverted; $r1 = $r2 = 0, $r3 = integer 1
376 $p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
377 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
378 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
379 $p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
380 $p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
381 long mov b32 $r3 0x00000001 // fourth component = integer 1
382 long mov b32 $r2 0x00000000 // third component = 0
383 long mov b32 $r1 0x00000000 // second component = 0
384 long ret
385 // RG8_UNORM: 16-bit texel loaded into $r0; two u8 channels converted to f32 in [0, 1] in $r0..$r1, $r2 = 0.0, $r3 = 1.0
386 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
387 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
388 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
389 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
390 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
391 mov b32 $r3 0x3f800000 // fourth component = 1.0f
392 cvt rn f32 $r1 u8 1 $r0 // $r1 = float(byte 1 of $r0)
393 mov b32 $r2 0x00000000 // third component = 0.0
394 cvt rn f32 $r0 u8 0 $r0 // $r0 converted last because it is still a source above
395 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
396 mul f32 $r1 $r1 0x3b808081 // scale by 1/255
397 mul f32 $r0 $r0 0x3b808081 // scale by 1/255
398 long ret
399 // RG8_SNORM: 16-bit texel loaded into $r0; two s8 channels converted to f32 and scaled by 1/127 into $r0..$r1, $r2 = 0.0, $r3 = 1.0
400 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
401 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
402 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
403 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
404 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
405 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
406 cvt rn f32 $r1 s8 1 $r0 // $r1 = float(signed byte 1 of $r0)
407 long mov b32 $r2 0x00000000 // third component = 0.0
408 cvt rn f32 $r0 s8 0 $r0 // $r0 converted last because it is still a source above
409 mul f32 $r1 $r1 0x3c010204 // scale by 1/127; NOTE(review): -128 is not clamped to -1.0 here
410 mul f32 $r0 $r0 0x3c010204 // scale by 1/127
411 long ret
412 // RG8_UINT: 16-bit texel loaded into $r0; two u8 channels widened to u32 in $r0..$r1, $r2 = 0, $r3 = 1
413 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
414 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
415 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
416 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
417 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
418 long mov b32 $r3 0x00000001 // fourth component = integer 1
419 cvt u32 $r1 u8 1 $r0 // $r1 = byte 1 of $r0
420 long mov b32 $r2 0x00000000 // third component = 0
421 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
422 cvt u32 $r0 u8 0 $r0 // $r0 converted last because it is still a source above
423 long ret
424 // RG8_SINT: 16-bit texel loaded into $r0; two s8 channels widened to s32 in $r0..$r1, $r2 = 0, $r3 = 1
425 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
426 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
427 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
428 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
429 long mov b32 $r3 0x00000001 // fourth component = integer 1
430 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
431 cvt s32 $r1 s8 1 $r0 // $r1 = signed byte 1 of $r0
432 long mov b32 $r2 0x00000000 // third component = 0
433 cvt s32 $r0 s8 0 $r0 // $r0 converted last because it is still a source above
434 long ret
435 // R16_UNORM: 16-bit texel loaded into $r0; one u16 channel converted to f32 in [0, 1] in $r0, $r1 = $r2 = 0.0, $r3 = 1.0
436 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
437 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
438 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
439 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
440 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
441 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
442 cvt rn f32 $r0 u16 0 $r0 // $r0 = float(half-word 0 of $r0)
443 long mov b32 $r2 0x00000000 // third component = 0.0
444 long mov b32 $r1 0x00000000 // second component = 0.0
445 mul f32 $r0 $r0 0x37800080 // scale by 1/65535 (nearest f32, so 65535 maps to exactly 1.0)
446 long ret
447 // R16_SNORM: 16-bit texel loaded into $r0; one s16 channel converted to f32 and scaled by 1/32767 into $r0, $r1 = $r2 = 0.0, $r3 = 1.0
448 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
449 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
450 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
451 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
452 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
453 mov b32 $r3 0x3f800000 // fourth component = 1.0f
454 cvt rn f32 $r0 s16 0 $r0 // $r0 = float(half-word 0 of $r0 read as signed)
455 long mov b32 $r2 0x00000000 // third component = 0.0
456 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
457 long mov b32 $r1 0x00000000 // second component = 0.0
458 mul f32 $r0 $r0 0x38000100 // scale by 1/32767 (nearest f32, so 32767 maps to exactly 1.0); NOTE(review): -32768 is not clamped
459 long ret
460 // R16_SINT: 16-bit texel loaded with an s16 load into $r0 (no separate cvt); $r1 = $r2 = 0, $r3 = integer 1
461 $p1 suldgb s16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set; s16 presumably sign-extends into $r0
462 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
463 $p2 suldgb s16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
464 $p1 suldgb s16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
465 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
466 long mov b32 $r3 0x00000001 // fourth component = integer 1
467 long mov b32 $r2 0x00000000 // third component = 0
468 long mov b32 $r1 0x00000000 // second component = 0
469 long ret
470 // R16_UINT: 16-bit texel loaded with a u16 load into $r0 (no separate cvt); $r1 = $r2 = 0, $r3 = integer 1
471 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
472 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
473 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
474 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
475 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
476 long mov b32 $r3 0x00000001 // fourth component = integer 1
477 long mov b32 $r2 0x00000000 // third component = 0
478 long mov b32 $r1 0x00000000 // second component = 0
479 long ret
480 // R16_FLOAT: 16-bit texel loaded into $r0; one f16 channel widened to f32 in $r0, $r1 = $r2 = 0.0, $r3 = 1.0
481 $p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
482 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
483 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
484 $p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
485 $p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
486 long mov b32 $r3 0x3f800000 // fourth component = 1.0f
487 long mov b32 $r2 0x00000000 // third component = 0.0
488 cvt f32 $r0 f16 $r0 0 // $r0 = f16 half-word 0 of $r0 widened to f32
489 mov b32 $r1 0x00000000 // second component = 0.0
490 long ret
491 // R8_UNORM: 8-bit texel loaded into $r0; one u8 channel converted to f32 in [0, 1] in $r0, $r1 = $r2 = 0.0, $r3 = 1.0
492 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
493 $p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
494 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
495 $p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
496 $p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
497 mov b32 $r3 0x3f800000 // fourth component = 1.0f
498 cvt rn f32 $r0 u8 0 $r0 // $r0 = float(byte 0 of $r0)
499 mov b32 $r2 0x00000000 // third component = 0.0
500 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
501 mul f32 $r0 $r0 0x3b808081 // scale by 1/255
502 mov b32 $r1 0x00000000 // second component = 0.0
503 long ret
504 // R8_SNORM: 8-bit texel loaded into $r0; one s8 channel converted to f32 and scaled by 1/127 into $r0, $r1 = $r2 = 0.0, $r3 = 1.0
505 $p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
506 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
507 $p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
508 $p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
509 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
510 mov b32 $r3 0x3f800000 // fourth component = 1.0f
511 cvt rn f32 $r0 s8 0 $r0 // $r0 = float(byte 0 of $r0 read as signed)
512 mov b32 $r2 0x00000000 // third component = 0.0
513 mul f32 $r0 $r0 0x3c010204 // scale by 1/127; NOTE(review): -128 is not clamped to -1.0 here
514 mov b32 $r1 0x00000000 // second component = 0.0
515 long ret
516 // R8_SINT: 8-bit texel loaded with an s8 load into $r0 (no separate cvt); $r1 = $r2 = 0, $r3 = integer 1
517 $p1 suldgb s8 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set; s8 presumably sign-extends into $r0
518 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
519 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
520 $p2 suldgb s8 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
521 $p1 suldgb s8 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
522 long mov b32 $r3 0x00000001 // fourth component = integer 1
523 long mov b32 $r2 0x00000000 // third component = 0
524 long mov b32 $r1 0x00000000 // second component = 0
525 long ret
526 // R8_UINT: 8-bit texel loaded with a u8 load into $r0 (no separate cvt); $r1 = $r2 = 0, $r3 = integer 1
527 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
528 $p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set
529 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
530 $p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
531 $p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
532 long mov b32 $r3 0x00000001 // fourth component = integer 1
533 long mov b32 $r2 0x00000000 // third component = 0
534 long mov b32 $r1 0x00000000 // second component = 0
535 sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
536 long ret
537 // R11G11B10_FLOAT TODO: unimplemented placeholder; only $r3 is written, $r0..$r2 are left untouched
538 $p1 suldgb b32 $r3 ca zero u8 g[$r4d] $r2 $p0 // ca variant, executed when $p1 is set; raw texel goes to $r3
539 set $p1 0x1 $p1 xor not $p2 // $p1 = $p1 xor !$p2; of the states 00/01/10 only 00 (cv) leaves it set
540 $p2 suldgb b32 $r3 cg zero u8 g[$r4d] $r2 $p0 // cg variant, executed when $p2 is set
541 $p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 // cv variant, executed when the recomputed $p1 is set
542 long mov b32 $r3 0x3f800000 // overwrites the loaded texel with 1.0f; no channel is decoded yet
543 long nop
544 long ret
545 //
546 // RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
547 //
548 // INPUT: $r0d (x)
549 // OUTPUT: $r0d (rcp(x))
550 // CLOBBER: $r2 - $r7
551 // SIZE: 9 * 8 bytes (does not match this listing, which is only nop + ret)
552 //
553 long nop // placeholder: the recurrence above is not implemented here
554 long ret // returns with $r0d unmodified
555 // RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
556 //
557 // INPUT: $r0d (x)
558 // OUTPUT: $r0d (rsqrt(x))
559 // CLOBBER: $r2 - $r7
560 // SIZE: 14 * 8 bytes (does not match this listing, which is only nop + ret)
561 //
562 long nop // placeholder: the recurrence above is not implemented here
563 long ret // returns with $r0d unmodified