3 # Test various instructions to check whether half<->full widening/narrowing
4 # works. The basic premise is to perform the same instruction with and
5 # without the widening/narrowing folded in and check if the results match.
7 # Note this doesn't currently differentiate between signed/unsigned/bool,
8 # and just assumes int is signed (since unsigned is basically(ish) like
9 # signed but without sign extension)
11 # TODO probably good to pick numeric src values that are better at triggering
12 # edge cases, while still not losing precision in a full->half->full
13 # sequence.. but some instructions like absneg don't even appear to be
14 # subtly wrong when you try to fold in a precision conversion.
16 # add '-v' arg to see the result values
21 # Templates for float->float instructions:
24 'add.f $dst, $src1, $src2'
25 'min.f $dst, $src1, $src2'
26 'min.f $dst, $src2, $src1'
27 'max.f $dst, $src1, $src2'
28 'max.f $dst, $src2, $src1'
29 'mul.f $dst, $src1, $src2'
31 'absneg.f $dst, \(neg\)$src1'
32 'absneg.f $dst, \(abs\)$src1'
41 # Templates for float->int instructions:
44 'cmps.f.gt $dst, $src1, $src2'
45 'cmps.f.lt $dst, $src1, $src2'
46 'cmpv.f.gt $dst, $src1, $src2'
47 'cmpv.f.lt $dst, $src1, $src2'
51 # Templates for int->int instructions:
54 'add.u $dst, $src1, $src2'
55 'add.s $dst, $src1, $src2'
56 'sub.u $dst, $src1, $src2'
57 'sub.s $dst, $src1, $src2'
58 'cmps.f.gt $dst, $src1, $src2'
59 'cmps.f.lt $dst, $src1, $src2'
60 'min.u $dst, $src1, $src2'
61 'min.u $dst, $src2, $src1'
62 'min.s $dst, $src1, $src2'
63 'min.s $dst, $src2, $src1'
64 'max.u $dst, $src1, $src2'
65 'max.u $dst, $src2, $src1'
66 'max.s $dst, $src1, $src2'
67 'max.s $dst, $src2, $src1'
68 'absneg.s $dst, \(neg\)$src1'
69 'absneg.s $dst, \(abs\)$src1'
70 'and.b $dst, $src2, $src3'
71 'or.b $dst, $src1, $src2'
73 'xor.b $dst, $src1, $src2'
74 'cmpv.u.gt $dst, $src1, $src2'
75 'cmpv.u.lt $dst, $src1, $src2'
76 'cmpv.s.gt $dst, $src1, $src2'
77 'cmpv.s.lt $dst, $src1, $src2'
78 'mul.u24 $dst, $src1, $src2'
79 'mul.s24 $dst, $src1, $src2'
80 'mull.u $dst, $src1, $src2'
84 'shl.b $dst, $src1, $src2'
85 'shr.b $dst, $src3, $src1'
86 'ashr.b $dst, $src3, $src1'
87 'mgen.b $dst, $src1, $src2'
88 'getbit.b $dst, $src3, $src2'
91 'shb $dst, $src1, $src2'
92 'msad $dst, $src1, $src2'
96 # Helper to expand instruction template:
110 echo '; control, half->half:'
111 expand $instr "hr1.x" "hr0.x" "hr0.y" "hr0.z"
112 echo '; test, full->half:'
113 expand $instr "hr1.y" "r1.x" "r1.y" "r1.z"
115 echo '; control, full->full:'
116 expand $instr "r2.x" "r1.x" "r1.y" "r1.z"
117 echo '; test, half->full:'
118 expand $instr "r2.y" "hr0.x" "hr0.y" "hr0.z"
124 # Helpers to construct test program assembly:
143 stib.untyped.1d.u32.1 g[0] + r3.x, r2.x ; control: full->full
144 stib.untyped.1d.u32.1 g[0] + r3.y, r2.y ; test: half->full
145 stib.untyped.1d.u32.1 g[0] + r3.z, r2.z ; control: half->half
146 stib.untyped.1d.u32.1 g[0] + r3.w, r2.w ; test: full->half
154 ; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
155 ; r1->r2 avail for full, r1 for src, r2 for dst
156 cov.f32f16 hr0.x, (1.0)
157 cov.f32f16 hr0.y, (2.0)
158 cov.f32f16 hr0.z, (3.0)
159 mov.f32f32 r1.x, (1.0)
160 mov.f32f32 r1.y, (2.0)
161 mov.f32f32 r1.z, (3.0)
168 ; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
169 ; r1->r2 avail for full, r1 for src, r2 for dst
181 # Generate assembly code to test float->float opcode
191 ; convert half results back to full:
192 cov.f16f32 r2.z, hr1.x
193 cov.f16f32 r2.w, hr1.y
200 # Generate assembly code to test float->int opcode
210 ; convert half results back to full:
211 cov.s16s32 r2.z, hr1.x
212 cov.s16s32 r2.w, hr1.y
219 # Generate assembly code to test int->int opcode
229 ; convert half results back to full:
230 cov.s16s32 r2.z, hr1.x
231 cov.s16s32 r2.w, hr1.y
239 # Helper to parse computerator output and print results:
242 str
=`cat - | grep " " | head -1 | xargs`
244 if [ "$verbose" = "true" ]; then
248 # Split components of result buffer:
249 cf
=$
(echo $str | cut
-f1 -d' ')
250 tf
=$
(echo $str | cut
-f2 -d' ')
251 ch
=$
(echo $str | cut
-f3 -d' ')
252 th
=$
(echo $str | cut
-f4 -d' ')
254 # Sanity test, make sure the control results match:
255 if [ $cf != $ch ]; then
256 echo " FAIL: control results do not match! Half vs full op is not equivalent!"
257 echo " full=$cf half=$ch"
260 # Compare test (with conversion folded) to control:
261 if [ $cf != $tf ]; then
262 echo " FAIL: half -> full widening result does not match control!"
263 echo " control=$cf result=$tf"
265 if [ $ch != $th ]; then
266 echo " FAIL: full -> half narrowing result does not match control!"
267 echo " control=$ch result=$th"
270 # HACK without a delay different invocations
271 # of computerator seem to somehow clobber each
272 # other.. which isn't great..
280 if [ "$1" = "-v" ]; then
285 for instr
in ${f2f_instrs[@]}; do
287 f2f_asm
$instr | .
/computerator
-g 1,1,1 | check_results
289 for instr
in ${f2i_instrs[@]}; do
291 f2i_asm
$instr | .
/computerator
-g 1,1,1 | check_results
293 for instr
in ${i2i_instrs[@]}; do
295 i2i_asm
$instr | .
/computerator
-g 1,1,1 | check_results