3 from nmutil
.formaltest
import FHDLTestCase
4 from openpower
.decoder
.helpers
import SINGLE
, fp64toselectable
5 from openpower
.decoder
.isa
.caller
import SVP64State
6 from openpower
.decoder
.isa
.test_caller
import run_tst
7 from openpower
.decoder
.isafunctions
.double2single
import (
8 ISACallerFnHelper_double2single
)
9 from openpower
.decoder
.selectable_int
import SelectableInt
10 from openpower
.simulator
.program
import Program
11 from openpower
.insndb
.asm
import SVP64Asm
13 # really bad hack. need to access the DOUBLE2SINGLE function auto-generated
15 fph
= ISACallerFnHelper_double2single(XLEN
=64, FPSCR
=None)
16 fph
.namespace
= {'FPSCR': fph
.FPSCR
,
def transform_radix2(vec, exptable, reverse=False):
    """Apply a radix-2 butterfly schedule to a list of real values.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    For every butterfly pair (jl, jh) and coefficient index k the
    computation is:

        temp1 = vec[jh] * exptable[k]
        temp2 = vec[jl]
        vec[jh] = temp2 - temp1
        vec[jl] = temp2 + temp1

    :param vec: list of values to transform.  When ``reverse`` is False
                the list is modified in place and is also the return
                value.  NOTE(review): the loop bounds assume len(vec)
                is a power of two - confirm with callers.
    :param exptable: coefficient table, indexed by k.  The last stage
                uses k = 0 .. len(vec)//2 - 1, so it needs at least
                len(vec)//2 entries.
    :param reverse: when True, a bit-reverse-permuted *copy* of ``vec``
                is transformed instead, leaving the caller's list
                untouched.
    :returns: the transformed list.
    """
    # Returns the integer whose value is the reverse of the lowest
    # 'width' bits of the integer 'val'.
    def reverse_bits(val, width):
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    if reverse:
        vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    # butterfly span doubles each stage: 2, 4, ... n
    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            # coefficient index restarts for every group of butterflies
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize
                # keep the pre-update value purely for the debug print
                vjh = vec[jh]
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
    """Apply a radix-2 butterfly schedule to split real/imaginary lists.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    For every butterfly pair (jl, jh) and coefficient index k:

        tpre      =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim      = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh] =  vec_r[jl] - tpre
        vec_i[jh] =  vec_i[jl] - tpim
        vec_r[jl] += tpre
        vec_i[jl] += tpim

    :param vec_r: real parts.  Modified in place when ``reverse`` is
                  False.  NOTE(review): the loop bounds assume the
                  length is a power of two - confirm with callers.
    :param vec_i: imaginary parts, indexed identically to ``vec_r``.
    :param cos_r: first coefficient table, indexed by k.
    :param sin_i: second coefficient table, indexed by k.
    :param reverse: when True, bit-reverse-permuted *copies* of both
                  input lists are transformed instead.
    :returns: tuple ``(vec_r, vec_i)`` of the transformed lists.
    """
    # Returns the integer whose value is the reverse of the lowest
    # 'width' bits of the integer 'val'.
    def reverse_bits(val, width):
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation.  Both halves of the complex
    # data must be permuted with the same index mapping (the previous
    # code referenced a non-existent 'vec' here and so raised as soon
    # as reverse=True was requested).
    if reverse:
        order = [reverse_bits(i, levels) for i in range(n)]
        vec_r = [vec_r[src] for src in order]
        vec_i = [vec_i[src] for src in order]

    # butterfly span doubles each stage: 2, 4, ... n
    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            # coefficient index restarts for every group of butterflies
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)

                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)

                # the [jh] outputs must be written before [jl] is
                # updated, because they read the old [jl] values
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
138 class FFTTestCase(FHDLTestCase
):
140 def _check_regs(self
, sim
, expected
):
142 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
144 def test_sv_remap_fpmadds_fft_4(self
):
145 """>>> lst = ["svshape 2, 1, 1, 1, 0",
146 "svremap 31, 1, 0, 2, 0, 1, 0",
147 "sv.ffmadds. *2, *10, *2"
149 this is a cheap (cheating) way to run a single "ffmadds." to
150 get at least Rc=1 on sv.ffmadds to be activated. the results
151 are not actually tested because there's no checking yet on
154 lst
= SVP64Asm(["svshape 2, 1, 1, 1, 0",
155 "svremap 31, 1, 0, 2, 0, 1, 0",
156 "sv.ffmadds *0, *8, *0"
160 # array and coefficients to test
161 av
= [7.0, -9.8] # array 0..1
162 coe
= [3.1] # coefficients
166 for i
, c
in enumerate(coe
):
167 fprs
[i
+8] = fp64toselectable(c
)
168 for i
, a
in enumerate(av
):
169 fprs
[i
+0] = fp64toselectable(a
)
171 with
Program(lst
, bigendian
=False) as program
:
172 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
173 print("spr svshape0", sim
.spr
['SVSHAPE0'])
174 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
175 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
176 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
177 print("spr svshape1", sim
.spr
['SVSHAPE1'])
178 print("spr svshape2", sim
.spr
['SVSHAPE2'])
179 print("spr svshape3", sim
.spr
['SVSHAPE3'])
181 # work out the results with the twin mul/add-sub
182 res
= transform_radix2(av
, coe
)
184 for i
, expected
in enumerate(res
):
185 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
186 for i
, expected
in enumerate(res
):
187 # convert to Power single
188 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
189 expected
= float(expected
)
190 actual
= float(sim
.fpr(i
))
191 # approximate error calculation, good enough test
192 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
193 # and the rounding is different
194 err
= abs(actual
- expected
) / expected
195 self
.assertTrue(err
< 1e-7)
197 def test_sv_remap_fpmadds_fft(self
):
198 """>>> lst = ["svshape 8, 1, 1, 1, 0",
199 "svremap 31, 1, 0, 2, 0, 1, 0",
200 "sv.ffmadds *2, *10, *2"
202 runs a full in-place O(N log2 N) butterfly schedule for
203 Discrete Fourier Transform.
205 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
206 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
208 there is the *option* to target a different location (non-in-place)
211 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
212 (3 inputs, 2 outputs)
214 lst
= SVP64Asm(["svshape 8, 1, 1, 1, 0",
215 "svremap 31, 1, 0, 2, 0, 1, 0",
216 "sv.ffmadds *0, *8, *0"
220 # array and coefficients to test
221 av
= [7.0, -9.8, 3.0, -32.3,
222 -2.0, 5.0, -9.8, 31.3] # array 0..7
223 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
227 for i
, c
in enumerate(coe
):
228 fprs
[i
+8] = fp64toselectable(c
)
229 for i
, a
in enumerate(av
):
230 fprs
[i
+0] = fp64toselectable(a
)
232 with
Program(lst
, bigendian
=False) as program
:
233 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
234 print("spr svshape0", sim
.spr
['SVSHAPE0'])
235 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
236 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
237 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
238 print("spr svshape1", sim
.spr
['SVSHAPE1'])
239 print("spr svshape2", sim
.spr
['SVSHAPE2'])
240 print("spr svshape3", sim
.spr
['SVSHAPE3'])
242 # work out the results with the twin mul/add-sub
243 res
= transform_radix2(av
, coe
)
245 for i
, expected
in enumerate(res
):
246 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
247 for i
, expected
in enumerate(res
):
248 # convert to Power single
249 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
250 expected
= float(expected
)
251 actual
= float(sim
.fpr(i
))
252 # approximate error calculation, good enough test
253 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
254 # and the rounding is different
255 err
= abs(actual
- expected
) / expected
256 self
.assertTrue(err
< 1e-7)
258 def test_sv_remap_fpmadds_fft_svstep(self
):
259 """>>> lst = SVP64Asm( [
260 "svshape 8, 1, 1, 1, 1",
261 "svremap 31, 1, 0, 2, 0, 1, 0",
262 "sv.ffmadds *0, *8, *0",
263 "svstep. 12, 0, 1, 0",
266 runs a full in-place O(N log2 N) butterfly schedule for
267 Discrete Fourier Transform. this version however uses
268 SVP64 "Vertical-First" Mode and so needs an explicit
271 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
272 (3 inputs, 2 outputs)
275 "svshape 8, 1, 1, 1, 1",
276 "svremap 31, 1, 0, 2, 0, 1, 0",
277 "sv.ffmadds *0, *8, *0",
278 "svstep. 27, 0, 1, 0",
283 # array and coefficients to test
284 av
= [7.0, -9.8, 3.0, -32.3,
285 -2.0, 5.0, -9.8, 31.3] # array 0..7
286 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
290 for i
, c
in enumerate(coe
):
291 fprs
[i
+8] = fp64toselectable(c
)
292 for i
, a
in enumerate(av
):
293 fprs
[i
+0] = fp64toselectable(a
)
295 # set total. err don't know how to calculate how many there are...
296 # do it manually for now
302 tablestep
= n
// size
303 for i
in range(0, n
, size
):
304 for j
in range(i
, i
+ halfsize
):
308 # SVSTATE (calculated VL)
309 svstate
= SVP64State()
311 svstate
.maxvl
= VL
# MAXVL
312 print("SVSTATE", bin(svstate
.asint()))
314 with
Program(lst
, bigendian
=False) as program
:
315 sim
= self
.run_tst_program(program
, svstate
=svstate
,
317 print("spr svshape0", sim
.spr
['SVSHAPE0'])
318 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
319 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
320 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
321 print("spr svshape1", sim
.spr
['SVSHAPE1'])
322 print("spr svshape2", sim
.spr
['SVSHAPE2'])
323 print("spr svshape3", sim
.spr
['SVSHAPE3'])
325 # work out the results with the twin mul/add-sub
326 res
= transform_radix2(av
, coe
)
328 for i
, expected
in enumerate(res
):
329 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
330 for i
, expected
in enumerate(res
):
331 # convert to Power single
332 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
333 expected
= float(expected
)
334 actual
= float(sim
.fpr(i
))
335 # approximate error calculation, good enough test
336 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
337 # and the rounding is different
338 err
= abs(actual
- expected
) / expected
339 self
.assertTrue(err
< 1e-7)
341 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
342 """>>> lst = SVP64Asm( [
343 "svshape 8, 1, 1, 1, 1",
344 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
345 "svremap 5, 1, 0, 2, 0, 0, 1",
346 "sv.fmuls 24, *0, *8",
347 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
348 "svremap 26, 0, 0, 0, 0, 1, 1",
349 "sv.ffadds *0, 24, *0",
350 "svstep. 27, 0, 1, 0",
354 runs a full in-place O(N log2 N) butterfly schedule for
355 Discrete Fourier Transform. also uses "Vertical First"
356 but also uses temporary scalars and ffadds rather than
359 this represents an incremental step towards complex FFT
361 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
363 * single fmuls FRT, FRA, FRC
364 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
365 (FRS is implicit / hidden in ff* operations)
367 multiply: # sv.fmuls FRT, FRA, FRC
368 temp1 = vec[jh] * exptable[k]
370 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
371 vec[jh] = temp2 - temp1
372 vec[jl] = temp2 + temp1
374 also see notes in complex fft test: here svremap is done in
375 "non-persistent" mode (as a demo) whereas in the complex fft
376 svremap is used in "persistent" mode, where by a complete
377 coincidence the REMAP arguments all happen to line up and
378 only one persistent svremap is needed. the exact same trick
379 *could* be applied here but for illustrative purposes it is not.
382 "svshape 8, 1, 1, 1, 1",
383 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
384 "svremap 5, 1, 0, 2, 0, 0, 0",
385 "sv.fmuls 24, *0, *8",
386 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
387 "svremap 26, 0, 0, 0, 0, 1, 0",
388 "sv.ffadds *0, 24, *0",
389 "svstep. 27, 0, 1, 0",
394 # array and coefficients to test
395 av
= [7.0, -9.8, 3.0, -32.3,
396 -2.0, 5.0, -9.8, 31.3] # array 0..7
397 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
401 for i
, c
in enumerate(coe
):
402 fprs
[i
+8] = fp64toselectable(c
)
403 for i
, a
in enumerate(av
):
404 fprs
[i
+0] = fp64toselectable(a
)
406 # set total. err don't know how to calculate how many there are...
407 # do it manually for now
413 tablestep
= n
// size
414 for i
in range(0, n
, size
):
415 for j
in range(i
, i
+ halfsize
):
419 # SVSTATE (calculated VL)
420 svstate
= SVP64State()
422 svstate
.maxvl
= VL
# MAXVL
423 print("SVSTATE", bin(svstate
.asint()))
425 with
Program(lst
, bigendian
=False) as program
:
426 sim
= self
.run_tst_program(program
, svstate
=svstate
,
428 print("spr svshape0", sim
.spr
['SVSHAPE0'])
429 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
430 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
431 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
432 print("spr svshape1", sim
.spr
['SVSHAPE1'])
433 print("spr svshape2", sim
.spr
['SVSHAPE2'])
434 print("spr svshape3", sim
.spr
['SVSHAPE3'])
436 # work out the results with the twin mul/add-sub
437 res
= transform_radix2(av
, coe
)
439 for i
, expected
in enumerate(res
):
440 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
441 for i
, expected
in enumerate(res
):
442 # convert to Power single
443 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
444 expected
= float(expected
)
445 actual
= float(sim
.fpr(i
))
446 # approximate error calculation, good enough test
447 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
448 # and the rounding is different
449 err
= abs(actual
- expected
) / expected
450 self
.assertTrue(err
< 1e-7)
452 def test_sv_fpmadds_fft(self
):
453 """>>> lst = ["sv.ffmadds *2, *10, *2"
455 four in-place vector mul-adds, four in-place vector mul-subs
457 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
458 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
460 there is the *option* to target a different location (non-in-place)
463 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
464 FRS to perform the two multiplies. one add, one subtract.
466 sv.ffmadds FRT, FRA, FRC, FRB actually does:
467 fmadds FRT , FRA, FRC, FRA
468 fnmsubs FRT+vl, FRA, FRC, FRB+vl
471 lst
= SVP64Asm(["sv.ffmadds *2, *10, *2"
476 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
477 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
478 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
480 # work out the results with the twin mul/add-sub
481 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
482 fprs
[i
+2] = fp64toselectable(a
)
483 fprs
[i
+6] = fp64toselectable(b
)
484 fprs
[i
+10] = fp64toselectable(c
)
488 # convert to Power single
489 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
490 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
492 print("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
494 # SVSTATE (in this case, VL=2)
495 svstate
= SVP64State()
497 svstate
.maxvl
= 4 # MAXVL
498 print("SVSTATE", bin(svstate
.asint()))
500 with
Program(lst
, bigendian
=False) as program
:
501 sim
= self
.run_tst_program(program
, svstate
=svstate
,
503 # confirm that the results are as expected
504 for i
, (t
, u
) in enumerate(res
):
505 self
.assertEqual(sim
.fpr(i
+2), t
)
506 self
.assertEqual(sim
.fpr(i
+6), u
)
508 def test_sv_ffadds_fft(self
):
509 """>>> lst = ["sv.ffadds *2, *2, *2"
511 four in-place vector adds, four in-place vector subs
513 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
514 FRS to perform the two multiplies. one add, one subtract.
516 sv.ffadds FRT, FRA, FRB actually does:
518 fsubs FRT+vl, FRA, FRB+vl
520 lst
= SVP64Asm(["sv.ffadds *2, *2, *2"
525 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
526 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
528 # work out the results with the twin add-sub
529 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
530 fprs
[i
+2] = fp64toselectable(a
)
531 fprs
[i
+6] = fp64toselectable(b
)
534 # convert to Power single
535 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
536 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
538 print("FFT", i
, "in", a
, b
, "res", t
, u
)
540 # SVSTATE (in this case, VL=2)
541 svstate
= SVP64State()
543 svstate
.maxvl
= 4 # MAXVL
544 print("SVSTATE", bin(svstate
.asint()))
546 with
Program(lst
, bigendian
=False) as program
:
547 sim
= self
.run_tst_program(program
, svstate
=svstate
,
549 # confirm that the results are as expected
550 for i
, (t
, u
) in enumerate(res
):
551 a
= float(sim
.fpr(i
+2))
552 b
= float(sim
.fpr(i
+6))
555 print("FFT", i
, "in", a
, b
, "res", t
, u
)
556 for i
, (t
, u
) in enumerate(res
):
557 self
.assertEqual(sim
.fpr(i
+2), t
)
558 self
.assertEqual(sim
.fpr(i
+6), u
)
560 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
562 runs a full in-place O(N log2 N) butterfly schedule for
563 Discrete Fourier Transform. this version however uses
564 SVP64 "Vertical-First" Mode and so needs an explicit
567 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
568 (3 inputs, 2 outputs)
570 complex calculation (FFT):
572 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
573 vec_r[jh] = vec_r[jl] - tpre
576 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
577 vec_i[jh] = vec_i[jl] - tpim
580 real-only calculation (DFT):
582 temp1 = vec[jh] * exptable[k]
584 vec[jh] = temp2 - temp1
585 vec[jl] = temp2 + temp1
587 note: a rather nice convenience / coincidence. the meaning of
588 these two instructions is:
589 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
590 "svremap 5, 1, 0, 2, 0, 0, 1",
591 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
592 "svremap 26, 0, 0, 0, 0, 1, 1",
594 however it turns out that they can be *merged*, and for
595 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
596 *ignore* their REMAPs (by definition, because you can't REMAP
597 scalar operands), and for the second one (sv.ffads) exactly the
598 right REMAPs are also ignored!
600 therefore we can merge:
601 "svremap 5, 1, 0, 2, 0, 0, 1",
602 "svremap 26, 0, 0, 0, 0, 1, 1",
604 "svremap 31, 1, 0, 2, 0, 1, 1",
605 and save one instruction.
608 # set triple butterfly mode with persistent "REMAP"
609 "svshape 8, 1, 1, 1, 1",
610 "svremap 31, 1, 0, 2, 0, 1, 1",
612 "sv.fmuls 24, *0, *16", # mul1_r = r*cos_r
613 "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
614 # tpre = mul1_r + mul2_r
616 "sv.fmuls 26, *0, *20", # mul1_i = r*sin_i
617 "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
618 # tpim = mul2_i - mul1_i
620 "sv.ffadds *0, 24, *0", # vh/vl +/- tpre
622 "sv.ffadds *8, 26, *8", # vh/vl +- tpim
625 "svstep. 27, 0, 1, 0",
630 # array and coefficients to test
631 ar
= [7.0, -9.8, 3.0, -32.3,
632 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
633 ai
= [1.0, -1.8, 3.0, 19.3,
634 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
635 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
636 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
640 for i
, a
in enumerate(ar
):
641 fprs
[i
+0] = fp64toselectable(a
)
642 for i
, a
in enumerate(ai
):
643 fprs
[i
+8] = fp64toselectable(a
)
644 for i
, cr
in enumerate(coer
):
645 fprs
[i
+16] = fp64toselectable(cr
)
646 for i
, ci
in enumerate(coei
):
647 fprs
[i
+20] = fp64toselectable(ci
)
649 # set total. err don't know how to calculate how many there are...
650 # do it manually for now
656 tablestep
= n
// size
657 for i
in range(0, n
, size
):
658 for j
in range(i
, i
+ halfsize
):
662 # SVSTATE (calculated VL)
663 svstate
= SVP64State()
665 svstate
.maxvl
= VL
# MAXVL
666 print("SVSTATE", bin(svstate
.asint()))
668 with
Program(lst
, bigendian
=False) as program
:
669 sim
= self
.run_tst_program(program
, svstate
=svstate
,
671 print("spr svshape0", sim
.spr
['SVSHAPE0'])
672 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
673 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
674 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
675 print("spr svshape1", sim
.spr
['SVSHAPE1'])
676 print("spr svshape2", sim
.spr
['SVSHAPE2'])
677 print("spr svshape3", sim
.spr
['SVSHAPE3'])
679 # work out the results with the twin mul/add-sub, explicit
681 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
683 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
684 print("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
685 "expected_r", expected_r
,
686 "expected_i", expected_i
)
687 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
688 # convert to Power single
689 expected_r
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_r
))
690 expected_r
= float(expected_r
)
691 actual_r
= float(sim
.fpr(i
))
692 # approximate error calculation, good enough test
693 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
694 # and the rounding is different
695 err
= abs(actual_r
- expected_r
) / expected_r
696 self
.assertTrue(err
< 1e-6)
697 # convert to Power single
698 expected_i
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_i
))
699 expected_i
= float(expected_i
)
700 actual_i
= float(sim
.fpr(i
+8))
701 # approximate error calculation, good enough test
702 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
703 # and the rounding is different
704 err
= abs(actual_i
- expected_i
) / expected_i
705 self
.assertTrue(err
< 1e-6)
707 def test_sv_ffadds_fft_scalar(self
):
708 """>>> lst = ["sv.ffadds *2, 12, 13"
710 four in-place vector adds and subs, but done with a scalar
713 lst
= SVP64Asm(["sv.ffadds *2, 12, 13"
720 fprs
[12] = fp64toselectable(scalar_a
)
721 fprs
[13] = fp64toselectable(scalar_b
)
723 # work out the results with the twin add-sub
725 t
= scalar_b
+ scalar_a
726 u
= scalar_b
- scalar_a
727 # convert to Power single
728 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
729 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
731 print("FFT", i
, "res", t
, u
)
733 # SVSTATE (in this case, VL=2)
734 svstate
= SVP64State()
736 svstate
.maxvl
= 4 # MAXVL
737 print("SVSTATE", bin(svstate
.asint()))
739 with
Program(lst
, bigendian
=False) as program
:
740 sim
= self
.run_tst_program(program
, svstate
=svstate
,
742 # confirm that the results are as expected
743 for i
, (t
, u
) in enumerate(res
):
744 a
= float(sim
.fpr(i
+2))
745 b
= float(sim
.fpr(i
+6))
748 print("FFT", i
, "in", a
, b
, "res", t
, u
)
749 for i
, (t
, u
) in enumerate(res
):
750 self
.assertEqual(sim
.fpr(i
+2), t
)
751 self
.assertEqual(sim
.fpr(i
+6), u
)
753 def test_sv_remap_fpmadds_fft_ldst(self
):
754 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
755 "sv.lfs/els *0, 4(0)",
756 "svshape 8, 1, 1, 1, 0",
757 "svremap 31, 1, 0, 2, 0, 1, 0",
758 "sv.ffmadds *0, *8, *0"
760 runs a full in-place O(N log2 N) butterfly schedule for
761 Discrete Fourier Transform, using bit-reversed LD/ST
763 lst
= SVP64Asm(["svshape 8, 1, 1, 15, 0",
764 "svremap 1, 0, 0, 0, 0, 0, 0",
765 "sv.lfs/els *0, 4(0)",
766 "svshape 8, 1, 1, 1, 0",
767 "svremap 31, 1, 0, 2, 0, 1, 0",
768 "sv.ffmadds *0, *8, *0"
772 # array and coefficients to test
773 av
= [7.0, -9.8, 3.0, -32.3,
774 -2.0, 5.0, -9.8, 31.3] # array 0..7
775 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
779 for i
, c
in enumerate(coe
):
780 fprs
[i
+8] = fp64toselectable(c
)
784 for i
, a
in enumerate(av
):
785 a
= SINGLE(fp64toselectable(a
)).value
790 mem
[(i
//2)*8] = val |
(a
<< 32)
792 with
Program(lst
, bigendian
=False) as program
:
793 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
795 print("spr svshape0", sim
.spr
['SVSHAPE0'])
796 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
797 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
798 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
799 print("spr svshape1", sim
.spr
['SVSHAPE1'])
800 print("spr svshape2", sim
.spr
['SVSHAPE2'])
801 print("spr svshape3", sim
.spr
['SVSHAPE3'])
804 print(sim
.mem
.dump())
806 # work out the results with the twin mul/add-sub,
807 # note bit-reverse mode requested
808 res
= transform_radix2(av
, coe
, reverse
=True)
810 for i
, expected
in enumerate(res
):
811 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
812 for i
, expected
in enumerate(res
):
813 # convert to Power single
814 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
815 expected
= float(expected
)
816 actual
= float(sim
.fpr(i
))
817 # approximate error calculation, good enough test
818 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
819 # and the rounding is different
820 err
= abs(actual
- expected
) / expected
821 self
.assertTrue(err
< 1e-6)
823 def run_tst_program(self
, prog
, initial_regs
=None,
827 if initial_regs
is None:
828 initial_regs
= [0] * 32
829 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
830 initial_fprs
=initial_fprs
,
841 if __name__
== "__main__":