3 from nmutil
.formaltest
import FHDLTestCase
4 from openpower
.decoder
.helpers
import SINGLE
, fp64toselectable
5 from openpower
.decoder
.isa
.caller
import SVP64State
6 from openpower
.decoder
.isa
.test_caller
import run_tst
7 from openpower
.decoder
.isafunctions
.double2single
import ISACallerFnHelper
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.simulator
.program
import Program
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
12 # really bad hack. need to access the DOUBLE2SINGLE function auto-generated
14 fph
= ISACallerFnHelper(XLEN
=64)
def transform_radix2(vec, exptable, reverse=False):
    """Run the radix-2 butterfly schedule over a real-valued vector.

    This is the pure-Python reference the simulator results are compared
    against.  For every butterfly pair (jl, jh) and coefficient index k:

        temp1   = vec[jh] * exptable[k]
        temp2   = vec[jl]
        vec[jh] = temp2 - temp1
        vec[jl] = temp2 + temp1

    Args:
        vec: sequence of values to transform.  When ``reverse`` is false the
            list is updated in place and the same object is returned.
        exptable: coefficient table, indexed by ``k`` (stepped by
            ``n // size`` inside each stage).
        reverse: when true, first copy ``vec`` into bit-reversed order
            (the caller's list is then left untouched).

    Returns:
        The transformed list.

    NOTE(review): len(vec) is presumably a power of two; this is not
    checked here -- confirm against callers.
    """
    # FFT and convolution test (Python), based on Project Nayuki
    #
    # Copyright (c) 2020 Project Nayuki. (MIT License)
    # https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    def reverse_bits(val, width):
        """Return the integer whose value is the reverse of the lowest
        'width' bits of the integer 'val'."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    if reverse:
        vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    # one pass per stage: the butterfly span doubles each time
    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0  # coefficient index restarts for every group
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize
                vjh = vec[jh]  # kept only for the debug print below
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
    """Run the radix-2 butterfly schedule over a complex-valued vector.

    Real and imaginary parts are held in separate sequences.  For every
    butterfly pair (jl, jh) and coefficient index k:

        tpre      =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim      = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh] = vec_r[jl] - tpre
        vec_i[jh] = vec_i[jl] - tpim
        vec_r[jl] += tpre
        vec_i[jl] += tpim

    Args:
        vec_r: real parts.  Updated in place when ``reverse`` is false.
        vec_i: imaginary parts, same length as ``vec_r``.  Updated in place
            when ``reverse`` is false.
        cos_r: real coefficient table, indexed by ``k``.
        sin_i: imaginary coefficient table, indexed by ``k``.
        reverse: when true, first copy both vectors into bit-reversed order
            (the caller's lists are then left untouched).

    Returns:
        Tuple ``(vec_r, vec_i)`` holding the transformed parts.

    NOTE(review): len(vec_r) is presumably a power of two; this is not
    checked here -- confirm against callers.
    """
    # FFT and convolution test (Python), based on Project Nayuki
    #
    # Copyright (c) 2020 Project Nayuki. (MIT License)
    # https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    def reverse_bits(val, width):
        """Return the integer whose value is the reverse of the lowest
        'width' bits of the integer 'val'."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation.  Both halves of the complex data
    # must be permuted with the same index mapping; the previous code
    # referenced a non-existent name `vec` here and raised NameError.
    if reverse:
        order = [reverse_bits(i, levels) for i in range(n)]
        vec_r = [vec_r[src] for src in order]
        vec_i = [vec_i[src] for src in order]

    # one pass per stage: the butterfly span doubles each time
    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0  # coefficient index restarts for every group
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)

                # both products must be taken before vec_r[jh]/vec_i[jh]
                # are overwritten below
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)

                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
131 class FFTTestCase(FHDLTestCase
):
133 def _check_regs(self
, sim
, expected
):
135 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
137 def test_sv_remap_fpmadds_fft_4(self
):
138 """>>> lst = ["svshape 2, 1, 1, 1, 0",
139 "svremap 31, 1, 0, 2, 0, 1, 0",
140 "sv.ffmadds. *2, *2, *2, *10"
142 this is a cheap (cheating) way to run a single "ffmadds." to
143 get at least Rc=1 on sv.ffmadds to be activated. the results
144 are not actually tested because there's no checking yet on
147 lst
= SVP64Asm(["svshape 2, 1, 1, 1, 0",
148 "svremap 31, 1, 0, 2, 0, 1, 0",
149 "sv.ffmadds *0, *0, *0, *8"
153 # array and coefficients to test
154 av
= [7.0, -9.8] # array 0..1
155 coe
= [3.1] # coefficients
159 for i
, c
in enumerate(coe
):
160 fprs
[i
+8] = fp64toselectable(c
)
161 for i
, a
in enumerate(av
):
162 fprs
[i
+0] = fp64toselectable(a
)
164 with
Program(lst
, bigendian
=False) as program
:
165 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
166 print("spr svshape0", sim
.spr
['SVSHAPE0'])
167 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
168 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
169 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
170 print("spr svshape1", sim
.spr
['SVSHAPE1'])
171 print("spr svshape2", sim
.spr
['SVSHAPE2'])
172 print("spr svshape3", sim
.spr
['SVSHAPE3'])
174 # work out the results with the twin mul/add-sub
175 res
= transform_radix2(av
, coe
)
177 for i
, expected
in enumerate(res
):
178 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
179 for i
, expected
in enumerate(res
):
180 # convert to Power single
181 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
182 expected
= float(expected
)
183 actual
= float(sim
.fpr(i
))
184 # approximate error calculation, good enough test
185 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
186 # and the rounding is different
187 err
= abs(actual
- expected
) / expected
188 self
.assertTrue(err
< 1e-7)
190 def test_sv_remap_fpmadds_fft(self
):
191 """>>> lst = ["svshape 8, 1, 1, 1, 0",
192 "svremap 31, 1, 0, 2, 0, 1, 0",
193 "sv.ffmadds *2, *2, *2, *10"
195 runs a full in-place O(N log2 N) butterfly schedule for
196 Discrete Fourier Transform.
198 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
199 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
201 there is the *option* to target a different location (non-in-place)
204 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
205 (3 inputs, 2 outputs)
207 lst
= SVP64Asm(["svshape 8, 1, 1, 1, 0",
208 "svremap 31, 1, 0, 2, 0, 1, 0",
209 "sv.ffmadds *0, *0, *0, *8"
213 # array and coefficients to test
214 av
= [7.0, -9.8, 3.0, -32.3,
215 -2.0, 5.0, -9.8, 31.3] # array 0..7
216 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
220 for i
, c
in enumerate(coe
):
221 fprs
[i
+8] = fp64toselectable(c
)
222 for i
, a
in enumerate(av
):
223 fprs
[i
+0] = fp64toselectable(a
)
225 with
Program(lst
, bigendian
=False) as program
:
226 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
227 print("spr svshape0", sim
.spr
['SVSHAPE0'])
228 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
229 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
230 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
231 print("spr svshape1", sim
.spr
['SVSHAPE1'])
232 print("spr svshape2", sim
.spr
['SVSHAPE2'])
233 print("spr svshape3", sim
.spr
['SVSHAPE3'])
235 # work out the results with the twin mul/add-sub
236 res
= transform_radix2(av
, coe
)
238 for i
, expected
in enumerate(res
):
239 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
240 for i
, expected
in enumerate(res
):
241 # convert to Power single
242 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
243 expected
= float(expected
)
244 actual
= float(sim
.fpr(i
))
245 # approximate error calculation, good enough test
246 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
247 # and the rounding is different
248 err
= abs(actual
- expected
) / expected
249 self
.assertTrue(err
< 1e-7)
251 def test_sv_remap_fpmadds_fft_svstep(self
):
252 """>>> lst = SVP64Asm( [
253 "svshape 8, 1, 1, 1, 1",
254 "svremap 31, 1, 0, 2, 0, 1, 0",
255 "sv.ffmadds *0, *0, *0, *8",
259 runs a full in-place O(N log2 N) butterfly schedule for
260 Discrete Fourier Transform. this version however uses
261 SVP64 "Vertical-First" Mode and so needs an explicit
264 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
265 (3 inputs, 2 outputs)
268 "svshape 8, 1, 1, 1, 1",
269 "svremap 31, 1, 0, 2, 0, 1, 0",
270 "sv.ffmadds *0, *0, *0, *8",
276 # array and coefficients to test
277 av
= [7.0, -9.8, 3.0, -32.3,
278 -2.0, 5.0, -9.8, 31.3] # array 0..7
279 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
283 for i
, c
in enumerate(coe
):
284 fprs
[i
+8] = fp64toselectable(c
)
285 for i
, a
in enumerate(av
):
286 fprs
[i
+0] = fp64toselectable(a
)
288 # set total. err don't know how to calculate how many there are...
289 # do it manually for now
295 tablestep
= n
// size
296 for i
in range(0, n
, size
):
297 for j
in range(i
, i
+ halfsize
):
301 # SVSTATE (calculated VL)
302 svstate
= SVP64State()
304 svstate
.maxvl
= VL
# MAXVL
305 print("SVSTATE", bin(svstate
.asint()))
307 with
Program(lst
, bigendian
=False) as program
:
308 sim
= self
.run_tst_program(program
, svstate
=svstate
,
310 print("spr svshape0", sim
.spr
['SVSHAPE0'])
311 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
312 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
313 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
314 print("spr svshape1", sim
.spr
['SVSHAPE1'])
315 print("spr svshape2", sim
.spr
['SVSHAPE2'])
316 print("spr svshape3", sim
.spr
['SVSHAPE3'])
318 # work out the results with the twin mul/add-sub
319 res
= transform_radix2(av
, coe
)
321 for i
, expected
in enumerate(res
):
322 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
323 for i
, expected
in enumerate(res
):
324 # convert to Power single
325 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
326 expected
= float(expected
)
327 actual
= float(sim
.fpr(i
))
328 # approximate error calculation, good enough test
329 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
330 # and the rounding is different
331 err
= abs(actual
- expected
) / expected
332 self
.assertTrue(err
< 1e-7)
334 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
335 """>>> lst = SVP64Asm( [
336 "svshape 8, 1, 1, 1, 1",
337 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
338 "svremap 5, 1, 0, 2, 0, 0, 1",
339 "sv.fmuls 24, *0, *8",
340 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
341 "svremap 26, 0, 0, 0, 0, 1, 1",
342 "sv.ffadds *0, 24, *0",
347 runs a full in-place O(N log2 N) butterfly schedule for
348 Discrete Fourier Transform. also uses "Vertical First"
349 but also uses temporary scalars and ffadds rather than
352 this represents an incremental step towards complex FFT
354 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
356 * single fmuls FRT, FRA, FRC
357 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
358 (FRS is implicit / hidden in ff* operations)
360 multiply: # sv.fmuls FRT, FRA, FRC
361 temp1 = vec[jh] * exptable[k]
363 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
364 vec[jh] = temp2 - temp1
365 vec[jl] = temp2 + temp1
367 also see notes in complex fft test: here svremap is done in
368 "non-persistent" mode (as a demo) whereas in the complex fft
369 svremap is used in "persistent" mode, where by a complete
370 coincidence the REMAP arguments all happen to line up and
371 only one persistent svremap is needed. the exact same trick
372 *could* be applied here but for illustrative purposes it is not.
375 "svshape 8, 1, 1, 1, 1",
376 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
377 "svremap 5, 1, 0, 2, 0, 0, 0",
378 "sv.fmuls 24, *0, *8",
379 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
380 "svremap 26, 0, 0, 0, 0, 1, 0",
381 "sv.ffadds *0, 24, *0",
387 # array and coefficients to test
388 av
= [7.0, -9.8, 3.0, -32.3,
389 -2.0, 5.0, -9.8, 31.3] # array 0..7
390 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
394 for i
, c
in enumerate(coe
):
395 fprs
[i
+8] = fp64toselectable(c
)
396 for i
, a
in enumerate(av
):
397 fprs
[i
+0] = fp64toselectable(a
)
399 # set total. err don't know how to calculate how many there are...
400 # do it manually for now
406 tablestep
= n
// size
407 for i
in range(0, n
, size
):
408 for j
in range(i
, i
+ halfsize
):
412 # SVSTATE (calculated VL)
413 svstate
= SVP64State()
415 svstate
.maxvl
= VL
# MAXVL
416 print("SVSTATE", bin(svstate
.asint()))
418 with
Program(lst
, bigendian
=False) as program
:
419 sim
= self
.run_tst_program(program
, svstate
=svstate
,
421 print("spr svshape0", sim
.spr
['SVSHAPE0'])
422 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
423 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
424 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
425 print("spr svshape1", sim
.spr
['SVSHAPE1'])
426 print("spr svshape2", sim
.spr
['SVSHAPE2'])
427 print("spr svshape3", sim
.spr
['SVSHAPE3'])
429 # work out the results with the twin mul/add-sub
430 res
= transform_radix2(av
, coe
)
432 for i
, expected
in enumerate(res
):
433 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
434 for i
, expected
in enumerate(res
):
435 # convert to Power single
436 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
437 expected
= float(expected
)
438 actual
= float(sim
.fpr(i
))
439 # approximate error calculation, good enough test
440 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
441 # and the rounding is different
442 err
= abs(actual
- expected
) / expected
443 self
.assertTrue(err
< 1e-7)
445 def test_sv_fpmadds_fft(self
):
446 """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
448 four in-place vector mul-adds, four in-place vector mul-subs
450 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
451 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
453 there is the *option* to target a different location (non-in-place)
456 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
457 FRS to perform the two multiplies. one add, one subtract.
459 sv.ffmadds FRT, FRA, FRC, FRB actually does:
460 fmadds FRT , FRA, FRC, FRA
461 fnmsubs FRT+vl, FRA, FRC, FRB+vl
464 lst
= SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
469 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
470 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
471 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
473 # work out the results with the twin mul/add-sub
474 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
475 fprs
[i
+2] = fp64toselectable(a
)
476 fprs
[i
+6] = fp64toselectable(b
)
477 fprs
[i
+10] = fp64toselectable(c
)
481 # convert to Power single
482 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
483 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
485 print("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
487 # SVSTATE (in this case, VL=2)
488 svstate
= SVP64State()
490 svstate
.maxvl
= 4 # MAXVL
491 print("SVSTATE", bin(svstate
.asint()))
493 with
Program(lst
, bigendian
=False) as program
:
494 sim
= self
.run_tst_program(program
, svstate
=svstate
,
496 # confirm that the results are as expected
497 for i
, (t
, u
) in enumerate(res
):
498 self
.assertEqual(sim
.fpr(i
+2), t
)
499 self
.assertEqual(sim
.fpr(i
+6), u
)
501 def test_sv_ffadds_fft(self
):
502 """>>> lst = ["sv.ffadds *2, *2, *2"
504 four in-place vector adds, four in-place vector subs
506 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
507 FRS to perform the two multiplies. one add, one subtract.
509 sv.ffadds FRT, FRA, FRB actually does:
511 fsubs FRT+vl, FRA, FRB+vl
513 lst
= SVP64Asm(["sv.ffadds *2, *2, *2"
518 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
519 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
521 # work out the results with the twin add-sub
522 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
523 fprs
[i
+2] = fp64toselectable(a
)
524 fprs
[i
+6] = fp64toselectable(b
)
527 # convert to Power single
528 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
529 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
531 print("FFT", i
, "in", a
, b
, "res", t
, u
)
533 # SVSTATE (in this case, VL=2)
534 svstate
= SVP64State()
536 svstate
.maxvl
= 4 # MAXVL
537 print("SVSTATE", bin(svstate
.asint()))
539 with
Program(lst
, bigendian
=False) as program
:
540 sim
= self
.run_tst_program(program
, svstate
=svstate
,
542 # confirm that the results are as expected
543 for i
, (t
, u
) in enumerate(res
):
544 a
= float(sim
.fpr(i
+2))
545 b
= float(sim
.fpr(i
+6))
548 print("FFT", i
, "in", a
, b
, "res", t
, u
)
549 for i
, (t
, u
) in enumerate(res
):
550 self
.assertEqual(sim
.fpr(i
+2), t
)
551 self
.assertEqual(sim
.fpr(i
+6), u
)
553 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
555 runs a full in-place O(N log2 N) butterfly schedule for
556 Discrete Fourier Transform. this version however uses
557 SVP64 "Vertical-First" Mode and so needs an explicit
560 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
561 (3 inputs, 2 outputs)
563 complex calculation (FFT):
565 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
566 vec_r[jh] = vec_r[jl] - tpre
569 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
570 vec_i[jh] = vec_i[jl] - tpim
573 real-only calculation (DFT):
575 temp1 = vec[jh] * exptable[k]
577 vec[jh] = temp2 - temp1
578 vec[jl] = temp2 + temp1
580 note: a rather nice convenience / coincidence. the meaning of
581 these two instructions is:
582 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
583 "svremap 5, 1, 0, 2, 0, 0, 1",
584 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
585 "svremap 26, 0, 0, 0, 0, 1, 1",
587 however it turns out that they can be *merged*, and for
588 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
589 *ignore* their REMAPs (by definition, because you can't REMAP
590 scalar operands), and for the second one (sv.ffads) exactly the
591 right REMAPs are also ignored!
593 therefore we can merge:
594 "svremap 5, 1, 0, 2, 0, 0, 1",
595 "svremap 26, 0, 0, 0, 0, 1, 1",
597 "svremap 31, 1, 0, 2, 0, 1, 1",
598 and save one instruction.
601 # set triple butterfly mode with persistent "REMAP"
602 "svshape 8, 1, 1, 1, 1",
603 "svremap 31, 1, 0, 2, 0, 1, 1",
605 "sv.fmuls 24, *0, *16", # mul1_r = r*cos_r
606 "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
607 # tpre = mul1_r + mul2_r
609 "sv.fmuls 26, *0, *20", # mul1_i = r*sin_i
610 "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
611 # tpim = mul2_i - mul1_i
613 "sv.ffadds *0, 24, *0", # vh/vl +/- tpre
615 "sv.ffadds *8, 26, *8", # vh/vl +- tpim
623 # array and coefficients to test
624 ar
= [7.0, -9.8, 3.0, -32.3,
625 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
626 ai
= [1.0, -1.8, 3.0, 19.3,
627 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
628 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
629 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
633 for i
, a
in enumerate(ar
):
634 fprs
[i
+0] = fp64toselectable(a
)
635 for i
, a
in enumerate(ai
):
636 fprs
[i
+8] = fp64toselectable(a
)
637 for i
, cr
in enumerate(coer
):
638 fprs
[i
+16] = fp64toselectable(cr
)
639 for i
, ci
in enumerate(coei
):
640 fprs
[i
+20] = fp64toselectable(ci
)
642 # set total. err don't know how to calculate how many there are...
643 # do it manually for now
649 tablestep
= n
// size
650 for i
in range(0, n
, size
):
651 for j
in range(i
, i
+ halfsize
):
655 # SVSTATE (calculated VL)
656 svstate
= SVP64State()
658 svstate
.maxvl
= VL
# MAXVL
659 print("SVSTATE", bin(svstate
.asint()))
661 with
Program(lst
, bigendian
=False) as program
:
662 sim
= self
.run_tst_program(program
, svstate
=svstate
,
664 print("spr svshape0", sim
.spr
['SVSHAPE0'])
665 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
666 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
667 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
668 print("spr svshape1", sim
.spr
['SVSHAPE1'])
669 print("spr svshape2", sim
.spr
['SVSHAPE2'])
670 print("spr svshape3", sim
.spr
['SVSHAPE3'])
672 # work out the results with the twin mul/add-sub, explicit
674 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
676 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
677 print("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
678 "expected_r", expected_r
,
679 "expected_i", expected_i
)
680 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
681 # convert to Power single
682 expected_r
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_r
))
683 expected_r
= float(expected_r
)
684 actual_r
= float(sim
.fpr(i
))
685 # approximate error calculation, good enough test
686 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
687 # and the rounding is different
688 err
= abs(actual_r
- expected_r
) / expected_r
689 self
.assertTrue(err
< 1e-6)
690 # convert to Power single
691 expected_i
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_i
))
692 expected_i
= float(expected_i
)
693 actual_i
= float(sim
.fpr(i
+8))
694 # approximate error calculation, good enough test
695 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
696 # and the rounding is different
697 err
= abs(actual_i
- expected_i
) / expected_i
698 self
.assertTrue(err
< 1e-6)
700 def test_sv_ffadds_fft_scalar(self
):
701 """>>> lst = ["sv.ffadds *2, 12, 13"
703 four in-place vector adds and subs, but done with a scalar
706 lst
= SVP64Asm(["sv.ffadds *2, 12, 13"
713 fprs
[12] = fp64toselectable(scalar_a
)
714 fprs
[13] = fp64toselectable(scalar_b
)
716 # work out the results with the twin add-sub
718 t
= scalar_b
+ scalar_a
719 u
= scalar_b
- scalar_a
720 # convert to Power single
721 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
))
722 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
724 print("FFT", i
, "res", t
, u
)
726 # SVSTATE (in this case, VL=2)
727 svstate
= SVP64State()
729 svstate
.maxvl
= 4 # MAXVL
730 print("SVSTATE", bin(svstate
.asint()))
732 with
Program(lst
, bigendian
=False) as program
:
733 sim
= self
.run_tst_program(program
, svstate
=svstate
,
735 # confirm that the results are as expected
736 for i
, (t
, u
) in enumerate(res
):
737 a
= float(sim
.fpr(i
+2))
738 b
= float(sim
.fpr(i
+6))
741 print("FFT", i
, "in", a
, b
, "res", t
, u
)
742 for i
, (t
, u
) in enumerate(res
):
743 self
.assertEqual(sim
.fpr(i
+2), t
)
744 self
.assertEqual(sim
.fpr(i
+6), u
)
746 def test_sv_remap_fpmadds_fft_ldst(self
):
747 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
748 "sv.lfs/els *0, 4(0)",
749 "svshape 8, 1, 1, 1, 0",
750 "svremap 31, 1, 0, 2, 0, 1, 0",
751 "sv.ffmadds *0, *0, *0, *8"
753 runs a full in-place O(N log2 N) butterfly schedule for
754 Discrete Fourier Transform, using bit-reversed LD/ST
756 lst
= SVP64Asm(["svshape 8, 1, 1, 15, 0",
757 "svremap 1, 0, 0, 0, 0, 0, 0",
758 "sv.lfs/els *0, 4(0)",
759 "svshape 8, 1, 1, 1, 0",
760 "svremap 31, 1, 0, 2, 0, 1, 0",
761 "sv.ffmadds *0, *0, *0, *8"
765 # array and coefficients to test
766 av
= [7.0, -9.8, 3.0, -32.3,
767 -2.0, 5.0, -9.8, 31.3] # array 0..7
768 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
772 for i
, c
in enumerate(coe
):
773 fprs
[i
+8] = fp64toselectable(c
)
777 for i
, a
in enumerate(av
):
778 a
= SINGLE(fp64toselectable(a
)).value
783 mem
[(i
//2)*8] = val |
(a
<< 32)
785 with
Program(lst
, bigendian
=False) as program
:
786 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
788 print("spr svshape0", sim
.spr
['SVSHAPE0'])
789 print(" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
790 print(" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
791 print(" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
792 print("spr svshape1", sim
.spr
['SVSHAPE1'])
793 print("spr svshape2", sim
.spr
['SVSHAPE2'])
794 print("spr svshape3", sim
.spr
['SVSHAPE3'])
797 print(sim
.mem
.dump())
799 # work out the results with the twin mul/add-sub,
800 # note bit-reverse mode requested
801 res
= transform_radix2(av
, coe
, reverse
=True)
803 for i
, expected
in enumerate(res
):
804 print("i", i
, float(sim
.fpr(i
)), "expected", expected
)
805 for i
, expected
in enumerate(res
):
806 # convert to Power single
807 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
808 expected
= float(expected
)
809 actual
= float(sim
.fpr(i
))
810 # approximate error calculation, good enough test
811 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
812 # and the rounding is different
813 err
= abs(actual
- expected
) / expected
814 self
.assertTrue(err
< 1e-6)
816 def run_tst_program(self
, prog
, initial_regs
=None,
820 if initial_regs
is None:
821 initial_regs
= [0] * 32
822 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
823 initial_fprs
=initial_fprs
,
834 if __name__
== "__main__":