cf5915adf99b4348316ec7853376362df0ed6893
1 from nmigen
import Module
, Signal
2 from nmigen
.sim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import ISACallerFnHelper
15 # really bad hack. need to access the DOUBLE2SINGLE function auto-generated
17 fph
= ISACallerFnHelper(XLEN
=64)
20 def transform_radix2(vec
, exptable
, reverse
=False):
22 # FFT and convolution test (Python), based on Project Nayuki
24 # Copyright (c) 2020 Project Nayuki. (MIT License)
25 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
28 # bits of the integer 'val'.
29 def reverse_bits(val
, width
):
31 for _
in range(width
):
32 result
= (result
<< 1) |
(val
& 1)
38 levels
= n
.bit_length() - 1
40 # Copy with bit-reversed permutation
42 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
48 for i
in range(0, n
, size
):
50 for j
in range(i
, i
+ halfsize
):
51 # exact same actual computation, just embedded in
52 # triple-nested for-loops
53 jl
, jh
= j
, j
+halfsize
55 temp1
= vec
[jh
] * exptable
[k
]
57 vec
[jh
] = temp2
- temp1
58 vec
[jl
] = temp2
+ temp1
59 print ("xform jl jh k", jl
, jh
, k
,
60 "vj vjh ek", temp2
, vjh
, exptable
[k
],
61 "t1, t2", temp1
, temp2
,
62 "v[jh] v[jl]", vec
[jh
], vec
[jl
])
69 def transform_radix2_complex(vec_r
, vec_i
, cos_r
, sin_i
, reverse
=False):
71 # FFT and convolution test (Python), based on Project Nayuki
73 # Copyright (c) 2020 Project Nayuki. (MIT License)
74 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
77 # bits of the integer 'val'.
78 def reverse_bits(val
, width
):
80 for _
in range(width
):
81 result
= (result
<< 1) |
(val
& 1)
87 levels
= n
.bit_length() - 1
89 # Copy with bit-reversed permutation
91 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
97 for i
in range(0, n
, size
):
99 for j
in range(i
, i
+ halfsize
):
100 # exact same actual computation, just embedded in
101 # triple-nested for-loops
102 jl
, jh
= j
, j
+halfsize
104 print ("xform jl jh k", jl
, jh
, k
,
105 "vr h l", vec_r
[jh
], vec_r
[jl
],
106 "vi h l", vec_i
[jh
], vec_i
[jl
])
107 print (" cr k", cos_r
[k
], "si k", sin_i
[k
])
108 mul1_r
= vec_r
[jh
] * cos_r
[k
]
109 mul2_r
= vec_i
[jh
] * sin_i
[k
]
110 tpre
= mul1_r
+ mul2_r
111 print (" vec_r[jh] * cos_r[k]", mul1_r
)
112 print (" vec_i[jh] * sin_i[k]", mul2_r
)
113 print (" tpre", tpre
)
114 mul1_i
= vec_r
[jh
] * sin_i
[k
]
115 mul2_i
= vec_i
[jh
] * cos_r
[k
]
116 tpim
= -mul1_i
+ mul2_i
117 print (" vec_r[jh] * sin_i[k]", mul1_i
)
118 print (" vec_i[jh] * cos_r[k]", mul2_i
)
119 print (" tpim", tpim
)
120 vec_r
[jh
] = vec_r
[jl
] - tpre
121 vec_i
[jh
] = vec_i
[jl
] - tpim
125 print (" xform jl jh k", jl
, jh
, k
,
126 "\n vr h l", vec_r
[jh
], vec_r
[jl
],
127 "\n vi h l", vec_i
[jh
], vec_i
[jl
])
134 class FFTTestCase(FHDLTestCase
):
136 def _check_regs(self
, sim
, expected
):
138 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
140 def test_sv_remap_fpmadds_fft_4(self
):
141 """>>> lst = ["svshape 2, 1, 1, 1, 0",
142 "svremap 31, 1, 0, 2, 0, 1, 0",
143 "sv.ffmadds. *2, *2, *2, *10"
145 this is a cheap (cheating) way to run a single "ffmadds." to
146 get at least Rc=1 on sv.ffmadds to be activated. the results
147 are not actually tested because there's no checking yet on
150 lst
= SVP64Asm( ["svshape 2, 1, 1, 1, 0",
151 "svremap 31, 1, 0, 2, 0, 1, 0",
152 "sv.ffmadds *0, *0, *0, *8"
156 # array and coefficients to test
157 av
= [7.0, -9.8 ] # array 0..1
158 coe
= [3.1] # coefficients
162 for i
, c
in enumerate(coe
):
163 fprs
[i
+8] = fp64toselectable(c
)
164 for i
, a
in enumerate(av
):
165 fprs
[i
+0] = fp64toselectable(a
)
167 with
Program(lst
, bigendian
=False) as program
:
168 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
169 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
170 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
171 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
172 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
173 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
174 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
175 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
177 # work out the results with the twin mul/add-sub
178 res
= transform_radix2(av
, coe
)
180 for i
, expected
in enumerate(res
):
181 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
182 for i
, expected
in enumerate(res
):
183 # convert to Power single
184 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
185 expected
= float(expected
)
186 actual
= float(sim
.fpr(i
))
187 # approximate error calculation, good enough test
188 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
189 # and the rounding is different
190 err
= abs(actual
- expected
) / expected
191 self
.assertTrue(err
< 1e-7)
193 def test_sv_remap_fpmadds_fft(self
):
194 """>>> lst = ["svshape 8, 1, 1, 1, 0",
195 "svremap 31, 1, 0, 2, 0, 1, 0",
196 "sv.ffmadds *2, *2, *2, *10"
198 runs a full in-place O(N log2 N) butterfly schedule for
199 Discrete Fourier Transform.
201 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
202 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
204 there is the *option* to target a different location (non-in-place)
207 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
208 (3 inputs, 2 outputs)
210 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
211 "svremap 31, 1, 0, 2, 0, 1, 0",
212 "sv.ffmadds *0, *0, *0, *8"
216 # array and coefficients to test
217 av
= [7.0, -9.8, 3.0, -32.3,
218 -2.0, 5.0, -9.8, 31.3] # array 0..7
219 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
223 for i
, c
in enumerate(coe
):
224 fprs
[i
+8] = fp64toselectable(c
)
225 for i
, a
in enumerate(av
):
226 fprs
[i
+0] = fp64toselectable(a
)
228 with
Program(lst
, bigendian
=False) as program
:
229 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
230 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
231 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
232 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
233 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
234 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
235 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
236 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
238 # work out the results with the twin mul/add-sub
239 res
= transform_radix2(av
, coe
)
241 for i
, expected
in enumerate(res
):
242 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
243 for i
, expected
in enumerate(res
):
244 # convert to Power single
245 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
246 expected
= float(expected
)
247 actual
= float(sim
.fpr(i
))
248 # approximate error calculation, good enough test
249 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
250 # and the rounding is different
251 err
= abs(actual
- expected
) / expected
252 self
.assertTrue(err
< 1e-7)
254 def test_sv_remap_fpmadds_fft_svstep(self
):
255 """>>> lst = SVP64Asm( [
256 "svshape 8, 1, 1, 1, 1",
257 "svremap 31, 1, 0, 2, 0, 1, 0",
258 "sv.ffmadds *0, *0, *0, *8",
259 "setvl. 0, 0, 1, 1, 0, 0",
262 runs a full in-place O(N log2 N) butterfly schedule for
263 Discrete Fourier Transform. this version however uses
264 SVP64 "Vertical-First" Mode and so needs an explicit
267 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
268 (3 inputs, 2 outputs)
271 "svshape 8, 1, 1, 1, 1",
272 "svremap 31, 1, 0, 2, 0, 1, 0",
273 "sv.ffmadds *0, *0, *0, *8",
274 "setvl. 0, 0, 1, 1, 0, 0",
279 # array and coefficients to test
280 av
= [7.0, -9.8, 3.0, -32.3,
281 -2.0, 5.0, -9.8, 31.3] # array 0..7
282 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
286 for i
, c
in enumerate(coe
):
287 fprs
[i
+8] = fp64toselectable(c
)
288 for i
, a
in enumerate(av
):
289 fprs
[i
+0] = fp64toselectable(a
)
291 # set total. err don't know how to calculate how many there are...
292 # do it manually for now
298 tablestep
= n
// size
299 for i
in range(0, n
, size
):
300 for j
in range(i
, i
+ halfsize
):
304 # SVSTATE (calculated VL)
305 svstate
= SVP64State()
307 svstate
.maxvl
= VL
# MAXVL
308 print ("SVSTATE", bin(svstate
.asint()))
310 with
Program(lst
, bigendian
=False) as program
:
311 sim
= self
.run_tst_program(program
, svstate
=svstate
,
313 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
314 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
315 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
316 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
317 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
318 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
319 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
321 # work out the results with the twin mul/add-sub
322 res
= transform_radix2(av
, coe
)
324 for i
, expected
in enumerate(res
):
325 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
326 for i
, expected
in enumerate(res
):
327 # convert to Power single
328 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
329 expected
= float(expected
)
330 actual
= float(sim
.fpr(i
))
331 # approximate error calculation, good enough test
332 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
333 # and the rounding is different
334 err
= abs(actual
- expected
) / expected
335 self
.assertTrue(err
< 1e-7)
337 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
338 """>>> lst = SVP64Asm( [
339 "svshape 8, 1, 1, 1, 1",
340 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
341 "svremap 5, 1, 0, 2, 0, 0, 1",
342 "sv.fmuls 24, *0, *8",
343 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
344 "svremap 26, 0, 0, 0, 0, 1, 1",
345 "sv.ffadds *0, 24, *0",
346 "setvl. 0, 0, 1, 1, 0, 0",
350 runs a full in-place O(N log2 N) butterfly schedule for
351 Discrete Fourier Transform. also uses "Vertical First"
352 but also uses temporary scalars and ffadds rather than
355 this represents an incremental step towards complex FFT
357 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
359 * single fmuls FRT, FRA, FRC
360 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
361 (FRS is implicit / hidden in ff* operations)
363 multiply: # sv.fmuls FRT, FRA, FRC
364 temp1 = vec[jh] * exptable[k]
366 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
367 vec[jh] = temp2 - temp1
368 vec[jl] = temp2 + temp1
370 also see notes in complex fft test: here svremap is done in
371 "non-persistent" mode (as a demo) whereas in the complex fft
372 svremap is used in "persistent" mode, where by a complete
373 coincidence the REMAP arguments all happen to line up and
374 only one persistent svremap is needed. the exact same trick
375 *could* be applied here but for illustrative purposes it is not.
378 "svshape 8, 1, 1, 1, 1",
379 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
380 "svremap 5, 1, 0, 2, 0, 0, 0",
381 "sv.fmuls 24, *0, *8",
382 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
383 "svremap 26, 0, 0, 0, 0, 1, 0",
384 "sv.ffadds *0, 24, *0",
385 "setvl. 0, 0, 1, 1, 0, 0",
390 # array and coefficients to test
391 av
= [7.0, -9.8, 3.0, -32.3,
392 -2.0, 5.0, -9.8, 31.3] # array 0..7
393 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
397 for i
, c
in enumerate(coe
):
398 fprs
[i
+8] = fp64toselectable(c
)
399 for i
, a
in enumerate(av
):
400 fprs
[i
+0] = fp64toselectable(a
)
402 # set total. err don't know how to calculate how many there are...
403 # do it manually for now
409 tablestep
= n
// size
410 for i
in range(0, n
, size
):
411 for j
in range(i
, i
+ halfsize
):
415 # SVSTATE (calculated VL)
416 svstate
= SVP64State()
418 svstate
.maxvl
= VL
# MAXVL
419 print ("SVSTATE", bin(svstate
.asint()))
421 with
Program(lst
, bigendian
=False) as program
:
422 sim
= self
.run_tst_program(program
, svstate
=svstate
,
424 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
425 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
426 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
427 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
428 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
429 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
430 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
432 # work out the results with the twin mul/add-sub
433 res
= transform_radix2(av
, coe
)
435 for i
, expected
in enumerate(res
):
436 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
437 for i
, expected
in enumerate(res
):
438 # convert to Power single
439 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
440 expected
= float(expected
)
441 actual
= float(sim
.fpr(i
))
442 # approximate error calculation, good enough test
443 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
444 # and the rounding is different
445 err
= abs(actual
- expected
) / expected
446 self
.assertTrue(err
< 1e-7)
448 def test_sv_fpmadds_fft(self
):
449 """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
451 four in-place vector mul-adds, four in-place vector mul-subs
453 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
454 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
456 there is the *option* to target a different location (non-in-place)
459 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
460 FRS to perform the two multiplies. one add, one subtract.
462 sv.ffmadds FRT, FRA, FRC, FRB actually does:
463 fmadds FRT , FRA, FRC, FRA
464 fnmsubs FRT+vl, FRA, FRC, FRB+vl
467 lst
= SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
472 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
473 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
474 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
476 # work out the results with the twin mul/add-sub
477 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
478 fprs
[i
+2] = fp64toselectable(a
)
479 fprs
[i
+6] = fp64toselectable(b
)
480 fprs
[i
+10] = fp64toselectable(c
)
484 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
485 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
487 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
489 # SVSTATE (in this case, VL=2)
490 svstate
= SVP64State()
492 svstate
.maxvl
= 4 # MAXVL
493 print ("SVSTATE", bin(svstate
.asint()))
495 with
Program(lst
, bigendian
=False) as program
:
496 sim
= self
.run_tst_program(program
, svstate
=svstate
,
498 # confirm that the results are as expected
499 for i
, (t
, u
) in enumerate(res
):
500 self
.assertEqual(sim
.fpr(i
+2), t
)
501 self
.assertEqual(sim
.fpr(i
+6), u
)
503 def test_sv_ffadds_fft(self
):
504 """>>> lst = ["sv.ffadds *2, *2, *2"
506 four in-place vector adds, four in-place vector subs
508 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
509 FRS to perform the two multiplies. one add, one subtract.
511 sv.ffadds FRT, FRA, FRB actually does:
513 fsubs FRT+vl, FRA, FRB+vl
515 lst
= SVP64Asm(["sv.ffadds *2, *2, *2"
520 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
521 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
523 # work out the results with the twin add-sub
524 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
525 fprs
[i
+2] = fp64toselectable(a
)
526 fprs
[i
+6] = fp64toselectable(b
)
529 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
530 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
532 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
534 # SVSTATE (in this case, VL=2)
535 svstate
= SVP64State()
537 svstate
.maxvl
= 4 # MAXVL
538 print ("SVSTATE", bin(svstate
.asint()))
540 with
Program(lst
, bigendian
=False) as program
:
541 sim
= self
.run_tst_program(program
, svstate
=svstate
,
543 # confirm that the results are as expected
544 for i
, (t
, u
) in enumerate(res
):
545 a
= float(sim
.fpr(i
+2))
546 b
= float(sim
.fpr(i
+6))
549 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
550 for i
, (t
, u
) in enumerate(res
):
551 self
.assertEqual(sim
.fpr(i
+2), t
)
552 self
.assertEqual(sim
.fpr(i
+6), u
)
554 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
556 runs a full in-place O(N log2 N) butterfly schedule for
557 Discrete Fourier Transform. this version however uses
558 SVP64 "Vertical-First" Mode and so needs an explicit
561 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
562 (3 inputs, 2 outputs)
564 complex calculation (FFT):
566 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
567 vec_r[jh] = vec_r[jl] - tpre
570 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
571 vec_i[jh] = vec_i[jl] - tpim
574 real-only calculation (DFT):
576 temp1 = vec[jh] * exptable[k]
578 vec[jh] = temp2 - temp1
579 vec[jl] = temp2 + temp1
581 note: a rather nice convenience / coincidence. the meaning of
582 these two instructions is:
583 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
584 "svremap 5, 1, 0, 2, 0, 0, 1",
585 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
586 "svremap 26, 0, 0, 0, 0, 1, 1",
588 however it turns out that they can be *merged*, and for
589 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
590 *ignore* their REMAPs (by definition, because you can't REMAP
591 scalar operands), and for the second one (sv.ffads) exactly the
592 right REMAPs are also ignored!
594 therefore we can merge:
595 "svremap 5, 1, 0, 2, 0, 0, 1",
596 "svremap 26, 0, 0, 0, 0, 1, 1",
598 "svremap 31, 1, 0, 2, 0, 1, 1",
599 and save one instruction.
602 # set triple butterfly mode with persistent "REMAP"
603 "svshape 8, 1, 1, 1, 1",
604 "svremap 31, 1, 0, 2, 0, 1, 1",
606 "sv.fmuls 24, *0, *16", # mul1_r = r*cos_r
607 "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
608 # tpre = mul1_r + mul2_r
610 "sv.fmuls 26, *0, *20", # mul1_i = r*sin_i
611 "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
612 # tpim = mul2_i - mul1_i
614 "sv.ffadds *0, 24, *0", # vh/vl +/- tpre
616 "sv.ffadds *8, 26, *8", # vh/vl +- tpim
619 "setvl. 0, 0, 1, 1, 0, 0",
624 # array and coefficients to test
625 ar
= [7.0, -9.8, 3.0, -32.3,
626 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
627 ai
= [1.0, -1.8, 3.0, 19.3,
628 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
629 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
630 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
634 for i
, a
in enumerate(ar
):
635 fprs
[i
+0] = fp64toselectable(a
)
636 for i
, a
in enumerate(ai
):
637 fprs
[i
+8] = fp64toselectable(a
)
638 for i
, cr
in enumerate(coer
):
639 fprs
[i
+16] = fp64toselectable(cr
)
640 for i
, ci
in enumerate(coei
):
641 fprs
[i
+20] = fp64toselectable(ci
)
643 # set total. err don't know how to calculate how many there are...
644 # do it manually for now
650 tablestep
= n
// size
651 for i
in range(0, n
, size
):
652 for j
in range(i
, i
+ halfsize
):
656 # SVSTATE (calculated VL)
657 svstate
= SVP64State()
659 svstate
.maxvl
= VL
# MAXVL
660 print ("SVSTATE", bin(svstate
.asint()))
662 with
Program(lst
, bigendian
=False) as program
:
663 sim
= self
.run_tst_program(program
, svstate
=svstate
,
665 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
666 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
667 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
668 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
669 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
670 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
671 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
673 # work out the results with the twin mul/add-sub, explicit
675 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
677 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
678 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
679 "expected_r", expected_r
,
680 "expected_i", expected_i
)
681 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
682 # convert to Power single
683 expected_r
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_r
))
684 expected_r
= float(expected_r
)
685 actual_r
= float(sim
.fpr(i
))
686 # approximate error calculation, good enough test
687 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
688 # and the rounding is different
689 err
= abs(actual_r
- expected_r
) / expected_r
690 self
.assertTrue(err
< 1e-6)
691 # convert to Power single
692 expected_i
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_i
))
693 expected_i
= float(expected_i
)
694 actual_i
= float(sim
.fpr(i
+8))
695 # approximate error calculation, good enough test
696 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
697 # and the rounding is different
698 err
= abs(actual_i
- expected_i
) / expected_i
699 self
.assertTrue(err
< 1e-6)
701 def test_sv_ffadds_fft_scalar(self
):
702 """>>> lst = ["sv.ffadds *2, 12, 13"
704 four in-place vector adds and subs, but done with a scalar
707 lst
= SVP64Asm(["sv.ffadds *2, 12, 13"
714 fprs
[12] = fp64toselectable(scalar_a
)
715 fprs
[13] = fp64toselectable(scalar_b
)
717 # work out the results with the twin add-sub
719 t
= scalar_b
+ scalar_a
720 u
= scalar_b
- scalar_a
721 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
722 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
724 print ("FFT", i
, "res", t
, u
)
726 # SVSTATE (in this case, VL=2)
727 svstate
= SVP64State()
729 svstate
.maxvl
= 4 # MAXVL
730 print ("SVSTATE", bin(svstate
.asint()))
732 with
Program(lst
, bigendian
=False) as program
:
733 sim
= self
.run_tst_program(program
, svstate
=svstate
,
735 # confirm that the results are as expected
736 for i
, (t
, u
) in enumerate(res
):
737 a
= float(sim
.fpr(i
+2))
738 b
= float(sim
.fpr(i
+6))
741 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
742 for i
, (t
, u
) in enumerate(res
):
743 self
.assertEqual(sim
.fpr(i
+2), t
)
744 self
.assertEqual(sim
.fpr(i
+6), u
)
746 def test_sv_remap_fpmadds_fft_ldst(self
):
747 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
748 "sv.lfs/els *0, 4(0)",
749 "svshape 8, 1, 1, 1, 0",
750 "svremap 31, 1, 0, 2, 0, 1, 0",
751 "sv.ffmadds *0, *0, *0, *8"
753 runs a full in-place O(N log2 N) butterfly schedule for
754 Discrete Fourier Transform, using bit-reversed LD/ST
756 lst
= SVP64Asm( ["svshape 8, 1, 1, 15, 0",
757 "svremap 1, 0, 0, 0, 0, 0, 0",
758 "sv.lfs/els *0, 4(0)",
759 "svshape 8, 1, 1, 1, 0",
760 "svremap 31, 1, 0, 2, 0, 1, 0",
761 "sv.ffmadds *0, *0, *0, *8"
765 # array and coefficients to test
766 av
= [7.0, -9.8, 3.0, -32.3,
767 -2.0, 5.0, -9.8, 31.3] # array 0..7
768 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
772 for i
, c
in enumerate(coe
):
773 fprs
[i
+8] = fp64toselectable(c
)
777 for i
, a
in enumerate(av
):
778 a
= SINGLE(fp64toselectable(a
)).value
783 mem
[(i
//2)*8] = val |
(a
<< 32)
785 with
Program(lst
, bigendian
=False) as program
:
786 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
788 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
789 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
790 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
791 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
792 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
793 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
794 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
797 print (sim
.mem
.dump())
799 # work out the results with the twin mul/add-sub,
800 # note bit-reverse mode requested
801 res
= transform_radix2(av
, coe
, reverse
=True)
803 for i
, expected
in enumerate(res
):
804 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
805 for i
, expected
in enumerate(res
):
806 # convert to Power single
807 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
808 expected
= float(expected
)
809 actual
= float(sim
.fpr(i
))
810 # approximate error calculation, good enough test
811 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
812 # and the rounding is different
813 err
= abs(actual
- expected
) / expected
814 self
.assertTrue(err
< 1e-6)
816 def run_tst_program(self
, prog
, initial_regs
=None,
820 if initial_regs
is None:
821 initial_regs
= [0] * 32
822 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
823 initial_fprs
=initial_fprs
,
834 if __name__
== "__main__":