1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
def transform_radix2(vec, exptable, reverse=False):
    """Apply a radix-2 butterfly schedule to *vec* and return the result.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Every butterfly performs the twin multiply / add / subtract::

        temp1 = vec[jh] * exptable[k]
        temp2 = vec[jl]
        vec[jh] = temp2 - temp1
        vec[jl] = temp2 + temp1

    Args:
        vec: list of values to transform.  The loop nest assumes
            ``len(vec)`` is a power of two; no check is made here.
        exptable: coefficients, indexed by ``k`` which advances in steps
            of ``len(vec) // size`` (so ``len(vec) // 2`` entries are read
            at the widest stage).
        reverse: when true, start from a bit-reversed copy of *vec*
            instead of operating on *vec* itself.

    Returns:
        The transformed list.  With ``reverse=False`` this is *vec*,
        modified in place; with ``reverse=True`` it is a new list.
    """

    def reverse_bits(val, width):
        """Return the integer formed by reversing the lowest *width*
        bits of the integer 'val'."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    if reverse:
        vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize
                # keep the pre-update value purely for the debug print
                vjh = vec[jh]
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print ("xform jl jh k", jl, jh, k,
                       "vj vjh ek", temp2, vjh, exptable[k],
                       "t1, t2", temp1, temp2,
                       "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
    """Apply a radix-2 butterfly schedule to a split real/imaginary vector.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Every butterfly performs::

        tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh] = vec_r[jl] - tpre
        vec_i[jh] = vec_i[jl] - tpim
        vec_r[jl] = vec_r[jl] + tpre
        vec_i[jl] = vec_i[jl] + tpim

    Args:
        vec_r: real parts.  The loop nest assumes ``len(vec_r)`` is a
            power of two; no check is made here.
        vec_i: imaginary parts, same length as *vec_r*.
        cos_r: real coefficient table, indexed by ``k``.
        sin_i: imaginary coefficient table, indexed by ``k``.
        reverse: when true, start from bit-reversed copies of both
            vectors instead of operating on the arguments themselves.

    Returns:
        Tuple ``(vec_r, vec_i)`` of the transformed lists.  With
        ``reverse=False`` these are the argument lists, modified in place.
    """

    def reverse_bits(val, width):
        """Return the integer formed by reversing the lowest *width*
        bits of the integer 'val'."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation.  Both halves of the complex
    # vector must be permuted identically; the previous code referred to
    # a non-existent single "vec" here.
    if reverse:
        order = [reverse_bits(i, levels) for i in range(n)]
        vec_r = [vec_r[src] for src in order]
        vec_i = [vec_i[src] for src in order]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize

                print ("xform jl jh k", jl, jh, k,
                       "vr h l", vec_r[jh], vec_r[jl],
                       "vi h l", vec_i[jh], vec_i[jl])
                print (" cr k", cos_r[k], "si k", sin_i[k])
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print (" vec_r[jh] * cos_r[k]", mul1_r)
                print (" vec_i[jh] * sin_i[k]", mul2_r)
                print (" tpre", tpre)
                # tpim must be formed before vec_r[jh] is overwritten below
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print (" vec_r[jh] * sin_i[k]", mul1_i)
                print (" vec_i[jh] * cos_r[k]", mul2_i)
                print (" tpim", tpim)
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print (" xform jl jh k", jl, jh, k,
                       "\n vr h l", vec_r[jh], vec_r[jl],
                       "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
130 class FFTTestCase(FHDLTestCase
):
132 def _check_regs(self
, sim
, expected
):
134 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
136 def test_sv_remap_fpmadds_fft(self
):
137 """>>> lst = ["svshape 8, 1, 1, 1, 0",
138 "svremap 31, 1, 0, 2, 0, 1, 0",
139 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
141 runs a full in-place O(N log2 N) butterfly schedule for
142 Discrete Fourier Transform.
144 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
145 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
147 there is the *option* to target a different location (non-in-place)
150 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
151 (3 inputs, 2 outputs)
153 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
154 "svremap 31, 1, 0, 2, 0, 1, 0",
155 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
159 # array and coefficients to test
160 av
= [7.0, -9.8, 3.0, -32.3,
161 -2.0, 5.0, -9.8, 31.3] # array 0..7
162 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
166 for i
, c
in enumerate(coe
):
167 fprs
[i
+8] = fp64toselectable(c
)
168 for i
, a
in enumerate(av
):
169 fprs
[i
+0] = fp64toselectable(a
)
171 with
Program(lst
, bigendian
=False) as program
:
172 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
173 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
174 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
175 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
176 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
177 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
178 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
179 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
181 # work out the results with the twin mul/add-sub
182 res
= transform_radix2(av
, coe
)
184 for i
, expected
in enumerate(res
):
185 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
186 for i
, expected
in enumerate(res
):
187 # convert to Power single
188 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
189 expected
= float(expected
)
190 actual
= float(sim
.fpr(i
))
191 # approximate error calculation, good enough test
192 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
193 # and the rounding is different
194 err
= abs(actual
- expected
) / expected
195 self
.assertTrue(err
< 1e-7)
197 def test_sv_remap_fpmadds_fft_svstep(self
):
198 """>>> lst = SVP64Asm( [
199 "svshape 8, 1, 1, 1, 1",
200 "svremap 31, 1, 0, 2, 0, 1, 0",
201 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
202 "setvl. 0, 0, 1, 1, 0, 0",
205 runs a full in-place O(N log2 N) butterfly schedule for
206 Discrete Fourier Transform. this version however uses
207 SVP64 "Vertical-First" Mode and so needs an explicit
210 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
211 (3 inputs, 2 outputs)
214 "svshape 8, 1, 1, 1, 1",
215 "svremap 31, 1, 0, 2, 0, 1, 0",
216 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
217 "setvl. 0, 0, 1, 1, 0, 0",
222 # array and coefficients to test
223 av
= [7.0, -9.8, 3.0, -32.3,
224 -2.0, 5.0, -9.8, 31.3] # array 0..7
225 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
229 for i
, c
in enumerate(coe
):
230 fprs
[i
+8] = fp64toselectable(c
)
231 for i
, a
in enumerate(av
):
232 fprs
[i
+0] = fp64toselectable(a
)
234 # set total. err don't know how to calculate how many there are...
235 # do it manually for now
241 tablestep
= n
// size
242 for i
in range(0, n
, size
):
243 for j
in range(i
, i
+ halfsize
):
247 # SVSTATE (calculated VL)
248 svstate
= SVP64State()
250 svstate
.maxvl
= VL
# MAXVL
251 print ("SVSTATE", bin(svstate
.asint()))
253 with
Program(lst
, bigendian
=False) as program
:
254 sim
= self
.run_tst_program(program
, svstate
=svstate
,
256 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
257 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
258 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
259 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
260 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
261 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
262 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
264 # work out the results with the twin mul/add-sub
265 res
= transform_radix2(av
, coe
)
267 for i
, expected
in enumerate(res
):
268 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
269 for i
, expected
in enumerate(res
):
270 # convert to Power single
271 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
272 expected
= float(expected
)
273 actual
= float(sim
.fpr(i
))
274 # approximate error calculation, good enough test
275 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
276 # and the rounding is different
277 err
= abs(actual
- expected
) / expected
278 self
.assertTrue(err
< 1e-7)
280 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
281 """>>> lst = SVP64Asm( [
282 "svshape 8, 1, 1, 1, 1",
283 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
284 "svremap 5, 1, 0, 2, 0, 0, 1",
285 "sv.fmuls 24, 0.v, 8.v",
286 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
287 "svremap 26, 0, 0, 0, 0, 1, 1",
288 "sv.ffadds 0.v, 24, 0.v",
289 "setvl. 0, 0, 1, 1, 0, 0",
293 runs a full in-place O(N log2 N) butterfly schedule for
294 Discrete Fourier Transform. also uses "Vertical First"
295 but also uses temporary scalars and ffadds rather than
298 this represents an incremental step towards complex FFT
300 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
302 * single fmuls FRT, FRA, FRC
303 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
304 (FRS is implicit / hidden in ff* operations)
306 multiply: # sv.fmuls FRT, FRA, FRC
307 temp1 = vec[jh] * exptable[k]
309 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
310 vec[jh] = temp2 - temp1
311 vec[jl] = temp2 + temp1
313 also see notes in complex fft test: here svremap is done in
314 "non-persistent" mode (as a demo) whereas in the complex fft
315 svremap is used in "persistent" mode, where by a complete
316 coincidence the REMAP arguments all happen to line up and
317 only one persistent svremap is needed. the exact same trick
318 *could* be applied here but for illustrative purposes it is not.
321 "svshape 8, 1, 1, 1, 1",
322 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
323 "svremap 5, 1, 0, 2, 0, 0, 0",
324 "sv.fmuls 24, 0.v, 8.v",
325 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
326 "svremap 26, 0, 0, 0, 0, 1, 0",
327 "sv.ffadds 0.v, 24, 0.v",
328 "setvl. 0, 0, 1, 1, 0, 0",
333 # array and coefficients to test
334 av
= [7.0, -9.8, 3.0, -32.3,
335 -2.0, 5.0, -9.8, 31.3] # array 0..7
336 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
340 for i
, c
in enumerate(coe
):
341 fprs
[i
+8] = fp64toselectable(c
)
342 for i
, a
in enumerate(av
):
343 fprs
[i
+0] = fp64toselectable(a
)
345 # set total. err don't know how to calculate how many there are...
346 # do it manually for now
352 tablestep
= n
// size
353 for i
in range(0, n
, size
):
354 for j
in range(i
, i
+ halfsize
):
358 # SVSTATE (calculated VL)
359 svstate
= SVP64State()
361 svstate
.maxvl
= VL
# MAXVL
362 print ("SVSTATE", bin(svstate
.asint()))
364 with
Program(lst
, bigendian
=False) as program
:
365 sim
= self
.run_tst_program(program
, svstate
=svstate
,
367 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
368 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
369 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
370 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
371 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
372 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
373 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
375 # work out the results with the twin mul/add-sub
376 res
= transform_radix2(av
, coe
)
378 for i
, expected
in enumerate(res
):
379 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
380 for i
, expected
in enumerate(res
):
381 # convert to Power single
382 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
383 expected
= float(expected
)
384 actual
= float(sim
.fpr(i
))
385 # approximate error calculation, good enough test
386 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
387 # and the rounding is different
388 err
= abs(actual
- expected
) / expected
389 self
.assertTrue(err
< 1e-7)
391 def test_sv_fpmadds_fft(self
):
392 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
394 four in-place vector mul-adds, four in-place vector mul-subs
396 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
397 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
399 there is the *option* to target a different location (non-in-place)
402 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
403 FRS to perform the two multiplies. one add, one subtract.
405 sv.ffmadds FRT, FRA, FRC, FRB actually does:
406 fmadds FRT , FRA, FRC, FRA
407 fnmsubs FRT+vl, FRA, FRC, FRB+vl
410 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
415 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
416 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
417 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
419 # work out the results with the twin mul/add-sub
420 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
421 fprs
[i
+2] = fp64toselectable(a
)
422 fprs
[i
+6] = fp64toselectable(b
)
423 fprs
[i
+10] = fp64toselectable(c
)
427 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
428 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
430 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
432 # SVSTATE (in this case, VL=2)
433 svstate
= SVP64State()
435 svstate
.maxvl
= 4 # MAXVL
436 print ("SVSTATE", bin(svstate
.asint()))
438 with
Program(lst
, bigendian
=False) as program
:
439 sim
= self
.run_tst_program(program
, svstate
=svstate
,
441 # confirm that the results are as expected
442 for i
, (t
, u
) in enumerate(res
):
443 self
.assertEqual(sim
.fpr(i
+2), t
)
444 self
.assertEqual(sim
.fpr(i
+6), u
)
446 def test_sv_ffadds_fft(self
):
447 """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
449 four in-place vector adds, four in-place vector subs
451 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
452 FRS to perform the twin operations: one add, one subtract.
454 sv.ffadds FRT, FRA, FRB actually does:
456 fsubs FRT+vl, FRA, FRB+vl
458 lst
= SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
463 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
464 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
466 # work out the results with the twin add-sub
467 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
468 fprs
[i
+2] = fp64toselectable(a
)
469 fprs
[i
+6] = fp64toselectable(b
)
472 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
473 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
475 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
477 # SVSTATE (in this case, VL=2)
478 svstate
= SVP64State()
480 svstate
.maxvl
= 4 # MAXVL
481 print ("SVSTATE", bin(svstate
.asint()))
483 with
Program(lst
, bigendian
=False) as program
:
484 sim
= self
.run_tst_program(program
, svstate
=svstate
,
486 # confirm that the results are as expected
487 for i
, (t
, u
) in enumerate(res
):
488 a
= float(sim
.fpr(i
+2))
489 b
= float(sim
.fpr(i
+6))
492 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
493 for i
, (t
, u
) in enumerate(res
):
494 self
.assertEqual(sim
.fpr(i
+2), t
)
495 self
.assertEqual(sim
.fpr(i
+6), u
)
497 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
499 runs a full in-place O(N log2 N) butterfly schedule for
500 Discrete Fourier Transform. this version however uses
501 SVP64 "Vertical-First" Mode and so needs an explicit
504 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
505 (3 inputs, 2 outputs)
507 complex calculation (FFT):
509 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
510 vec_r[jh] = vec_r[jl] - tpre
513 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
514 vec_i[jh] = vec_i[jl] - tpim
517 real-only calculation (DFT):
519 temp1 = vec[jh] * exptable[k]
521 vec[jh] = temp2 - temp1
522 vec[jl] = temp2 + temp1
524 note: a rather nice convenience / coincidence. the meaning of
525 these two instructions is:
526 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
527 "svremap 5, 1, 0, 2, 0, 0, 1",
528 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
529 "svremap 26, 0, 0, 0, 0, 1, 1",
531 however it turns out that they can be *merged*, and for
532 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
533 *ignore* their REMAPs (by definition, because you can't REMAP
534 scalar operands), and for the second one (sv.ffadds) exactly the
535 right REMAPs are also ignored!
537 therefore we can merge:
538 "svremap 5, 1, 0, 2, 0, 0, 1",
539 "svremap 26, 0, 0, 0, 0, 1, 1",
541 "svremap 31, 1, 0, 2, 0, 1, 1",
542 and save one instruction.
545 # set triple butterfly mode with persistent "REMAP"
546 "svshape 8, 1, 1, 1, 1",
547 "svremap 31, 1, 0, 2, 0, 1, 1",
549 "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
550 "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
551 # tpre = mul1_r + mul2_r
553 "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i
554 "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r
555 # tpim = mul2_i - mul1_i
557 "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre
559 "sv.ffadds 8.v, 26, 8.v", # vh/vl +- tpim
562 "setvl. 0, 0, 1, 1, 0, 0",
567 # array and coefficients to test
568 ar
= [7.0, -9.8, 3.0, -32.3,
569 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
570 ai
= [1.0, -1.8, 3.0, 19.3,
571 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
572 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
573 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
577 for i
, a
in enumerate(ar
):
578 fprs
[i
+0] = fp64toselectable(a
)
579 for i
, a
in enumerate(ai
):
580 fprs
[i
+8] = fp64toselectable(a
)
581 for i
, cr
in enumerate(coer
):
582 fprs
[i
+16] = fp64toselectable(cr
)
583 for i
, ci
in enumerate(coei
):
584 fprs
[i
+20] = fp64toselectable(ci
)
586 # set total. err don't know how to calculate how many there are...
587 # do it manually for now
593 tablestep
= n
// size
594 for i
in range(0, n
, size
):
595 for j
in range(i
, i
+ halfsize
):
599 # SVSTATE (calculated VL)
600 svstate
= SVP64State()
602 svstate
.maxvl
= VL
# MAXVL
603 print ("SVSTATE", bin(svstate
.asint()))
605 with
Program(lst
, bigendian
=False) as program
:
606 sim
= self
.run_tst_program(program
, svstate
=svstate
,
608 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
609 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
610 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
611 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
612 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
613 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
614 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
616 # work out the results with the twin mul/add-sub, explicit
618 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
620 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
621 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
622 "expected_r", expected_r
,
623 "expected_i", expected_i
)
624 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
625 # convert to Power single
626 expected_r
= DOUBLE2SINGLE(fp64toselectable(expected_r
))
627 expected_r
= float(expected_r
)
628 actual_r
= float(sim
.fpr(i
))
629 # approximate error calculation, good enough test
630 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
631 # and the rounding is different
632 err
= abs(actual_r
- expected_r
) / expected_r
633 self
.assertTrue(err
< 1e-6)
634 # convert to Power single
635 expected_i
= DOUBLE2SINGLE(fp64toselectable(expected_i
))
636 expected_i
= float(expected_i
)
637 actual_i
= float(sim
.fpr(i
+8))
638 # approximate error calculation, good enough test
639 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
640 # and the rounding is different
641 err
= abs(actual_i
- expected_i
) / expected_i
642 self
.assertTrue(err
< 1e-6)
644 def test_sv_ffadds_fft_scalar(self
):
645 """>>> lst = ["sv.ffadds 2.v, 12, 13"
647 four in-place vector adds and subs, but done with a scalar
650 lst
= SVP64Asm(["sv.ffadds 2.v, 12, 13"
657 fprs
[12] = fp64toselectable(scalar_a
)
658 fprs
[13] = fp64toselectable(scalar_b
)
660 # work out the results with the twin add-sub
662 t
= scalar_b
+ scalar_a
663 u
= scalar_b
- scalar_a
664 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
665 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
667 print ("FFT", i
, "res", t
, u
)
669 # SVSTATE (in this case, VL=2)
670 svstate
= SVP64State()
672 svstate
.maxvl
= 4 # MAXVL
673 print ("SVSTATE", bin(svstate
.asint()))
675 with
Program(lst
, bigendian
=False) as program
:
676 sim
= self
.run_tst_program(program
, svstate
=svstate
,
678 # confirm that the results are as expected
679 for i
, (t
, u
) in enumerate(res
):
680 a
= float(sim
.fpr(i
+2))
681 b
= float(sim
.fpr(i
+6))
684 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
685 for i
, (t
, u
) in enumerate(res
):
686 self
.assertEqual(sim
.fpr(i
+2), t
)
687 self
.assertEqual(sim
.fpr(i
+6), u
)
689 def test_sv_remap_fpmadds_fft_ldst(self
):
690 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
691 "sv.lfssh 0.v, 4(0), 20", # bit-reversed
692 "svshape 8, 1, 1, 1, 0",
693 "svremap 31, 1, 0, 2, 0, 1, 0",
694 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
696 runs a full in-place O(N log2 N) butterfly schedule for
697 Discrete Fourier Transform, using bit-reversed LD/ST
699 lst
= SVP64Asm( ["svshape 8, 1, 1, 15, 0",
700 "svremap 1, 0, 0, 0, 0, 0, 0, 0",
701 "sv.lfssh 0.v, 4(0), 20", # shifted
702 "svshape 8, 1, 1, 1, 0",
703 "svremap 31, 1, 0, 2, 0, 1, 0",
704 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
708 # array and coefficients to test
709 av
= [7.0, -9.8, 3.0, -32.3,
710 -2.0, 5.0, -9.8, 31.3] # array 0..7
711 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
715 for i
, c
in enumerate(coe
):
716 fprs
[i
+8] = fp64toselectable(c
)
720 for i
, a
in enumerate(av
):
721 a
= SINGLE(fp64toselectable(a
)).value
726 mem
[(i
//2)*8] = val |
(a
<< 32)
728 with
Program(lst
, bigendian
=False) as program
:
729 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
731 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
732 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
733 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
734 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
735 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
736 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
737 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
740 print (sim
.mem
.dump())
742 # work out the results with the twin mul/add-sub,
743 # note bit-reverse mode requested
744 res
= transform_radix2(av
, coe
, reverse
=True)
746 for i
, expected
in enumerate(res
):
747 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
748 for i
, expected
in enumerate(res
):
749 # convert to Power single
750 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
751 expected
= float(expected
)
752 actual
= float(sim
.fpr(i
))
753 # approximate error calculation, good enough test
754 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
755 # and the rounding is different
756 err
= abs(actual
- expected
) / expected
757 self
.assertTrue(err
< 1e-6)
759 def run_tst_program(self
, prog
, initial_regs
=None,
763 if initial_regs
is None:
764 initial_regs
= [0] * 32
765 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
766 initial_fprs
=initial_fprs
,
777 if __name__
== "__main__":