1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
16 def transform_radix2(vec
, exptable
, reverse
=False):
18 # FFT and convolution test (Python), based on Project Nayuki
20 # Copyright (c) 2020 Project Nayuki. (MIT License)
21 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
24 # bits of the integer 'val'.
25 def reverse_bits(val
, width
):
27 for _
in range(width
):
28 result
= (result
<< 1) |
(val
& 1)
34 levels
= n
.bit_length() - 1
36 # Copy with bit-reversed permutation
38 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
44 for i
in range(0, n
, size
):
46 for j
in range(i
, i
+ halfsize
):
47 # exact same actual computation, just embedded in
48 # triple-nested for-loops
49 jl
, jh
= j
, j
+halfsize
51 temp1
= vec
[jh
] * exptable
[k
]
53 vec
[jh
] = temp2
- temp1
54 vec
[jl
] = temp2
+ temp1
55 print ("xform jl jh k", jl
, jh
, k
,
56 "vj vjh ek", temp2
, vjh
, exptable
[k
],
57 "t1, t2", temp1
, temp2
,
58 "v[jh] v[jl]", vec
[jh
], vec
[jl
])
65 def transform_radix2_complex(vec_r
, vec_i
, cos_r
, sin_i
, reverse
=False):
67 # FFT and convolution test (Python), based on Project Nayuki
69 # Copyright (c) 2020 Project Nayuki. (MIT License)
70 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
73 # bits of the integer 'val'.
74 def reverse_bits(val
, width
):
76 for _
in range(width
):
77 result
= (result
<< 1) |
(val
& 1)
83 levels
= n
.bit_length() - 1
85 # Copy with bit-reversed permutation
87 vec
= [vec
[reverse_bits(i
, levels
)] for i
in range(n
)]
93 for i
in range(0, n
, size
):
95 for j
in range(i
, i
+ halfsize
):
96 # exact same actual computation, just embedded in
97 # triple-nested for-loops
98 jl
, jh
= j
, j
+halfsize
100 print ("xform jl jh k", jl
, jh
, k
,
101 "vr h l", vec_r
[jh
], vec_r
[jl
],
102 "vi h l", vec_i
[jh
], vec_i
[jl
])
103 print (" cr k", cos_r
[k
], "si k", sin_i
[k
])
104 mul1_r
= vec_r
[jh
] * cos_r
[k
]
105 mul2_r
= vec_i
[jh
] * sin_i
[k
]
106 tpre
= mul1_r
+ mul2_r
107 print (" vec_r[jh] * cos_r[k]", mul1_r
)
108 print (" vec_i[jh] * sin_i[k]", mul2_r
)
109 print (" tpre", tpre
)
110 mul1_i
= vec_r
[jh
] * sin_i
[k
]
111 mul2_i
= vec_i
[jh
] * cos_r
[k
]
112 tpim
= -mul1_i
+ mul2_i
113 print (" vec_r[jh] * sin_i[k]", mul1_i
)
114 print (" vec_i[jh] * cos_r[k]", mul2_i
)
115 print (" tpim", tpim
)
116 vec_r
[jh
] = vec_r
[jl
] - tpre
117 vec_i
[jh
] = vec_i
[jl
] - tpim
121 print (" xform jl jh k", jl
, jh
, k
,
122 "\n vr h l", vec_r
[jh
], vec_r
[jl
],
123 "\n vi h l", vec_i
[jh
], vec_i
[jl
])
130 class FFTTestCase(FHDLTestCase
):
132 def _check_regs(self
, sim
, expected
):
134 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
136 def test_sv_remap_fpmadds_fft(self
):
137 """>>> lst = ["svshape 8, 1, 1, 1, 0",
138 "svremap 31, 1, 0, 2, 0, 1, 0",
139 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
141 runs a full in-place O(N log2 N) butterfly schedule for
142 Discrete Fourier Transform.
144 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
145 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
147 there is the *option* to target a different location (non-in-place)
150 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
151 (3 inputs, 2 outputs)
153 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
154 "svremap 31, 1, 0, 2, 0, 1, 0",
155 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
159 # array and coefficients to test
160 av
= [7.0, -9.8, 3.0, -32.3,
161 -2.0, 5.0, -9.8, 31.3] # array 0..7
162 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
166 for i
, c
in enumerate(coe
):
167 fprs
[i
+8] = fp64toselectable(c
)
168 for i
, a
in enumerate(av
):
169 fprs
[i
+0] = fp64toselectable(a
)
171 with
Program(lst
, bigendian
=False) as program
:
172 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
173 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
174 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
175 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
176 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
177 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
178 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
179 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
181 # work out the results with the twin mul/add-sub
182 res
= transform_radix2(av
, coe
)
184 for i
, expected
in enumerate(res
):
185 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
186 for i
, expected
in enumerate(res
):
187 # convert to Power single
188 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
189 expected
= float(expected
)
190 actual
= float(sim
.fpr(i
))
191 # approximate error calculation, good enough test
192 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
193 # and the rounding is different
194 err
= abs(actual
- expected
) / expected
195 self
.assertTrue(err
< 1e-7)
197 def test_sv_remap_fpmadds_fft_svstep(self
):
198 """>>> lst = SVP64Asm( [
199 "svshape 8, 1, 1, 1, 1",
200 "svremap 31, 1, 0, 2, 0, 1, 0",
201 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
202 "setvl. 0, 0, 1, 1, 0, 0",
205 runs a full in-place O(N log2 N) butterfly schedule for
206 Discrete Fourier Transform. this version however uses
207 SVP64 "Vertical-First" Mode and so needs an explicit
210 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
211 (3 inputs, 2 outputs)
214 "svshape 8, 1, 1, 1, 1",
215 "svremap 31, 1, 0, 2, 0, 1, 0",
216 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
217 "setvl. 0, 0, 1, 1, 0, 0",
222 # array and coefficients to test
223 av
= [7.0, -9.8, 3.0, -32.3,
224 -2.0, 5.0, -9.8, 31.3] # array 0..7
225 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
229 for i
, c
in enumerate(coe
):
230 fprs
[i
+8] = fp64toselectable(c
)
231 for i
, a
in enumerate(av
):
232 fprs
[i
+0] = fp64toselectable(a
)
234 # set total VL. no direct formula is used here for the number of
235 # butterfly steps, so count them manually for now
241 tablestep
= n
// size
242 for i
in range(0, n
, size
):
243 for j
in range(i
, i
+ halfsize
):
247 # SVSTATE (calculated VL)
248 svstate
= SVP64State()
250 svstate
.maxvl
= VL
# MAXVL
251 print ("SVSTATE", bin(svstate
.asint()))
253 with
Program(lst
, bigendian
=False) as program
:
254 sim
= self
.run_tst_program(program
, svstate
=svstate
,
256 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
257 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
258 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
259 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
260 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
261 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
262 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
264 # work out the results with the twin mul/add-sub
265 res
= transform_radix2(av
, coe
)
267 for i
, expected
in enumerate(res
):
268 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
269 for i
, expected
in enumerate(res
):
270 # convert to Power single
271 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
272 expected
= float(expected
)
273 actual
= float(sim
.fpr(i
))
274 # approximate error calculation, good enough test
275 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
276 # and the rounding is different
277 err
= abs(actual
- expected
) / expected
278 self
.assertTrue(err
< 1e-7)
280 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
281 """>>> lst = SVP64Asm( [
282 "svshape 8, 1, 1, 1, 1",
283 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
284 "svremap 5, 1, 0, 2, 0, 0, 0",
285 "sv.fmuls 24, 0.v, 8.v",
286 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
287 "svremap 26, 0, 0, 0, 0, 1, 0",
288 "sv.ffadds 0.v, 24, 0.v",
289 "setvl. 0, 0, 1, 1, 0, 0",
293 runs a full in-place O(N log2 N) butterfly schedule for
294 Discrete Fourier Transform. also uses "Vertical First"
295 but also uses temporary scalars and ffadds rather than
298 this represents an incremental step towards complex FFT
300 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
302 * single fmuls FRT, FRA, FRC
303 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
304 (FRS is implicit / hidden in ff* operations)
306 multiply: # sv.fmuls FRT, FRA, FRC
307 temp1 = vec[jh] * exptable[k]
309 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
310 vec[jh] = temp2 - temp1
311 vec[jl] = temp2 + temp1
313 also see notes in complex fft test: here svremap is done in
314 "non-persistent" mode (as a demo) whereas in the complex fft
315 svremap is used in "persistent" mode, where by a complete
316 coincidence the REMAP arguments all happen to line up and
317 only one persistent svremap is needed. the exact same trick
318 *could* be applied here but for illustrative purposes it is not.
321 "svshape 8, 1, 1, 1, 1",
322 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
323 "svremap 5, 1, 0, 2, 0, 0, 0",
324 "sv.fmuls 24, 0.v, 8.v",
325 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
326 "svremap 26, 0, 0, 0, 0, 1, 0",
327 "sv.ffadds 0.v, 24, 0.v",
328 "setvl. 0, 0, 1, 1, 0, 0",
333 # array and coefficients to test
334 av
= [7.0, -9.8, 3.0, -32.3,
335 -2.0, 5.0, -9.8, 31.3] # array 0..7
336 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
340 for i
, c
in enumerate(coe
):
341 fprs
[i
+8] = fp64toselectable(c
)
342 for i
, a
in enumerate(av
):
343 fprs
[i
+0] = fp64toselectable(a
)
345 # set total VL. no direct formula is used here for the number of
346 # butterfly steps, so count them manually for now
352 tablestep
= n
// size
353 for i
in range(0, n
, size
):
354 for j
in range(i
, i
+ halfsize
):
358 # SVSTATE (calculated VL)
359 svstate
= SVP64State()
361 svstate
.maxvl
= VL
# MAXVL
362 print ("SVSTATE", bin(svstate
.asint()))
364 with
Program(lst
, bigendian
=False) as program
:
365 sim
= self
.run_tst_program(program
, svstate
=svstate
,
367 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
368 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
369 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
370 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
371 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
372 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
373 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
375 # work out the results with the twin mul/add-sub
376 res
= transform_radix2(av
, coe
)
378 for i
, expected
in enumerate(res
):
379 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
380 for i
, expected
in enumerate(res
):
381 # convert to Power single
382 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
383 expected
= float(expected
)
384 actual
= float(sim
.fpr(i
))
385 # approximate error calculation, good enough test
386 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
387 # and the rounding is different
388 err
= abs(actual
- expected
) / expected
389 self
.assertTrue(err
< 1e-7)
391 def test_sv_fpmadds_fft(self
):
392 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
394 four in-place vector mul-adds, four in-place vector mul-subs
396 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
397 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
399 there is the *option* to target a different location (non-in-place)
402 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
403 FRS to perform the two multiplies. one add, one subtract.
405 sv.ffmadds FRT, FRA, FRC, FRB actually does:
406 fmadds FRT , FRA, FRC, FRA
407 fnmsubs FRT+vl, FRA, FRC, FRB+vl
410 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
415 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
416 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
417 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
419 # work out the results with the twin mul/add-sub
420 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
421 fprs
[i
+2] = fp64toselectable(a
)
422 fprs
[i
+6] = fp64toselectable(b
)
423 fprs
[i
+10] = fp64toselectable(c
)
427 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
428 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
430 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
432 # SVSTATE (in this case, VL=4)
433 svstate
= SVP64State()
435 svstate
.maxvl
= 4 # MAXVL
436 print ("SVSTATE", bin(svstate
.asint()))
438 with
Program(lst
, bigendian
=False) as program
:
439 sim
= self
.run_tst_program(program
, svstate
=svstate
,
441 # confirm that the results are as expected
442 for i
, (t
, u
) in enumerate(res
):
443 self
.assertEqual(sim
.fpr(i
+2), t
)
444 self
.assertEqual(sim
.fpr(i
+6), u
)
446 def test_sv_ffadds_fft(self
):
447 """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
449 four in-place vector adds, four in-place vector subs
451 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
452 FRS to perform the two operations: one add, one subtract.
454 sv.ffadds FRT, FRA, FRB actually does:
456 fsubs FRT+vl, FRA, FRB+vl
458 lst
= SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
463 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
464 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
466 # work out the results with the twin add-sub
467 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
468 fprs
[i
+2] = fp64toselectable(a
)
469 fprs
[i
+6] = fp64toselectable(b
)
472 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
473 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
475 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
477 # SVSTATE (in this case, VL=4)
478 svstate
= SVP64State()
480 svstate
.maxvl
= 4 # MAXVL
481 print ("SVSTATE", bin(svstate
.asint()))
483 with
Program(lst
, bigendian
=False) as program
:
484 sim
= self
.run_tst_program(program
, svstate
=svstate
,
486 # confirm that the results are as expected
487 for i
, (t
, u
) in enumerate(res
):
488 a
= float(sim
.fpr(i
+2))
489 b
= float(sim
.fpr(i
+6))
492 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
493 for i
, (t
, u
) in enumerate(res
):
494 self
.assertEqual(sim
.fpr(i
+2), t
)
495 self
.assertEqual(sim
.fpr(i
+6), u
)
497 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
499 runs a full in-place O(N log2 N) butterfly schedule for
500 Discrete Fourier Transform. this version however uses
501 SVP64 "Vertical-First" Mode and so needs an explicit
504 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
505 (3 inputs, 2 outputs)
507 complex calculation (FFT):
509 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
510 vec_r[jh] = vec_r[jl] - tpre
513 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
514 vec_i[jh] = vec_i[jl] - tpim
517 real-only calculation (DFT):
519 temp1 = vec[jh] * exptable[k]
521 vec[jh] = temp2 - temp1
522 vec[jl] = temp2 + temp1
524 note: a rather nice convenience / coincidence. the meaning of
525 these two instructions is:
526 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
527 "svremap 5, 1, 0, 2, 0, 0, 1",
528 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
529 "svremap 26, 0, 0, 0, 0, 1, 1",
531 however it turns out that they can be *merged*, and for
532 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
533 *ignore* their REMAPs (by definition), and for the second
534 one (sv.ffadds) exactly the right REMAPs are also ignored!
536 "svremap 31, 1, 0, 2, 0, 1, 1",
539 # set triple butterfly mode with persistent "REMAP"
540 "svshape 8, 1, 1, 1, 1",
541 "svremap 31, 1, 0, 2, 0, 1, 1",
543 "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
544 "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
545 # tpre = mul1_r + mul2_r
547 "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i
548 "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r
549 # tpim = mul2_i - mul1_i
551 "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre
553 "sv.ffadds 8.v, 26, 8.v", # vh/vl +- tpim
556 "setvl. 0, 0, 1, 1, 0, 0",
561 # array and coefficients to test
562 ar
= [7.0, -9.8, 3.0, -32.3,
563 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
564 ai
= [1.0, -1.8, 3.0, 19.3,
565 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
566 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
567 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
571 for i
, a
in enumerate(ar
):
572 fprs
[i
+0] = fp64toselectable(a
)
573 for i
, a
in enumerate(ai
):
574 fprs
[i
+8] = fp64toselectable(a
)
575 for i
, cr
in enumerate(coer
):
576 fprs
[i
+16] = fp64toselectable(cr
)
577 for i
, ci
in enumerate(coei
):
578 fprs
[i
+20] = fp64toselectable(ci
)
580 # set total VL. no direct formula is used here for the number of
581 # butterfly steps, so count them manually for now
587 tablestep
= n
// size
588 for i
in range(0, n
, size
):
589 for j
in range(i
, i
+ halfsize
):
593 # SVSTATE (calculated VL)
594 svstate
= SVP64State()
596 svstate
.maxvl
= VL
# MAXVL
597 print ("SVSTATE", bin(svstate
.asint()))
599 with
Program(lst
, bigendian
=False) as program
:
600 sim
= self
.run_tst_program(program
, svstate
=svstate
,
602 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
603 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
604 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
605 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
606 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
607 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
608 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
610 # work out the results with the twin mul/add-sub, explicit
612 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
614 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
615 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
616 "expected_r", expected_r
,
617 "expected_i", expected_i
)
618 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
619 # convert to Power single
620 expected_r
= DOUBLE2SINGLE(fp64toselectable(expected_r
))
621 expected_r
= float(expected_r
)
622 actual_r
= float(sim
.fpr(i
))
623 # approximate error calculation, good enough test
624 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
625 # and the rounding is different
626 err
= abs(actual_r
- expected_r
) / expected_r
627 self
.assertTrue(err
< 1e-6)
628 # convert to Power single
629 expected_i
= DOUBLE2SINGLE(fp64toselectable(expected_i
))
630 expected_i
= float(expected_i
)
631 actual_i
= float(sim
.fpr(i
+8))
632 # approximate error calculation, good enough test
633 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
634 # and the rounding is different
635 err
= abs(actual_i
- expected_i
) / expected_i
636 self
.assertTrue(err
< 1e-6)
638 def test_sv_ffadds_fft_scalar(self
):
639 """>>> lst = ["sv.ffadds 2.v, 12, 13"
641 four in-place vector adds and subs, but done with a scalar
644 lst
= SVP64Asm(["sv.ffadds 2.v, 12, 13"
651 fprs
[12] = fp64toselectable(scalar_a
)
652 fprs
[13] = fp64toselectable(scalar_b
)
654 # work out the results with the twin add-sub
656 t
= scalar_b
+ scalar_a
657 u
= scalar_b
- scalar_a
658 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
659 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
661 print ("FFT", i
, "res", t
, u
)
663 # SVSTATE (in this case, VL=4)
664 svstate
= SVP64State()
666 svstate
.maxvl
= 4 # MAXVL
667 print ("SVSTATE", bin(svstate
.asint()))
669 with
Program(lst
, bigendian
=False) as program
:
670 sim
= self
.run_tst_program(program
, svstate
=svstate
,
672 # confirm that the results are as expected
673 for i
, (t
, u
) in enumerate(res
):
674 a
= float(sim
.fpr(i
+2))
675 b
= float(sim
.fpr(i
+6))
678 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
679 for i
, (t
, u
) in enumerate(res
):
680 self
.assertEqual(sim
.fpr(i
+2), t
)
681 self
.assertEqual(sim
.fpr(i
+6), u
)
683 def test_sv_remap_fpmadds_fft_ldst(self
):
684 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
685 "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
686 "svshape 8, 1, 1, 1, 0",
687 "svremap 31, 1, 0, 2, 0, 1, 0",
688 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
690 runs a full in-place O(N log2 N) butterfly schedule for
691 Discrete Fourier Transform, using bit-reversed LD/ST
693 lst
= SVP64Asm( ["setvl 0, 0, 8, 0, 1, 1",
694 "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
695 "svshape 8, 1, 1, 1, 0",
696 "svremap 31, 1, 0, 2, 0, 1, 0",
697 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
701 # array and coefficients to test
702 av
= [7.0, -9.8, 3.0, -32.3,
703 -2.0, 5.0, -9.8, 31.3] # array 0..7
704 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
708 for i
, c
in enumerate(coe
):
709 fprs
[i
+8] = fp64toselectable(c
)
713 for i
, a
in enumerate(av
):
714 a
= SINGLE(fp64toselectable(a
)).value
719 mem
[(i
//2)*8] = val |
(a
<< 32)
721 with
Program(lst
, bigendian
=False) as program
:
722 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
724 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
725 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
726 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
727 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
728 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
729 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
730 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
733 print (sim
.mem
.dump())
735 # work out the results with the twin mul/add-sub,
736 # note bit-reverse mode requested
737 res
= transform_radix2(av
, coe
, reverse
=True)
739 for i
, expected
in enumerate(res
):
740 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
741 for i
, expected
in enumerate(res
):
742 # convert to Power single
743 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
744 expected
= float(expected
)
745 actual
= float(sim
.fpr(i
))
746 # approximate error calculation, good enough test
747 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
748 # and the rounding is different
749 err
= abs(actual
- expected
) / expected
750 self
.assertTrue(err
< 1e-6)
752 def run_tst_program(self
, prog
, initial_regs
=None,
756 if initial_regs
is None:
757 initial_regs
= [0] * 32
758 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
759 initial_fprs
=initial_fprs
,
770 if __name__
== "__main__":