1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import ISACallerFnHelper
# really bad hack.  need to access the DOUBLE2SINGLE function auto-generated
# by the ISA function helpers: the tests below call fph.DOUBLE2SINGLE() to
# convert their expected (double) results to Power single precision before
# comparing them against the simulator's FPRs.
fph = ISACallerFnHelper(XLEN=64)
def transform_radix2(vec, exptable, reverse=False):
    """Apply the radix-2 butterfly schedule to ``vec`` and return the result.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    For every stage (``size`` = 2, 4, ... up to ``len(vec)``) and every
    pair ``jl = j``, ``jh = j + size // 2`` the twin mul-add-sub is done::

        temp1   = vec[jh] * exptable[k]
        temp2   = vec[jl]
        vec[jh] = temp2 - temp1
        vec[jl] = temp2 + temp1

    Parameters:
        vec: list of values to transform.  The loops assume its length is
            a power of two (no check is made).
        exptable: multipliers indexed by ``k``; at least ``len(vec) // 2``
            entries are read.
        reverse: when true, a bit-reversed *copy* of ``vec`` is made first
            and the caller's list is left untouched.  When false the
            butterflies are applied to ``vec`` in place.

    Returns:
        The transformed list (``vec`` itself when ``reverse`` is false).
    """
    # NOTE(review): this block was recovered from a damaged listing; the
    # statements that set n, size, halfsize, k, vjh and temp2, the loop
    # stepping and the return were rebuilt from how those names are used
    # here and in the tests - confirm against the upstream file.

    def reverse_bits(val, width):
        """Return ``val`` with its lowest ``width`` bits in reverse order."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    if reverse:
        vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize
                vjh = vec[jh]  # kept only for the debug print below
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
    """Apply the radix-2 butterfly schedule to a split real/imaginary vector.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    For every stage and every pair ``jl = j``, ``jh = j + size // 2``::

        tpre      =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim      = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh] = vec_r[jl] - tpre
        vec_i[jh] = vec_i[jl] - tpim
        vec_r[jl] += tpre
        vec_i[jl] += tpim

    Parameters:
        vec_r, vec_i: real and imaginary parts, lists of equal length.
            The loops assume the length is a power of two (no check).
        cos_r, sin_i: coefficient tables indexed by ``k``; at least
            ``len(vec_r) // 2`` entries of each are read.
        reverse: when true, bit-reversed *copies* of both lists are made
            first and the caller's lists are left untouched.  When false
            the butterflies are applied to the lists in place.

    Returns:
        Tuple ``(vec_r, vec_i)`` holding the transformed parts.
    """
    # NOTE(review): this block was recovered from a damaged listing; the
    # statements that set n, size, halfsize and k, the ``+=`` updates of
    # the jl elements, the loop stepping and the return were rebuilt from
    # the formulas quoted in the tests - confirm against the upstream file.

    def reverse_bits(val, width):
        """Return ``val`` with its lowest ``width`` bits in reverse order."""
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation.  The listing permuted an
    # undefined name ``vec`` here, so reverse=True could never work;
    # both component arrays must be permuted with the same index map.
    if reverse:
        order = [reverse_bits(i, levels) for i in range(n)]
        vec_r = [vec_r[idx] for idx in order]
        vec_i = [vec_i[idx] for idx in order]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)
                print(" tpre", tpre)
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)
                print(" tpim", tpim)
                # jh must be written before jl: both read the old jl value
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
134 class FFTTestCase(FHDLTestCase
):
136 def _check_regs(self
, sim
, expected
):
138 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
140 def test_sv_remap_fpmadds_fft(self
):
141 """>>> lst = ["svshape 8, 1, 1, 1, 0",
142 "svremap 31, 1, 0, 2, 0, 1, 0",
143 "sv.ffmadds *2, *2, *2, *10"
145 runs a full in-place O(N log2 N) butterfly schedule for
146 Discrete Fourier Transform.
148 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
149 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
151 there is the *option* to target a different location (non-in-place)
154 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
155 (3 inputs, 2 outputs)
157 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
158 "svremap 31, 1, 0, 2, 0, 1, 0",
159 "sv.ffmadds *0, *0, *0, *8"
163 # array and coefficients to test
164 av
= [7.0, -9.8, 3.0, -32.3,
165 -2.0, 5.0, -9.8, 31.3] # array 0..7
166 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
170 for i
, c
in enumerate(coe
):
171 fprs
[i
+8] = fp64toselectable(c
)
172 for i
, a
in enumerate(av
):
173 fprs
[i
+0] = fp64toselectable(a
)
175 with
Program(lst
, bigendian
=False) as program
:
176 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
177 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
178 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
179 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
180 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
181 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
182 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
183 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
185 # work out the results with the twin mul/add-sub
186 res
= transform_radix2(av
, coe
)
188 for i
, expected
in enumerate(res
):
189 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
190 for i
, expected
in enumerate(res
):
191 # convert to Power single
192 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
193 expected
= float(expected
)
194 actual
= float(sim
.fpr(i
))
195 # approximate error calculation, good enough test
196 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
197 # and the rounding is different
198 err
= abs(actual
- expected
) / expected
199 self
.assertTrue(err
< 1e-7)
201 def test_sv_remap_fpmadds_fft_svstep(self
):
202 """>>> lst = SVP64Asm( [
203 "svshape 8, 1, 1, 1, 1",
204 "svremap 31, 1, 0, 2, 0, 1, 0",
205 "sv.ffmadds *0, *0, *0, *8",
206 "setvl. 0, 0, 1, 1, 0, 0",
209 runs a full in-place O(N log2 N) butterfly schedule for
210 Discrete Fourier Transform. this version however uses
211 SVP64 "Vertical-First" Mode and so needs an explicit
214 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
215 (3 inputs, 2 outputs)
218 "svshape 8, 1, 1, 1, 1",
219 "svremap 31, 1, 0, 2, 0, 1, 0",
220 "sv.ffmadds *0, *0, *0, *8",
221 "setvl. 0, 0, 1, 1, 0, 0",
226 # array and coefficients to test
227 av
= [7.0, -9.8, 3.0, -32.3,
228 -2.0, 5.0, -9.8, 31.3] # array 0..7
229 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
233 for i
, c
in enumerate(coe
):
234 fprs
[i
+8] = fp64toselectable(c
)
235 for i
, a
in enumerate(av
):
236 fprs
[i
+0] = fp64toselectable(a
)
238 # set total. err don't know how to calculate how many there are...
239 # do it manually for now
245 tablestep
= n
// size
246 for i
in range(0, n
, size
):
247 for j
in range(i
, i
+ halfsize
):
251 # SVSTATE (calculated VL)
252 svstate
= SVP64State()
254 svstate
.maxvl
= VL
# MAXVL
255 print ("SVSTATE", bin(svstate
.asint()))
257 with
Program(lst
, bigendian
=False) as program
:
258 sim
= self
.run_tst_program(program
, svstate
=svstate
,
260 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
261 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
262 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
263 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
264 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
265 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
266 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
268 # work out the results with the twin mul/add-sub
269 res
= transform_radix2(av
, coe
)
271 for i
, expected
in enumerate(res
):
272 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
273 for i
, expected
in enumerate(res
):
274 # convert to Power single
275 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
276 expected
= float(expected
)
277 actual
= float(sim
.fpr(i
))
278 # approximate error calculation, good enough test
279 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
280 # and the rounding is different
281 err
= abs(actual
- expected
) / expected
282 self
.assertTrue(err
< 1e-7)
284 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
285 """>>> lst = SVP64Asm( [
286 "svshape 8, 1, 1, 1, 1",
287 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
288 "svremap 5, 1, 0, 2, 0, 0, 1",
289 "sv.fmuls 24, *0, *8",
290 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
291 "svremap 26, 0, 0, 0, 0, 1, 1",
292 "sv.ffadds *0, 24, *0",
293 "setvl. 0, 0, 1, 1, 0, 0",
297 runs a full in-place O(N log2 N) butterfly schedule for
298 Discrete Fourier Transform. also uses "Vertical First"
299 but also uses temporary scalars and ffadds rather than
302 this represents an incremental step towards complex FFT
304 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
306 * single fmuls FRT, FRA, FRC
307 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
308 (FRS is implicit / hidden in ff* operations)
310 multiply: # sv.fmuls FRT, FRA, FRC
311 temp1 = vec[jh] * exptable[k]
313 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
314 vec[jh] = temp2 - temp1
315 vec[jl] = temp2 + temp1
317 also see notes in complex fft test: here svremap is done in
318 "non-persistent" mode (as a demo) whereas in the complex fft
319 svremap is used in "persistent" mode, where by a complete
320 coincidence the REMAP arguments all happen to line up and
321 only one persistent svremap is needed. the exact same trick
322 *could* be applied here but for illustrative purposes it is not.
325 "svshape 8, 1, 1, 1, 1",
326 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
327 "svremap 5, 1, 0, 2, 0, 0, 0",
328 "sv.fmuls 24, *0, *8",
329 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
330 "svremap 26, 0, 0, 0, 0, 1, 0",
331 "sv.ffadds *0, 24, *0",
332 "setvl. 0, 0, 1, 1, 0, 0",
337 # array and coefficients to test
338 av
= [7.0, -9.8, 3.0, -32.3,
339 -2.0, 5.0, -9.8, 31.3] # array 0..7
340 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
344 for i
, c
in enumerate(coe
):
345 fprs
[i
+8] = fp64toselectable(c
)
346 for i
, a
in enumerate(av
):
347 fprs
[i
+0] = fp64toselectable(a
)
349 # set total. err don't know how to calculate how many there are...
350 # do it manually for now
356 tablestep
= n
// size
357 for i
in range(0, n
, size
):
358 for j
in range(i
, i
+ halfsize
):
362 # SVSTATE (calculated VL)
363 svstate
= SVP64State()
365 svstate
.maxvl
= VL
# MAXVL
366 print ("SVSTATE", bin(svstate
.asint()))
368 with
Program(lst
, bigendian
=False) as program
:
369 sim
= self
.run_tst_program(program
, svstate
=svstate
,
371 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
372 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
373 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
374 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
375 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
376 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
377 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
379 # work out the results with the twin mul/add-sub
380 res
= transform_radix2(av
, coe
)
382 for i
, expected
in enumerate(res
):
383 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
384 for i
, expected
in enumerate(res
):
385 # convert to Power single
386 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
387 expected
= float(expected
)
388 actual
= float(sim
.fpr(i
))
389 # approximate error calculation, good enough test
390 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
391 # and the rounding is different
392 err
= abs(actual
- expected
) / expected
393 self
.assertTrue(err
< 1e-7)
395 def test_sv_fpmadds_fft(self
):
396 """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
398 four in-place vector mul-adds, four in-place vector mul-subs
400 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
401 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
403 there is the *option* to target a different location (non-in-place)
406 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
407 FRS to perform the two multiplies. one add, one subtract.
409 sv.ffmadds FRT, FRA, FRC, FRB actually does:
410 fmadds FRT , FRA, FRC, FRA
411 fnmsubs FRT+vl, FRA, FRC, FRB+vl
414 lst
= SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
419 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
420 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
421 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
423 # work out the results with the twin mul/add-sub
424 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
425 fprs
[i
+2] = fp64toselectable(a
)
426 fprs
[i
+6] = fp64toselectable(b
)
427 fprs
[i
+10] = fp64toselectable(c
)
431 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
432 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
434 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
436 # SVSTATE (in this case, VL=2)
437 svstate
= SVP64State()
439 svstate
.maxvl
= 4 # MAXVL
440 print ("SVSTATE", bin(svstate
.asint()))
442 with
Program(lst
, bigendian
=False) as program
:
443 sim
= self
.run_tst_program(program
, svstate
=svstate
,
445 # confirm that the results are as expected
446 for i
, (t
, u
) in enumerate(res
):
447 self
.assertEqual(sim
.fpr(i
+2), t
)
448 self
.assertEqual(sim
.fpr(i
+6), u
)
450 def test_sv_ffadds_fft(self
):
451 """>>> lst = ["sv.ffadds *2, *2, *2"
453 four in-place vector adds, four in-place vector subs
455 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
456 FRS to perform the two multiplies. one add, one subtract.
458 sv.ffadds FRT, FRA, FRB actually does:
460 fsubs FRT+vl, FRA, FRB+vl
462 lst
= SVP64Asm(["sv.ffadds *2, *2, *2"
467 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
468 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
470 # work out the results with the twin add-sub
471 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
472 fprs
[i
+2] = fp64toselectable(a
)
473 fprs
[i
+6] = fp64toselectable(b
)
476 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
477 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
479 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
481 # SVSTATE (in this case, VL=2)
482 svstate
= SVP64State()
484 svstate
.maxvl
= 4 # MAXVL
485 print ("SVSTATE", bin(svstate
.asint()))
487 with
Program(lst
, bigendian
=False) as program
:
488 sim
= self
.run_tst_program(program
, svstate
=svstate
,
490 # confirm that the results are as expected
491 for i
, (t
, u
) in enumerate(res
):
492 a
= float(sim
.fpr(i
+2))
493 b
= float(sim
.fpr(i
+6))
496 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
497 for i
, (t
, u
) in enumerate(res
):
498 self
.assertEqual(sim
.fpr(i
+2), t
)
499 self
.assertEqual(sim
.fpr(i
+6), u
)
501 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
503 runs a full in-place O(N log2 N) butterfly schedule for
504 Discrete Fourier Transform. this version however uses
505 SVP64 "Vertical-First" Mode and so needs an explicit
508 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
509 (3 inputs, 2 outputs)
511 complex calculation (FFT):
513 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
514 vec_r[jh] = vec_r[jl] - tpre
517 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
518 vec_i[jh] = vec_i[jl] - tpim
521 real-only calculation (DFT):
523 temp1 = vec[jh] * exptable[k]
525 vec[jh] = temp2 - temp1
526 vec[jl] = temp2 + temp1
528 note: a rather nice convenience / coincidence. the meaning of
529 these two instructions is:
530 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
531 "svremap 5, 1, 0, 2, 0, 0, 1",
532 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
533 "svremap 26, 0, 0, 0, 0, 1, 1",
535 however it turns out that they can be *merged*, and for
536 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
537 *ignore* their REMAPs (by definition, because you can't REMAP
538 scalar operands), and for the second one (sv.ffads) exactly the
539 right REMAPs are also ignored!
541 therefore we can merge:
542 "svremap 5, 1, 0, 2, 0, 0, 1",
543 "svremap 26, 0, 0, 0, 0, 1, 1",
545 "svremap 31, 1, 0, 2, 0, 1, 1",
546 and save one instruction.
549 # set triple butterfly mode with persistent "REMAP"
550 "svshape 8, 1, 1, 1, 1",
551 "svremap 31, 1, 0, 2, 0, 1, 1",
553 "sv.fmuls 24, *0, *16", # mul1_r = r*cos_r
554 "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
555 # tpre = mul1_r + mul2_r
557 "sv.fmuls 26, *0, *20", # mul1_i = r*sin_i
558 "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
559 # tpim = mul2_i - mul1_i
561 "sv.ffadds *0, 24, *0", # vh/vl +/- tpre
563 "sv.ffadds *8, 26, *8", # vh/vl +- tpim
566 "setvl. 0, 0, 1, 1, 0, 0",
571 # array and coefficients to test
572 ar
= [7.0, -9.8, 3.0, -32.3,
573 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
574 ai
= [1.0, -1.8, 3.0, 19.3,
575 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
576 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
577 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
581 for i
, a
in enumerate(ar
):
582 fprs
[i
+0] = fp64toselectable(a
)
583 for i
, a
in enumerate(ai
):
584 fprs
[i
+8] = fp64toselectable(a
)
585 for i
, cr
in enumerate(coer
):
586 fprs
[i
+16] = fp64toselectable(cr
)
587 for i
, ci
in enumerate(coei
):
588 fprs
[i
+20] = fp64toselectable(ci
)
590 # set total. err don't know how to calculate how many there are...
591 # do it manually for now
597 tablestep
= n
// size
598 for i
in range(0, n
, size
):
599 for j
in range(i
, i
+ halfsize
):
603 # SVSTATE (calculated VL)
604 svstate
= SVP64State()
606 svstate
.maxvl
= VL
# MAXVL
607 print ("SVSTATE", bin(svstate
.asint()))
609 with
Program(lst
, bigendian
=False) as program
:
610 sim
= self
.run_tst_program(program
, svstate
=svstate
,
612 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
613 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
614 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
615 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
616 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
617 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
618 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
620 # work out the results with the twin mul/add-sub, explicit
622 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
624 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
625 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
626 "expected_r", expected_r
,
627 "expected_i", expected_i
)
628 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
629 # convert to Power single
630 expected_r
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_r
))
631 expected_r
= float(expected_r
)
632 actual_r
= float(sim
.fpr(i
))
633 # approximate error calculation, good enough test
634 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
635 # and the rounding is different
636 err
= abs(actual_r
- expected_r
) / expected_r
637 self
.assertTrue(err
< 1e-6)
638 # convert to Power single
639 expected_i
= fph
.DOUBLE2SINGLE(fp64toselectable(expected_i
))
640 expected_i
= float(expected_i
)
641 actual_i
= float(sim
.fpr(i
+8))
642 # approximate error calculation, good enough test
643 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
644 # and the rounding is different
645 err
= abs(actual_i
- expected_i
) / expected_i
646 self
.assertTrue(err
< 1e-6)
648 def test_sv_ffadds_fft_scalar(self
):
649 """>>> lst = ["sv.ffadds *2, 12, 13"
651 four in-place vector adds and subs, but done with a scalar
654 lst
= SVP64Asm(["sv.ffadds *2, 12, 13"
661 fprs
[12] = fp64toselectable(scalar_a
)
662 fprs
[13] = fp64toselectable(scalar_b
)
664 # work out the results with the twin add-sub
666 t
= scalar_b
+ scalar_a
667 u
= scalar_b
- scalar_a
668 t
= fph
.DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
669 u
= fph
.DOUBLE2SINGLE(fp64toselectable(u
)) # from double
671 print ("FFT", i
, "res", t
, u
)
673 # SVSTATE (in this case, VL=2)
674 svstate
= SVP64State()
676 svstate
.maxvl
= 4 # MAXVL
677 print ("SVSTATE", bin(svstate
.asint()))
679 with
Program(lst
, bigendian
=False) as program
:
680 sim
= self
.run_tst_program(program
, svstate
=svstate
,
682 # confirm that the results are as expected
683 for i
, (t
, u
) in enumerate(res
):
684 a
= float(sim
.fpr(i
+2))
685 b
= float(sim
.fpr(i
+6))
688 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
689 for i
, (t
, u
) in enumerate(res
):
690 self
.assertEqual(sim
.fpr(i
+2), t
)
691 self
.assertEqual(sim
.fpr(i
+6), u
)
693 def test_sv_remap_fpmadds_fft_ldst(self
):
694 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
695 "sv.lfssh *0, 4(0), 20", # bit-reversed
696 "svshape 8, 1, 1, 1, 0",
697 "svremap 31, 1, 0, 2, 0, 1, 0",
698 "sv.ffmadds *0, *0, *0, *8"
700 runs a full in-place O(N log2 N) butterfly schedule for
701 Discrete Fourier Transform, using bit-reversed LD/ST
703 lst
= SVP64Asm( ["svshape 8, 1, 1, 15, 0",
704 "svremap 1, 0, 0, 0, 0, 0, 0",
705 "sv.lfssh *0, 4(0), 20", # shifted
706 "svshape 8, 1, 1, 1, 0",
707 "svremap 31, 1, 0, 2, 0, 1, 0",
708 "sv.ffmadds *0, *0, *0, *8"
712 # array and coefficients to test
713 av
= [7.0, -9.8, 3.0, -32.3,
714 -2.0, 5.0, -9.8, 31.3] # array 0..7
715 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
719 for i
, c
in enumerate(coe
):
720 fprs
[i
+8] = fp64toselectable(c
)
724 for i
, a
in enumerate(av
):
725 a
= SINGLE(fp64toselectable(a
)).value
730 mem
[(i
//2)*8] = val |
(a
<< 32)
732 with
Program(lst
, bigendian
=False) as program
:
733 sim
= self
.run_tst_program(program
, initial_mem
=mem
,
735 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
736 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
737 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
738 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
739 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
740 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
741 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
744 print (sim
.mem
.dump())
746 # work out the results with the twin mul/add-sub,
747 # note bit-reverse mode requested
748 res
= transform_radix2(av
, coe
, reverse
=True)
750 for i
, expected
in enumerate(res
):
751 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
752 for i
, expected
in enumerate(res
):
753 # convert to Power single
754 expected
= fph
.DOUBLE2SINGLE(fp64toselectable(expected
))
755 expected
= float(expected
)
756 actual
= float(sim
.fpr(i
))
757 # approximate error calculation, good enough test
758 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
759 # and the rounding is different
760 err
= abs(actual
- expected
) / expected
761 self
.assertTrue(err
< 1e-6)
763 def run_tst_program(self
, prog
, initial_regs
=None,
767 if initial_regs
is None:
768 initial_regs
= [0] * 32
769 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
770 initial_fprs
=initial_fprs
,
781 if __name__
== "__main__":