1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
def transform_radix2(vec, exptable):
    """Run the real-valued radix-2 butterfly schedule over ``vec`` in place.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    For every stage (``size`` = 2, 4, ... up to ``len(vec)``) and every
    pair ``jl``/``jh`` half a block apart, this computes::

        temp1   = vec[jh] * exptable[k]
        temp2   = vec[jl]
        vec[jh] = temp2 - temp1
        vec[jl] = temp2 + temp1

    :param vec: mutable sequence of numbers; it is overwritten in place.
                No check is made that its length is a power of two.
    :param exptable: coefficient table, indexed by ``k`` which advances by
                     ``len(vec) // size`` per butterfly within a block.
    :returns: ``vec`` itself (the same object that was passed in).
    """
    # Returns the integer whose value is the reverse of the lowest 'width'
    # bits of the integer 'val'.
    def reverse_bits(val, width):
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    # (left disabled here, so reverse_bits and levels are currently unused)
    #vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0  # coefficient index restarts for every block
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize
                vjh = vec[jh]  # pre-update value, kept for the debug print
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
    """Run the complex radix-2 butterfly schedule in place on two vectors.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    The real and imaginary parts are held in separate sequences.  For each
    butterfly pair ``jl``/``jh`` and coefficient index ``k``::

        tpre      =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim      = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh] = vec_r[jl] - tpre
        vec_i[jh] = vec_i[jl] - tpim
        vec_r[jl] += tpre
        vec_i[jl] += tpim

    :param vec_r: mutable sequence of real parts, overwritten in place.
                  Its length sets the transform size; no power-of-two
                  check is made.
    :param vec_i: mutable sequence of imaginary parts, overwritten in place.
    :param cos_r: real coefficient table, indexed by ``k``.
    :param sin_i: imaginary coefficient table, indexed by ``k``.
    :returns: the tuple ``(vec_r, vec_i)`` (the same objects passed in).
    """
    # Returns the integer whose value is the reverse of the lowest 'width'
    # bits of the integer 'val'.
    def reverse_bits(val, width):
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation
    # (left disabled here, so reverse_bits and levels are currently unused)
    #vec = [vec[reverse_bits(i, levels)] for i in range(n)]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0  # coefficient index restarts for every block
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])

                # real part of (v[jh] * coefficient)
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)
                print(" tpre", tpre)

                # imaginary part of (v[jh] * coefficient)
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)
                print(" tpim", tpim)

                # the jh outputs must be written first: they read the
                # not-yet-updated jl values
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
128 class FFTTestCase(FHDLTestCase
):
130 def _check_regs(self
, sim
, expected
):
132 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
134 def test_sv_remap_fpmadds_fft(self
):
135 """>>> lst = ["svshape 8, 1, 1, 1, 0",
136 "svremap 31, 1, 0, 2, 0, 1",
137 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
139 runs a full in-place O(N log2 N) butterfly schedule for
140 Discrete Fourier Transform.
142 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
143 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
145 there is the *option* to target a different location (non-in-place)
148 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
149 (3 inputs, 2 outputs)
151 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
152 "svremap 31, 1, 0, 2, 0, 1",
153 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
157 # array and coefficients to test
158 av
= [7.0, -9.8, 3.0, -32.3,
159 -2.0, 5.0, -9.8, 31.3] # array 0..7
160 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
164 for i
, c
in enumerate(coe
):
165 fprs
[i
+8] = fp64toselectable(c
)
166 for i
, a
in enumerate(av
):
167 fprs
[i
+0] = fp64toselectable(a
)
169 with
Program(lst
, bigendian
=False) as program
:
170 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
171 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
172 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
173 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
174 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
175 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
176 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
177 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
179 # work out the results with the twin mul/add-sub
180 res
= transform_radix2(av
, coe
)
182 for i
, expected
in enumerate(res
):
183 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
184 for i
, expected
in enumerate(res
):
185 # convert to Power single
186 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
187 expected
= float(expected
)
188 actual
= float(sim
.fpr(i
))
189 # approximate error calculation, good enough test
190 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
191 # and the rounding is different
192 err
= abs(actual
- expected
) / expected
193 self
.assertTrue(err
< 1e-7)
195 def test_sv_remap_fpmadds_fft_svstep(self
):
196 """>>> lst = SVP64Asm( [
197 "svshape 8, 1, 1, 1, 1",
198 "svremap 31, 1, 0, 2, 0, 1",
199 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
200 "setvl. 0, 0, 0, 1, 0, 0",
203 runs a full in-place O(N log2 N) butterfly schedule for
204 Discrete Fourier Transform. this version however uses
205 SVP64 "Vertical-First" Mode and so needs an explicit
208 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
209 (3 inputs, 2 outputs)
212 "svshape 8, 1, 1, 1, 1",
213 "svremap 31, 1, 0, 2, 0, 1",
214 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
215 "setvl. 0, 0, 0, 1, 0, 0",
220 # array and coefficients to test
221 av
= [7.0, -9.8, 3.0, -32.3,
222 -2.0, 5.0, -9.8, 31.3] # array 0..7
223 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
227 for i
, c
in enumerate(coe
):
228 fprs
[i
+8] = fp64toselectable(c
)
229 for i
, a
in enumerate(av
):
230 fprs
[i
+0] = fp64toselectable(a
)
232 # set total. err don't know how to calculate how many there are...
233 # do it manually for now
239 tablestep
= n
// size
240 for i
in range(0, n
, size
):
241 for j
in range(i
, i
+ halfsize
):
245 # SVSTATE (calculated VL)
246 svstate
= SVP64State()
247 svstate
.vl
[0:7] = VL
# VL
248 svstate
.maxvl
[0:7] = VL
# MAXVL
249 print ("SVSTATE", bin(svstate
.spr
.asint()))
251 with
Program(lst
, bigendian
=False) as program
:
252 sim
= self
.run_tst_program(program
, svstate
=svstate
,
254 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
255 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
256 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
257 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
258 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
259 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
260 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
262 # work out the results with the twin mul/add-sub
263 res
= transform_radix2(av
, coe
)
265 for i
, expected
in enumerate(res
):
266 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
267 for i
, expected
in enumerate(res
):
268 # convert to Power single
269 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
270 expected
= float(expected
)
271 actual
= float(sim
.fpr(i
))
272 # approximate error calculation, good enough test
273 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
274 # and the rounding is different
275 err
= abs(actual
- expected
) / expected
276 self
.assertTrue(err
< 1e-7)
278 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self
):
279 """>>> lst = SVP64Asm( [
280 "svshape 8, 1, 1, 1, 1",
281 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
282 "svremap 5, 1, 0, 2, 0, 0",
283 "sv.fmuls 24, 0.v, 8.v",
284 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
285 "svremap 26, 0, 0, 0, 0, 1",
286 "sv.ffadds 0.v, 24, 0.v",
287 "setvl. 0, 0, 0, 1, 0, 0",
291 runs a full in-place O(N log2 N) butterfly schedule for
292 Discrete Fourier Transform. also uses "Vertical First"
293 but also uses temporary scalars and ffadds rather than
296 this represents an incremental step towards complex FFT
298 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
300 * single fmuls FRT, FRA, FRC
301 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
302 (FRS is implicit / hidden in ff* operations)
304 multiply: # sv.fmuls FRT, FRA, FRC
305 temp1 = vec[jh] * exptable[k]
307 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
308 vec[jh] = temp2 - temp1
309 vec[jl] = temp2 + temp1
312 "svshape 8, 1, 1, 1, 1",
313 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
314 "svremap 5, 1, 0, 2, 0, 0",
315 "sv.fmuls 24, 0.v, 8.v",
316 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
317 "svremap 26, 0, 0, 0, 0, 1",
318 "sv.ffadds 0.v, 24, 0.v",
319 "setvl. 0, 0, 0, 1, 0, 0",
324 # array and coefficients to test
325 av
= [7.0, -9.8, 3.0, -32.3,
326 -2.0, 5.0, -9.8, 31.3] # array 0..7
327 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
331 for i
, c
in enumerate(coe
):
332 fprs
[i
+8] = fp64toselectable(c
)
333 for i
, a
in enumerate(av
):
334 fprs
[i
+0] = fp64toselectable(a
)
336 # set total. err don't know how to calculate how many there are...
337 # do it manually for now
343 tablestep
= n
// size
344 for i
in range(0, n
, size
):
345 for j
in range(i
, i
+ halfsize
):
349 # SVSTATE (calculated VL)
350 svstate
= SVP64State()
351 svstate
.vl
[0:7] = VL
# VL
352 svstate
.maxvl
[0:7] = VL
# MAXVL
353 print ("SVSTATE", bin(svstate
.spr
.asint()))
355 with
Program(lst
, bigendian
=False) as program
:
356 sim
= self
.run_tst_program(program
, svstate
=svstate
,
358 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
359 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
360 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
361 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
362 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
363 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
364 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
366 # work out the results with the twin mul/add-sub
367 res
= transform_radix2(av
, coe
)
369 for i
, expected
in enumerate(res
):
370 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
371 for i
, expected
in enumerate(res
):
372 # convert to Power single
373 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
374 expected
= float(expected
)
375 actual
= float(sim
.fpr(i
))
376 # approximate error calculation, good enough test
377 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
378 # and the rounding is different
379 err
= abs(actual
- expected
) / expected
380 self
.assertTrue(err
< 1e-7)
382 def test_sv_fpmadds_fft(self
):
383 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
385 four in-place vector mul-adds, four in-place vector mul-subs
387 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
388 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
390 there is the *option* to target a different location (non-in-place)
393 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
394 FRS to perform the two multiplies. one add, one subtract.
396 sv.ffmadds FRT, FRA, FRC, FRB actually does:
397 fmadds FRT , FRA, FRC, FRA
398 fnmsubs FRT+vl, FRA, FRC, FRB+vl
400 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
405 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
406 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
407 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
409 # work out the results with the twin mul/add-sub
410 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
411 fprs
[i
+2] = fp64toselectable(a
)
412 fprs
[i
+6] = fp64toselectable(b
)
413 fprs
[i
+10] = fp64toselectable(c
)
417 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
418 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
420 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
422 # SVSTATE (in this case, VL=2)
423 svstate
= SVP64State()
424 svstate
.vl
[0:7] = 4 # VL
425 svstate
.maxvl
[0:7] = 4 # MAXVL
426 print ("SVSTATE", bin(svstate
.spr
.asint()))
428 with
Program(lst
, bigendian
=False) as program
:
429 sim
= self
.run_tst_program(program
, svstate
=svstate
,
431 # confirm that the results are as expected
432 for i
, (t
, u
) in enumerate(res
):
433 self
.assertEqual(sim
.fpr(i
+2), t
)
434 self
.assertEqual(sim
.fpr(i
+6), u
)
436 def test_sv_ffadds_fft(self
):
437 """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
439 four in-place vector adds, four in-place vector subs
441 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
442 FRS to perform the two multiplies. one add, one subtract.
444 sv.ffadds FRT, FRA, FRB actually does:
446 fsubs FRT+vl, FRA, FRB+vl
448 lst
= SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
453 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
454 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
456 # work out the results with the twin add-sub
457 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
458 fprs
[i
+2] = fp64toselectable(a
)
459 fprs
[i
+6] = fp64toselectable(b
)
462 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
463 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
465 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
467 # SVSTATE (in this case, VL=2)
468 svstate
= SVP64State()
469 svstate
.vl
[0:7] = 4 # VL
470 svstate
.maxvl
[0:7] = 4 # MAXVL
471 print ("SVSTATE", bin(svstate
.spr
.asint()))
473 with
Program(lst
, bigendian
=False) as program
:
474 sim
= self
.run_tst_program(program
, svstate
=svstate
,
476 # confirm that the results are as expected
477 for i
, (t
, u
) in enumerate(res
):
478 a
= float(sim
.fpr(i
+2))
479 b
= float(sim
.fpr(i
+6))
482 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
483 for i
, (t
, u
) in enumerate(res
):
484 self
.assertEqual(sim
.fpr(i
+2), t
)
485 self
.assertEqual(sim
.fpr(i
+6), u
)
487 def test_sv_remap_fpmadds_fft_svstep_complex(self
):
489 runs a full in-place O(N log2 N) butterfly schedule for
490 Discrete Fourier Transform. this version however uses
491 SVP64 "Vertical-First" Mode and so needs an explicit
494 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
495 (3 inputs, 2 outputs)
497 complex calculation (FFT):
499 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
500 vec_r[jh] = vec_r[jl] - tpre
503 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
504 vec_i[jh] = vec_i[jl] - tpim
507 real-only calculation (DFT):
509 temp1 = vec[jh] * exptable[k]
511 vec[jh] = temp2 - temp1
512 vec[jl] = temp2 + temp1
515 # set triple butterfly mode
516 "svshape 8, 1, 1, 1, 1",
518 "svremap 5, 1, 0, 2, 0, 0",
519 "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
520 "svremap 5, 1, 0, 2, 0, 0",
521 "sv.fmuls 25, 8.v, 20.v", # mul2_r = i*sin_i
522 "fadds 24, 24, 25", # tpre = mul1_r + mul2_r
524 "svremap 5, 1, 0, 2, 0, 0",
525 "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i
526 "svremap 5, 1, 0, 2, 0, 0",
527 "sv.fmuls 27, 8.v, 16.v", # mul2_i = i*cos_r
528 "fsubs 26, 27, 26", # tpim = mul2_i - mul1_i
530 "svremap 26, 0, 0, 0, 0, 1",
531 "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre
533 "svremap 26, 0, 0, 0, 0, 1",
534 "sv.ffadds 8.v, 26, 8.v", # vh/vl +- tpim
537 "setvl. 0, 0, 0, 1, 0, 0",
542 # array and coefficients to test
543 ar
= [7.0, -9.8, 3.0, -32.3,
544 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
545 ai
= [1.0, -1.8, 3.0, 19.3,
546 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
547 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
548 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
552 for i
, a
in enumerate(ar
):
553 fprs
[i
+0] = fp64toselectable(a
)
554 for i
, a
in enumerate(ai
):
555 fprs
[i
+8] = fp64toselectable(a
)
556 for i
, cr
in enumerate(coer
):
557 fprs
[i
+16] = fp64toselectable(cr
)
558 for i
, ci
in enumerate(coei
):
559 fprs
[i
+20] = fp64toselectable(ci
)
561 # set total. err don't know how to calculate how many there are...
562 # do it manually for now
568 tablestep
= n
// size
569 for i
in range(0, n
, size
):
570 for j
in range(i
, i
+ halfsize
):
574 # SVSTATE (calculated VL)
575 svstate
= SVP64State()
576 svstate
.vl
[0:7] = VL
# VL
577 svstate
.maxvl
[0:7] = VL
# MAXVL
578 print ("SVSTATE", bin(svstate
.spr
.asint()))
580 with
Program(lst
, bigendian
=False) as program
:
581 sim
= self
.run_tst_program(program
, svstate
=svstate
,
583 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
584 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
585 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
586 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
587 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
588 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
589 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
591 # work out the results with the twin mul/add-sub, explicit
593 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
595 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
596 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
597 "expected_r", expected_r
,
598 "expected_i", expected_i
)
599 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
600 # convert to Power single
601 expected_r
= DOUBLE2SINGLE(fp64toselectable(expected_r
))
602 expected_r
= float(expected_r
)
603 actual_r
= float(sim
.fpr(i
))
604 # approximate error calculation, good enough test
605 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
606 # and the rounding is different
607 err
= abs(actual_r
- expected_r
) / expected_r
608 self
.assertTrue(err
< 1e-6)
609 # convert to Power single
610 expected_i
= DOUBLE2SINGLE(fp64toselectable(expected_i
))
611 expected_i
= float(expected_i
)
612 actual_i
= float(sim
.fpr(i
+8))
613 # approximate error calculation, good enough test
614 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
615 # and the rounding is different
616 err
= abs(actual_i
- expected_i
) / expected_i
617 self
.assertTrue(err
< 1e-6)
619 def test_sv_ffadds_fft_scalar(self
):
620 """>>> lst = ["sv.ffadds 2.v, 12, 13"
622 four in-place vector adds and subs, but done with a scalar
625 lst
= SVP64Asm(["sv.ffadds 2.v, 12, 13"
632 fprs
[12] = fp64toselectable(scalar_a
)
633 fprs
[13] = fp64toselectable(scalar_b
)
635 # work out the results with the twin add-sub
637 t
= scalar_b
+ scalar_a
638 u
= scalar_b
- scalar_a
639 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
640 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
642 print ("FFT", i
, "res", t
, u
)
644 # SVSTATE (in this case, VL=2)
645 svstate
= SVP64State()
646 svstate
.vl
[0:7] = 4 # VL
647 svstate
.maxvl
[0:7] = 4 # MAXVL
648 print ("SVSTATE", bin(svstate
.spr
.asint()))
650 with
Program(lst
, bigendian
=False) as program
:
651 sim
= self
.run_tst_program(program
, svstate
=svstate
,
653 # confirm that the results are as expected
654 for i
, (t
, u
) in enumerate(res
):
655 a
= float(sim
.fpr(i
+2))
656 b
= float(sim
.fpr(i
+6))
659 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
660 for i
, (t
, u
) in enumerate(res
):
661 self
.assertEqual(sim
.fpr(i
+2), t
)
662 self
.assertEqual(sim
.fpr(i
+6), u
)
664 def run_tst_program(self
, prog
, initial_regs
=None,
668 if initial_regs
is None:
669 initial_regs
= [0] * 32
670 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
671 initial_fprs
=initial_fprs
,
682 if __name__
== "__main__":