d7f9ae9a84ce7ece0fd0c63ad704a7fb4dd89e2b
1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
16 def transform_radix2(vec
, exptable
):
18 # FFT and convolution test (Python), based on Project Nayuki
20 # Copyright (c) 2020 Project Nayuki. (MIT License)
21 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
24 # bits of the integer 'val'.
25 def reverse_bits(val
, width
):
27 for _
in range(width
):
28 result
= (result
<< 1) |
(val
& 1)
34 levels
= n
.bit_length() - 1
36 # Copy with bit-reversed permutation
37 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
43 for i
in range(0, n
, size
):
45 for j
in range(i
, i
+ halfsize
):
46 # exact same actual computation, just embedded in
47 # triple-nested for-loops
48 jl
, jh
= j
, j
+halfsize
50 temp1
= vec
[jh
] * exptable
[k
]
52 vec
[jh
] = temp2
- temp1
53 vec
[jl
] = temp2
+ temp1
54 print ("xform jl jh k", jl
, jh
, k
,
55 "vj vjh ek", temp2
, vjh
, exptable
[k
],
56 "t1, t2", temp1
, temp2
,
57 "v[jh] v[jl]", vec
[jh
], vec
[jl
])
64 def transform_radix2_complex(vec_r
, vec_i
, cos_r
, sin_i
):
66 # FFT and convolution test (Python), based on Project Nayuki
68 # Copyright (c) 2020 Project Nayuki. (MIT License)
69 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
72 # bits of the integer 'val'.
73 def reverse_bits(val
, width
):
75 for _
in range(width
):
76 result
= (result
<< 1) |
(val
& 1)
82 levels
= n
.bit_length() - 1
84 # Copy with bit-reversed permutation
85 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
91 for i
in range(0, n
, size
):
93 for j
in range(i
, i
+ halfsize
):
94 # exact same actual computation, just embedded in
95 # triple-nested for-loops
96 jl
, jh
= j
, j
+halfsize
98 print ("xform jl jh k", jl
, jh
, k
,
99 "vr h l", vec_r
[jh
], vec_r
[jl
],
100 "vi h l", vec_i
[jh
], vec_i
[jl
])
101 print (" cr k", cos_r
[k
], "si k", sin_i
[k
])
102 tpre
= vec_r
[jh
] * cos_r
[k
] + vec_i
[jh
] * sin_i
[k
]
103 tpim
= -vec_r
[jh
] * sin_i
[k
] + vec_i
[jh
] * cos_r
[k
]
104 vec_r
[jh
] = vec_r
[jl
] - tpre
105 vec_i
[jh
] = vec_i
[jl
] - tpim
109 print (" xform jl jh k", jl
, jh
, k
,
110 "\n vr h l", vec_r
[jh
], vec_r
[jl
],
111 "\n vi h l", vec_i
[jh
], vec_i
[jl
])
118 class FFTTestCase(FHDLTestCase
):
120 def _check_regs(self
, sim
, expected
):
122 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
124 def test_sv_remap_fpmadds_fft(self
):
125 """>>> lst = ["svshape 8, 1, 1, 1, 0",
126 "svremap 31, 1, 0, 2, 0, 1",
127 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
129 runs a full in-place O(N log2 N) butterfly schedule for
130 Discrete Fourier Transform.
132 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
133 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
135 there is the *option* to target a different location (non-in-place)
138 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
139 (3 inputs, 2 outputs)
141 lst
= SVP64Asm( ["svshape 8, 1, 1, 1, 0",
142 "svremap 31, 1, 0, 2, 0, 1",
143 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
147 # array and coefficients to test
148 av
= [7.0, -9.8, 3.0, -32.3,
149 -2.0, 5.0, -9.8, 31.3] # array 0..7
150 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
154 for i
, c
in enumerate(coe
):
155 fprs
[i
+8] = fp64toselectable(c
)
156 for i
, a
in enumerate(av
):
157 fprs
[i
+0] = fp64toselectable(a
)
159 with
Program(lst
, bigendian
=False) as program
:
160 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
161 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
162 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
163 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
164 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
165 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
166 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
167 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
169 # work out the results with the twin mul/add-sub
170 res
= transform_radix2(av
, coe
)
172 for i
, expected
in enumerate(res
):
173 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
174 for i
, expected
in enumerate(res
):
175 # convert to Power single
176 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
177 expected
= float(expected
)
178 actual
= float(sim
.fpr(i
))
179 # approximate error calculation, good enough test
180 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
181 # and the rounding is different
182 err
= abs(actual
- expected
) / expected
183 self
.assertTrue(err
< 1e-7)
185 def test_sv_remap_fpmadds_fft_svstep(self
):
186 """>>> lst = SVP64Asm( [
187 "svshape 8, 1, 1, 1, 1",
188 "svremap 31, 1, 0, 2, 0, 1",
189 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
190 "setvl. 0, 0, 0, 1, 0, 0",
193 runs a full in-place O(N log2 N) butterfly schedule for
194 Discrete Fourier Transform. this version however uses
195 SVP64 "Vertical-First" Mode and so needs an explicit
198 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
199 (3 inputs, 2 outputs)
202 "svshape 8, 1, 1, 1, 1",
203 "svremap 31, 1, 0, 2, 0, 1",
204 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
205 "setvl. 0, 0, 0, 1, 0, 0",
210 # array and coefficients to test
211 av
= [7.0, -9.8, 3.0, -32.3,
212 -2.0, 5.0, -9.8, 31.3] # array 0..7
213 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
217 for i
, c
in enumerate(coe
):
218 fprs
[i
+8] = fp64toselectable(c
)
219 for i
, a
in enumerate(av
):
220 fprs
[i
+0] = fp64toselectable(a
)
222 # set total. err don't know how to calculate how many there are...
223 # do it manually for now
229 tablestep
= n
// size
230 for i
in range(0, n
, size
):
231 for j
in range(i
, i
+ halfsize
):
235 # SVSTATE (calculated VL)
236 svstate
= SVP64State()
237 svstate
.vl
[0:7] = VL
# VL
238 svstate
.maxvl
[0:7] = VL
# MAXVL
239 print ("SVSTATE", bin(svstate
.spr
.asint()))
241 with
Program(lst
, bigendian
=False) as program
:
242 sim
= self
.run_tst_program(program
, svstate
=svstate
,
244 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
245 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
246 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
247 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
248 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
249 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
250 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
252 # work out the results with the twin mul/add-sub
253 res
= transform_radix2(av
, coe
)
255 for i
, expected
in enumerate(res
):
256 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
257 for i
, expected
in enumerate(res
):
258 # convert to Power single
259 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
260 expected
= float(expected
)
261 actual
= float(sim
.fpr(i
))
262 # approximate error calculation, good enough test
263 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
264 # and the rounding is different
265 err
= abs(actual
- expected
) / expected
266 self
.assertTrue(err
< 1e-7)
268 def test_sv_fpmadds_fft(self
):
269 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
271 four in-place vector mul-adds, four in-place vector mul-subs
273 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
274 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
276 there is the *option* to target a different location (non-in-place)
279 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
280 FRS to perform the two multiplies. one add, one subtract.
282 sv.ffmadds FRT, FRA, FRC, FRB actually does:
283 fmadds FRT , FRA, FRC, FRA
284 fnmsubs FRT+vl, FRA, FRC, FRB+vl
286 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
291 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
292 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
293 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
295 # work out the results with the twin mul/add-sub
296 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
297 fprs
[i
+2] = fp64toselectable(a
)
298 fprs
[i
+6] = fp64toselectable(b
)
299 fprs
[i
+10] = fp64toselectable(c
)
303 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
304 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
306 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
308 # SVSTATE (in this case, VL=2)
309 svstate
= SVP64State()
310 svstate
.vl
[0:7] = 4 # VL
311 svstate
.maxvl
[0:7] = 4 # MAXVL
312 print ("SVSTATE", bin(svstate
.spr
.asint()))
314 with
Program(lst
, bigendian
=False) as program
:
315 sim
= self
.run_tst_program(program
, svstate
=svstate
,
317 # confirm that the results are as expected
318 for i
, (t
, u
) in enumerate(res
):
319 self
.assertEqual(sim
.fpr(i
+2), t
)
320 self
.assertEqual(sim
.fpr(i
+6), u
)
322 def test_sv_ffadds_fft(self
):
323 """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
325 four in-place vector adds, four in-place vector subs
327 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
328 FRS to perform the two multiplies. one add, one subtract.
330 sv.ffadds FRT, FRA, FRB actually does:
332 fsubs FRT+vl, FRA, FRB+vl
334 lst
= SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
339 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
340 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
342 # work out the results with the twin add-sub
343 for i
, (a
, b
) in enumerate(zip(av
, bv
)):
344 fprs
[i
+2] = fp64toselectable(a
)
345 fprs
[i
+6] = fp64toselectable(b
)
348 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
349 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
351 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
353 # SVSTATE (in this case, VL=2)
354 svstate
= SVP64State()
355 svstate
.vl
[0:7] = 4 # VL
356 svstate
.maxvl
[0:7] = 4 # MAXVL
357 print ("SVSTATE", bin(svstate
.spr
.asint()))
359 with
Program(lst
, bigendian
=False) as program
:
360 sim
= self
.run_tst_program(program
, svstate
=svstate
,
362 # confirm that the results are as expected
363 for i
, (t
, u
) in enumerate(res
):
364 a
= float(sim
.fpr(i
+2))
365 b
= float(sim
.fpr(i
+6))
368 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
369 for i
, (t
, u
) in enumerate(res
):
370 self
.assertEqual(sim
.fpr(i
+2), t
)
371 self
.assertEqual(sim
.fpr(i
+6), u
)
373 def tst_sv_remap_fpmadds_fft_svstep_complex(self
):
375 runs a full in-place O(N log2 N) butterfly schedule for
376 Discrete Fourier Transform. this version however uses
377 SVP64 "Vertical-First" Mode and so needs an explicit
380 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
381 (3 inputs, 2 outputs)
385 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
386 vec_r[jh] = vec_r[jl] - tpre
389 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
390 vec_i[jh] = vec_i[jl] - tpim
393 temp1 = vec[jh] * exptable[k]
395 vec[jh] = temp2 - temp1
396 vec[jl] = temp2 + temp1
398 lst
= SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
399 "svshape 8, 1, 1, 1, 1",
401 "svremap 31, 1, 0, 2, 0, 1",
402 "sv.fmuls 24, 0.v, 16.v",
403 "svremap 31, 1, 0, 2, 0, 1",
404 "sv.fmuls 25, 8.v, 20.v",
407 "svremap 31, 1, 0, 2, 0, 1",
408 "sv.fmuls 26, 0.v, 20.v",
409 "svremap 31, 1, 0, 2, 0, 1",
410 "sv.fmuls 26, 8.v, 16.v",
413 "svremap 31, 1, 0, 2, 0, 1",
414 "sv.ffadds 0.v, 24, 25",
416 "svremap 31, 1, 0, 2, 0, 1",
417 "sv.ffadds 8.v, 26, 27",
420 "setvl. 0, 0, 0, 1, 0, 0",
425 # array and coefficients to test
426 ar
= [7.0, -9.8, 3.0, -32.3,
427 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
428 ai
= [1.0, -1.8, 3.0, 19.3,
429 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
430 coer
= [-0.25, 0.5, 3.1, 6.2] # coefficients real
431 coei
= [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
435 for i
, a
in enumerate(ar
):
436 fprs
[i
+0] = fp64toselectable(a
)
437 for i
, a
in enumerate(ai
):
438 fprs
[i
+8] = fp64toselectable(a
)
439 for i
, c
in enumerate(coer
):
440 fprs
[i
+16] = fp64toselectable(c
)
441 for i
, c
in enumerate(coei
):
442 fprs
[i
+20] = fp64toselectable(c
)
444 # set total. err don't know how to calculate how many there are...
445 # do it manually for now
451 tablestep
= n
// size
452 for i
in range(0, n
, size
):
453 for j
in range(i
, i
+ halfsize
):
457 # SVSTATE (calculated VL)
458 svstate
= SVP64State()
459 svstate
.vl
[0:7] = VL
# VL
460 svstate
.maxvl
[0:7] = VL
# MAXVL
461 print ("SVSTATE", bin(svstate
.spr
.asint()))
463 with
Program(lst
, bigendian
=False) as program
:
464 sim
= self
.run_tst_program(program
, svstate
=svstate
,
466 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
467 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
468 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
469 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
470 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
471 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
472 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
474 # work out the results with the twin mul/add-sub, explicit
476 res_r
, res_i
= transform_radix2_complex(ar
, ai
, coer
, coei
)
478 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
479 print ("i", i
, float(sim
.fpr(i
)), float(sim
.fpr(i
+8)),
480 "expected_r", expected_r
,
481 "expected_i", expected_i
)
482 for i
, (expected_r
, expected_i
) in enumerate(zip(res_r
, res_i
)):
483 # convert to Power single
484 expected_r
= DOUBLE2SINGLE(fp64toselectable(expected_r
))
485 expected_r
= float(expected_r
)
486 actual_r
= float(sim
.fpr(i
))
487 # approximate error calculation, good enough test
488 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
489 # and the rounding is different
490 err
= abs(actual_r
- expected_r
) / expected_r
491 self
.assertTrue(err
< 1e-7)
492 # convert to Power single
493 expected_i
= DOUBLE2SINGLE(fp64toselectable(expected_i
))
494 expected_i
= float(expected_i
)
495 actual_i
= float(sim
.fpr(i
+8))
496 # approximate error calculation, good enough test
497 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
498 # and the rounding is different
499 err
= abs(actual_i
- expected_i
) / expected_i
500 self
.assertTrue(err
< 1e-7)
502 def test_sv_ffadds_fft_scalar(self
):
503 """>>> lst = ["sv.ffadds 2.v, 12, 13"
505 four in-place vector adds and subs, but done with a scalar
508 lst
= SVP64Asm(["sv.ffadds 2.v, 12, 13"
515 fprs
[12] = fp64toselectable(scalar_a
)
516 fprs
[13] = fp64toselectable(scalar_b
)
518 # work out the results with the twin add-sub
520 t
= scalar_b
+ scalar_a
521 u
= scalar_b
- scalar_a
522 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
523 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
525 print ("FFT", i
, "res", t
, u
)
527 # SVSTATE (in this case, VL=2)
528 svstate
= SVP64State()
529 svstate
.vl
[0:7] = 4 # VL
530 svstate
.maxvl
[0:7] = 4 # MAXVL
531 print ("SVSTATE", bin(svstate
.spr
.asint()))
533 with
Program(lst
, bigendian
=False) as program
:
534 sim
= self
.run_tst_program(program
, svstate
=svstate
,
536 # confirm that the results are as expected
537 for i
, (t
, u
) in enumerate(res
):
538 a
= float(sim
.fpr(i
+2))
539 b
= float(sim
.fpr(i
+6))
542 print ("FFT", i
, "in", a
, b
, "res", t
, u
)
543 for i
, (t
, u
) in enumerate(res
):
544 self
.assertEqual(sim
.fpr(i
+2), t
)
545 self
.assertEqual(sim
.fpr(i
+6), u
)
547 def run_tst_program(self
, prog
, initial_regs
=None,
551 if initial_regs
is None:
552 initial_regs
= [0] * 32
553 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
554 initial_fprs
=initial_fprs
,
565 if __name__
== "__main__":