62b8609e80ebee4ec4cf75cf149d62bd8d4f0f4d
1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
16 def transform_radix2(vec
, exptable
):
18 # FFT and convolution test (Python), based on Project Nayuki
20 # Copyright (c) 2020 Project Nayuki. (MIT License)
21 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
24 # bits of the integer 'val'.
25 def reverse_bits(val
, width
):
27 for _
in range(width
):
28 result
= (result
<< 1) |
(val
& 1)
34 levels
= n
.bit_length() - 1
36 # Copy with bit-reversed permutation
37 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
43 for i
in range(0, n
, size
):
45 for j
in range(i
, i
+ halfsize
):
46 # exact same actual computation, just embedded in
47 # triple-nested for-loops
48 jl
, jh
= j
, j
+halfsize
50 temp1
= vec
[jh
] * exptable
[k
]
52 vec
[jh
] = temp2
- temp1
53 vec
[jl
] = temp2
+ temp1
54 print ("xform jl jh k", jl
, jh
, k
,
55 "vj vjh ek", temp2
, vjh
, exptable
[k
],
56 "t1, t2", temp1
, temp2
,
57 "v[jh] v[jl]", vec
[jh
], vec
[jl
])
64 class FFTTestCase(FHDLTestCase
):
66 def _check_regs(self
, sim
, expected
):
68 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
70 def test_sv_remap_fpmadds_fft(self
):
71 """>>> lst = ["svremap 8, 1, 1, 1",
72 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
74 runs a full in-place O(N log2 N) butterfly schedule for
75 Discrete Fourier Transform.
77 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
78 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
80 there is the *option* to target a different location (non-in-place)
83 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
86 lst
= SVP64Asm( ["svremap 8, 1, 1, 1",
87 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
91 # array and coefficients to test
92 av
= [7.0, -9.8, 3.0, -32.3,
93 -2.0, 5.0, -9.8, 31.3] # array 0..7
94 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
98 for i
, c
in enumerate(coe
):
99 fprs
[i
+8] = fp64toselectable(c
)
100 for i
, a
in enumerate(av
):
101 fprs
[i
+0] = fp64toselectable(a
)
103 # set the total (VL). no direct calculation of how many there are is known yet,
104 # so count them manually for now
110 tablestep
= n
// size
111 for i
in range(0, n
, size
):
112 for j
in range(i
, i
+ halfsize
):
116 # SVSTATE (calculated VL)
117 svstate
= SVP64State()
118 svstate
.vl
[0:7] = VL
# VL
119 svstate
.maxvl
[0:7] = VL
# MAXVL
120 print ("SVSTATE", bin(svstate
.spr
.asint()))
122 with
Program(lst
, bigendian
=False) as program
:
123 sim
= self
.run_tst_program(program
, svstate
=svstate
,
125 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
126 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
127 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
128 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
129 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
130 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
131 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
133 # work out the results with the twin mul/add-sub
134 res
= transform_radix2(av
, coe
)
136 for i
, expected
in enumerate(res
):
137 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
138 for i
, expected
in enumerate(res
):
139 # convert to Power single
140 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
141 expected
= float(expected
)
142 actual
= float(sim
.fpr(i
))
143 # approximate error calculation, good enough test
144 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
145 # and the rounding is different
146 err
= abs(actual
- expected
) / expected
147 self
.assertTrue(err
< 1e-7)
149 def test_sv_remap_fpmadds_fft_svstep(self
):
150 """>>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
151 "svremap 8, 1, 1, 1",
152 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
153 "setvl. 0, 0, 0, 1, 0, 0",
156 runs a full in-place O(N log2 N) butterfly schedule for
157 Discrete Fourier Transform. this version however uses
158 SVP64 "Vertical-First" Mode and so needs an explicit
161 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
162 (3 inputs, 2 outputs)
164 lst
= SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
165 "svremap 8, 1, 1, 1",
166 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
167 "setvl. 0, 0, 0, 1, 0, 0",
172 # array and coefficients to test
173 av
= [7.0, -9.8, 3.0, -32.3,
174 -2.0, 5.0, -9.8, 31.3] # array 0..7
175 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
179 for i
, c
in enumerate(coe
):
180 fprs
[i
+8] = fp64toselectable(c
)
181 for i
, a
in enumerate(av
):
182 fprs
[i
+0] = fp64toselectable(a
)
184 # set the total (VL). no direct calculation of how many there are is known yet,
185 # so count them manually for now
191 tablestep
= n
// size
192 for i
in range(0, n
, size
):
193 for j
in range(i
, i
+ halfsize
):
197 # SVSTATE (calculated VL)
198 svstate
= SVP64State()
199 svstate
.vl
[0:7] = VL
# VL
200 svstate
.maxvl
[0:7] = VL
# MAXVL
201 print ("SVSTATE", bin(svstate
.spr
.asint()))
203 with
Program(lst
, bigendian
=False) as program
:
204 sim
= self
.run_tst_program(program
, svstate
=svstate
,
206 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
207 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
208 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
209 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
210 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
211 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
212 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
214 # work out the results with the twin mul/add-sub
215 res
= transform_radix2(av
, coe
)
217 for i
, expected
in enumerate(res
):
218 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
219 for i
, expected
in enumerate(res
):
220 # convert to Power single
221 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
222 expected
= float(expected
)
223 actual
= float(sim
.fpr(i
))
224 # approximate error calculation, good enough test
225 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
226 # and the rounding is different
227 err
= abs(actual
- expected
) / expected
228 self
.assertTrue(err
< 1e-7)
230 def test_sv_fpmadds_fft(self
):
231 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
233 four in-place vector mul-adds, four in-place vector mul-subs
235 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
236 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
238 there is the *option* to target a different location (non-in-place)
241 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
242 FRS to perform the two multiplies. one add, one subtract.
244 sv.ffmadds FRT, FRA, FRC, FRB actually does:
245 fmadds FRT , FRA, FRC, FRA
246 fnmsubs FRT+vl, FRA, FRC, FRB+vl
248 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
253 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
254 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
255 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
257 # work out the results with the twin mul/add-sub
258 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
259 fprs
[i
+2] = fp64toselectable(a
)
260 fprs
[i
+6] = fp64toselectable(b
)
261 fprs
[i
+10] = fp64toselectable(c
)
265 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
266 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
268 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
270 # SVSTATE (in this case, VL=4)
271 svstate
= SVP64State()
272 svstate
.vl
[0:7] = 4 # VL
273 svstate
.maxvl
[0:7] = 4 # MAXVL
274 print ("SVSTATE", bin(svstate
.spr
.asint()))
276 with
Program(lst
, bigendian
=False) as program
:
277 sim
= self
.run_tst_program(program
, svstate
=svstate
,
279 # confirm that the results are as expected
280 for i
, (t
, u
) in enumerate(res
):
281 self
.assertEqual(sim
.fpr(i
+2), t
)
282 self
.assertEqual(sim
.fpr(i
+6), u
)
284 def run_tst_program(self
, prog
, initial_regs
=None,
288 if initial_regs
is None:
289 initial_regs
= [0] * 32
290 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
291 initial_fprs
=initial_fprs
,
302 if __name__
== "__main__":