1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from copy
import deepcopy
17 from openpower
.decoder
.helpers
import fp64toselectable
18 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
21 def transform_radix2(vec
, exptable
):
23 # FFT and convolution test (Python), based on Project Nayuki
25 # Copyright (c) 2020 Project Nayuki. (MIT License)
26 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
29 # bits of the integer 'val'.
30 def reverse_bits(val
, width
):
32 for _
in range(width
):
33 result
= (result
<< 1) |
(val
& 1)
39 levels
= n
.bit_length() - 1
41 # Copy with bit-reversed permutation
42 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
48 for i
in range(0, n
, size
):
50 for j
in range(i
, i
+ halfsize
):
51 # exact same actual computation, just embedded in
52 # triple-nested for-loops
53 jl
, jh
= j
, j
+halfsize
55 temp1
= vec
[jh
] * exptable
[k
]
57 vec
[jh
] = temp2
- temp1
58 vec
[jl
] = temp2
+ temp1
59 print ("xform jl jh k", jl
, jh
, k
,
60 "vj vjh ek", temp2
, vjh
, exptable
[k
],
61 "t1, t2", temp1
, temp2
,
62 "v[jh] v[jl]", vec
[jh
], vec
[jl
])
69 class DecoderTestCase(FHDLTestCase
):
71 def _check_regs(self
, sim
, expected
):
73 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
75 def test_sv_remap_fpmadds_fft(self
):
76 """>>> lst = ["svremap 8, 1, 1, 1",
77 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
79 runs a full in-place O(N log2 N) butterfly schedule for
80 Discrete Fourier Transform.
82 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
83 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
85 there is the *option* to target a different location (non-in-place)
88 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
91 lst
= SVP64Asm( ["svremap 8, 1, 1, 1",
92 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
96 # array and coefficients to test
97 av
= [7.0, -9.8, 3.0, -32.3,
98 -2.0, 5.0, -9.8, 31.3] # array 0..7
99 coe
= [-0.25, 0.5, 3.1, 6.2] # coefficients
103 for i
, c
in enumerate(coe
):
104 fprs
[i
+8] = fp64toselectable(c
)
105 for i
, a
in enumerate(av
):
106 fprs
[i
+0] = fp64toselectable(a
)
108 # set the total (VL). it is not yet known how to calculate how many
109 # elements there are, so work it out manually for now
115 tablestep
= n
// size
116 for i
in range(0, n
, size
):
117 for j
in range(i
, i
+ halfsize
):
121 # SVSTATE (calculated VL)
122 svstate
= SVP64State()
123 svstate
.vl
[0:7] = VL
# VL
124 svstate
.maxvl
[0:7] = VL
# MAXVL
125 print ("SVSTATE", bin(svstate
.spr
.asint()))
127 with
Program(lst
, bigendian
=False) as program
:
128 sim
= self
.run_tst_program(program
, svstate
=svstate
,
130 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
131 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
132 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
133 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
134 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
135 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
136 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
138 # work out the results with the twin mul/add-sub
139 res
= transform_radix2(av
, coe
)
141 for i
, expected
in enumerate(res
):
142 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
143 for i
, expected
in enumerate(res
):
144 # convert to Power single
145 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
146 expected
= float(expected
)
147 actual
= float(sim
.fpr(i
))
148 # approximate error calculation, good enough test
149 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
150 # and the rounding is different
151 err
= abs(actual
- expected
) / expected
152 self
.assertTrue(err
< 1e-7)
155 def test_sv_fpmadds_fft(self
):
156 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
158 four in-place vector mul-adds, four in-place vector mul-subs
160 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
161 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
163 there is the *option* to target a different location (non-in-place)
166 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
167 FRS to perform the two multiplies. one add, one subtract.
169 sv.ffmadds FRT, FRA, FRC, FRB actually does:
170 fmadds FRT , FRA, FRC, FRA
171 fnmsubs FRT+vl, FRA, FRC, FRB+vl
173 lst
= SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
178 av
= [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
179 bv
= [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
180 coe
= [-1.0, 4.0, 3.1, 6.2] # coefficients
182 # work out the results with the twin mul/add-sub
183 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, coe
)):
184 fprs
[i
+2] = fp64toselectable(a
)
185 fprs
[i
+6] = fp64toselectable(b
)
186 fprs
[i
+10] = fp64toselectable(c
)
190 t
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
191 u
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
193 print ("FFT", i
, "in", a
, b
, "coeff", c
, "mul", mul
, "res", t
, u
)
195 # SVSTATE (in this case, VL=4)
196 svstate
= SVP64State()
197 svstate
.vl
[0:7] = 4 # VL
198 svstate
.maxvl
[0:7] = 4 # MAXVL
199 print ("SVSTATE", bin(svstate
.spr
.asint()))
201 with
Program(lst
, bigendian
=False) as program
:
202 sim
= self
.run_tst_program(program
, svstate
=svstate
,
204 # confirm that the results are as expected
205 for i
, (t
, u
) in enumerate(res
):
206 self
.assertEqual(sim
.fpr(i
+2), t
)
207 self
.assertEqual(sim
.fpr(i
+6), u
)
209 def run_tst_program(self
, prog
, initial_regs
=None,
213 if initial_regs
is None:
214 initial_regs
= [0] * 32
215 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
216 initial_fprs
=initial_fprs
,
227 if __name__
== "__main__":