ffmuls test, had to add to b not a in expected results
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.isa.caller import ISACaller
6 from openpower.decoder.power_decoder import (create_pdecode)
7 from openpower.decoder.power_decoder2 import (PowerDecode2)
8 from openpower.simulator.program import Program
9 from openpower.decoder.isa.caller import ISACaller, SVP64State
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.decoder.orderedset import OrderedSet
12 from openpower.decoder.isa.all import ISA
13 from openpower.decoder.isa.test_caller import Register, run_tst
14 from openpower.sv.trans.svp64 import SVP64Asm
15 from openpower.consts import SVP64CROffs
16 from copy import deepcopy
17 from openpower.decoder.helpers import fp64toselectable
18 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
def transform_radix2(vec, exptable):
    """Apply the radix-2 butterfly schedule to ``vec`` in place.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Only the triple-nested butterfly loops are performed: the
    bit-reversed input permutation is NOT applied here (it was
    commented out in this test helper), so the caller gets exactly
    the sequence of twin multiply / add-subtract operations and nothing
    else.

    Parameters:
        vec: mutable, indexable sequence of numbers.  It is modified in
            place.  Its length is expected to be a power of two; other
            lengths are not validated here.
        exptable: indexable sequence of coefficients.  Indices read are
            multiples of ``n // size`` and never reach ``n // 2``, so
            ``len(vec) // 2`` entries are sufficient.

    Returns:
        The same ``vec`` object that was passed in, after modification.
    """
    n = len(vec)

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize
                vjh = vec[jh]
                temp1 = vjh * exptable[k]
                temp2 = vec[jl]
                # twin butterfly: one subtract, one add, sharing the
                # same product
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
class DecoderTestCase(FHDLTestCase):
    """SVP64 FFT butterfly (``sv.ffmadds``) tests, executed via ``run_tst``."""

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0..31 of ``sim`` match ``expected`` (64-bit)."""
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))

    def test_sv_remap_fpmadds_fft(self):
        """>>> lst = ["svremap 8, 1, 1, 1",
                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                        ]
            runs a full in-place O(N log2 N) butterfly schedule for
            Discrete Fourier Transform.

            this is the twin "butterfly" mul-add-sub from Cooley-Tukey
            https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

            there is the *option* to target a different location (non-in-place)
            just in case.

            SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
            (3 inputs, 2 outputs)
        """
        lst = SVP64Asm(["svremap 8, 1, 1, 1",
                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                        ])
        lst = list(lst)

        # array and coefficients to test
        av = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7
        coe = [-0.25, 0.5, 3.1, 6.2]  # coefficients

        # store in regfile: data in FPRs 0..7, coefficients in FPRs 8..11
        fprs = [0] * 32
        for i, c in enumerate(coe):
            fprs[i+8] = fp64toselectable(c)
        for i, a in enumerate(av):
            fprs[i+0] = fp64toselectable(a)

        # set total. err don't know how to calculate how many there are...
        # do it manually for now: count one element per butterfly, using
        # the same loop nest as transform_radix2
        VL = 0
        size = 2
        n = len(av)
        while size <= n:
            halfsize = size // 2
            for i in range(0, n, size):
                for j in range(i, i + halfsize):
                    VL += 1
            size *= 2

        # SVSTATE (calculated VL)
        svstate = SVP64State()
        svstate.vl[0:7] = VL  # VL
        svstate.maxvl[0:7] = VL  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            print("spr svshape0", sim.spr['SVSHAPE0'])
            print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
            print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
            print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
            print("spr svshape1", sim.spr['SVSHAPE1'])
            print("spr svshape2", sim.spr['SVSHAPE2'])
            print("spr svshape3", sim.spr['SVSHAPE3'])

            # work out the results with the twin mul/add-sub
            # (note: this modifies av in place; the register file was
            # already populated from the original values above)
            res = transform_radix2(av, coe)

            for i, expected in enumerate(res):
                print("i", i, float(sim.fpr(i)), "expected", expected)
            for i, expected in enumerate(res):
                # convert to Power single
                expected = DOUBLE2SINGLE(fp64toselectable(expected))
                expected = float(expected)
                actual = float(sim.fpr(i))
                # approximate error calculation, good enough test
                # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
                # and the rounding is different.
                # The error is taken relative to the *magnitude* of the
                # expected value: dividing by a negative expected value
                # would yield a negative "error" that passes any upper
                # bound, silently disabling the check for that element.
                if expected == 0.0:
                    err = abs(actual)
                else:
                    err = abs(actual - expected) / abs(expected)
                # one single-precision ULP is up to ~1.2e-7 relative, so
                # allow a few ULPs of rounding difference
                self.assertTrue(err < 1e-6,
                                "FPR %d: actual %r expected %r rel-err %r" %
                                (i, actual, expected, err))

    def test_sv_fpmadds_fft(self):
        """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                        ]
            four in-place vector mul-adds, four in-place vector mul-subs

            this is the twin "butterfly" mul-add-sub from Cooley-Tukey
            https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

            there is the *option* to target a different location (non-in-place)
            just in case.

            SVP64 "FFT" mode will *automatically* offset FRB and an implicit
            FRS to perform the two multiplies.  one add, one subtract.

            sv.ffmadds FRT, FRA, FRC, FRB  actually does:
                fmadds  FRT   , FRA, FRC, FRA
                fnmsubs FRT+vl, FRA, FRC, FRB+vl

            the expected values checked by this test are, per element:
                FPR[2+i] = b + a*c
                FPR[6+i] = b - a*c
            with a initially in FPR[2+i], b in FPR[6+i], c in FPR[10+i]
        """
        lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                        ])
        lst = list(lst)

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
        res = []
        # work out the results with the twin mul/add-sub
        for i, (a, b, c) in enumerate(zip(av, bv, coe)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            fprs[i+10] = fp64toselectable(c)
            mul = a * c
            t = b + mul
            u = b - mul
            t = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            u = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl[0:7] = 4  # VL
        svstate.maxvl[0:7] = 4  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None,
                        initial_mem=None,
                        initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        ``initial_regs`` defaults to 32 zeroed GPRs; the remaining
        arguments are passed to ``run_tst`` unchanged.  The GPR and FPR
        register files are dumped to stdout after the run to aid
        debugging.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, mem=initial_mem,
                            initial_fprs=initial_fprs,
                            svstate=svstate)

        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()

        return simulator
if __name__ == "__main__":
    # run every test case in this module when executed as a script
    unittest.main()