ffmuls test, had to add to b not a in expected results
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.isa.caller import ISACaller
6 from openpower.decoder.power_decoder import (create_pdecode)
7 from openpower.decoder.power_decoder2 import (PowerDecode2)
8 from openpower.simulator.program import Program
9 from openpower.decoder.isa.caller import ISACaller, SVP64State
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.decoder.orderedset import OrderedSet
12 from openpower.decoder.isa.all import ISA
13 from openpower.decoder.isa.test_caller import Register, run_tst
14 from openpower.sv.trans.svp64 import SVP64Asm
15 from openpower.consts import SVP64CROffs
16 from copy import deepcopy
17 from openpower.decoder.helpers import fp64toselectable
18 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
19
20
def transform_radix2(vec, exptable):
    """Run the radix-2 FFT butterfly passes over ``vec``, in place.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Only the butterfly schedule is performed: no bit-reversed permutation
    is applied to ``vec`` first.  For each pass (``size`` = 2, 4, ... up to
    ``len(vec)``) and each pair ``jl``, ``jh = jl + size // 2`` the update
    is::

        temp1   = vec[jh] * exptable[k]
        vec[jh] = vec[jl] - temp1
        vec[jl] = vec[jl] + temp1

    where ``k`` starts at 0 for every group of ``size`` elements and
    advances by ``len(vec) // size`` per pair.  Each butterfly is also
    printed to stdout for debugging.

    Args:
        vec: mutable sequence of numbers; overwritten with the result.
        exptable: indexable coefficient table; entries
            ``0 .. len(vec) // 2 - 1`` are read.

    Returns:
        The same ``vec`` object that was passed in.

    Raises:
        ValueError: if ``len(vec)`` is not a power of two, or if
            ``exptable`` has fewer than ``len(vec) // 2`` entries.
    """
    n = len(vec)

    # Validate before touching vec, so that bad input cannot leave it
    # half-transformed (n == 0 and n == 1 pass and are returned unchanged).
    if n & (n - 1):
        raise ValueError("Length is not a power of 2")
    if len(exptable) < n // 2:
        raise ValueError("exptable needs at least %d entries, got %d"
                         % (n // 2, len(exptable)))

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for jl in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jh = jl + halfsize
                vjh = vec[jh]
                temp1 = vjh * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
67
68
class DecoderTestCase(FHDLTestCase):
    """Simulator tests for the SVP64 twin-butterfly ``sv.ffmadds``.

    Each test assembles a short SVP64 program, runs it through
    ``run_tst`` (via :meth:`run_tst_program`) with pre-loaded FPRs, and
    compares the resulting FPRs against values computed in Python.
    """

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0..31 of ``sim`` equal ``expected[0..31]``.

        Each expected value is wrapped as a 64-bit SelectableInt before
        comparison.
        """
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))

    def test_sv_remap_fpmadds_fft(self):
        """>>> lst = ["svremap 8, 1, 1, 1",
                       "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                      ]
            runs a full in-place O(N log2 N) butterfly schedule for
            Discrete Fourier Transform.

            this is the twin "butterfly" mul-add-sub from Cooley-Tukey
            https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

            there is the *option* to target a different location (non-in-place)
            just in case.

            SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
            (3 inputs, 2 outputs)

            The 8-element array is loaded into FPRs 0..7 and the 4
            coefficients into FPRs 8..11; after the run FPRs 0..7 are
            compared against transform_radix2() applied to the same data.
        """
        lst = SVP64Asm(["svremap 8, 1, 1, 1",
                        "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                        ])
        lst = list(lst)

        # array and coefficients to test
        av = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7
        coe = [-0.25, 0.5, 3.1, 6.2]  # coefficients

        # store in regfile
        fprs = [0] * 32
        for i, c in enumerate(coe):
            fprs[i+8] = fp64toselectable(c)
        for i, a in enumerate(av):
            fprs[i+0] = fp64toselectable(a)

        # total number of butterflies: a power-of-two length n has
        # log2(n) passes of n/2 butterflies each (8 elements -> 3 * 4 = 12)
        n = len(av)
        VL = (n // 2) * (n.bit_length() - 1)

        # SVSTATE (calculated VL)
        svstate = SVP64State()
        svstate.vl[0:7] = VL  # VL
        svstate.maxvl[0:7] = VL  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            print("spr svshape0", sim.spr['SVSHAPE0'])
            print("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
            print("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
            print("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
            print("spr svshape1", sim.spr['SVSHAPE1'])
            print("spr svshape2", sim.spr['SVSHAPE2'])
            print("spr svshape3", sim.spr['SVSHAPE3'])

            # work out the results with the twin mul/add-sub
            # (fprs was filled in above, so av being overwritten is fine)
            res = transform_radix2(av, coe)

            for i, expected in enumerate(res):
                print("i", i, float(sim.fpr(i)), "expected", expected)

            # approximate error calculation, good enough test
            # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
            # and the rounding is different.  One single-precision ULP is
            # about 1.2e-7 relative, so the bound has to sit above that.
            tolerance = 1e-6
            for i, expected in enumerate(res):
                # convert to Power single
                expected = DOUBLE2SINGLE(fp64toselectable(expected))
                expected = float(expected)
                actual = float(sim.fpr(i))
                # the magnitude of the difference is what matters: without
                # abs() on the divisor a negative expected value gives a
                # negative "error" that passes any upper bound
                err = abs(actual - expected)
                if expected != 0.0:
                    err /= abs(expected)
                self.assertTrue(err < tolerance,
                                "FPR %d: actual %r expected %r err %g"
                                % (i, actual, expected, err))

    def test_sv_fpmadds_fft(self):
        """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                      ]
            four in-place vector mul-adds, four in-place vector mul-subs

            this is the twin "butterfly" mul-add-sub from Cooley-Tukey
            https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

            there is the *option* to target a different location (non-in-place)
            just in case.

            SVP64 "FFT" mode will *automatically* offset FRB and an implicit
            FRS to perform the two multiplies. one add, one subtract.

            sv.ffmadds FRT, FRA, FRC, FRB  actually does:
                fmadds  FRT   , FRA, FRC, FRA
                fnmsubs FRT+vl, FRA, FRC, FRB+vl

            In this test a[i] is loaded into FPR 2+i, b[i] into FPR 6+i and
            the coefficient c[i] into FPR 10+i (i = 0..3); the expected
            results are b + a*c in FPR 2+i and b - a*c in FPR 6+i, each
            rounded to single precision.
        """
        lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                        ])
        lst = list(lst)

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
        res = []
        # work out the results with the twin mul/add-sub
        for i, (a, b, c) in enumerate(zip(av, bv, coe)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            fprs[i+10] = fp64toselectable(c)
            mul = a * c
            t = b + mul
            u = b - mul
            t = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            u = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl[0:7] = 4  # VL
        svstate.maxvl[0:7] = 4  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None,
                        initial_mem=None,
                        initial_fprs=None):
        """Run ``prog`` through run_tst() and return the simulator.

        ``initial_regs`` defaults to 32 zeroed GPRs; ``svstate``,
        ``initial_mem`` and ``initial_fprs`` are passed to run_tst()
        unchanged.  The GPR and FPR register files are dumped to stdout
        after the run.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, mem=initial_mem,
                            initial_fprs=initial_fprs,
                            svstate=svstate)

        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()

        return simulator
225
226
# Run this module's tests via unittest when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()