1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
14 from openpower
.decoder
.isa
.remap_dct_yield
import (halfrev2
, reverse_bits
,
15 iterate_dct_inner_butterfly_indices
)
# Pure-Python model of the inner DCT butterfly; test_sv_remap_fpmadds_dct
# uses its result as the expected values for the simulator run.
# NOTE(review): this listing is lossy - the statements that define n, the
# initial ri, xdim/ydim/zdim, the SVSHAPE objects and coeff are not visible
# here, nor is the return statement; confirm each against the complete file.
18 def transform_inner_radix2(vec
, ctable
):
23 print ("transform2", n
)
# levels is one less than the bit length of n
24 levels
= n
.bit_length() - 1
26 # reference (read/write) the in-place data in *reverse-bit-order*
28 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
30 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
31 # TODO: merge these two
32 vec
= halfrev2(vec
, False)
33 vec
= [vec
[ri
[i
]] for i
in range(n
)]
# schedule handed to the first index iterator (supplies jl in the loop below)
47 SVSHAPE0
.lims
= [xdim
, ydim
, zdim
]
48 SVSHAPE0
.order
= [0,1,2] # experiment with different permutations, here
50 SVSHAPE0
.submode2
= 0b01
52 SVSHAPE0
.offset
= 0 # experiment with different offset, here
53 SVSHAPE0
.invxyz
= [1,0,0] # inversion if desired
# schedule handed to the second index iterator (supplies jh in the loop below)
# NOTE(review): every setting visible here matches SVSHAPE0, so whatever makes
# the two schedules differ must be on lines not shown - TODO confirm
56 SVSHAPE1
.lims
= [xdim
, ydim
, zdim
]
57 SVSHAPE1
.order
= [0,1,2] # experiment with different permutations, here
59 SVSHAPE1
.submode2
= 0b01
61 SVSHAPE1
.offset
= 0 # experiment with different offset, here
62 SVSHAPE1
.invxyz
= [1,0,0] # inversion if desired
64 # enumerate over the iterator function, getting new indices
65 i0
= iterate_dct_inner_butterfly_indices(SVSHAPE0
)
66 i1
= iterate_dct_inner_butterfly_indices(SVSHAPE1
)
# each iterator yields an (index, end-flags) pair; the two are walked in lockstep
67 for k
, ((jl
, jle
), (jh
, jhe
)) in enumerate(zip(i0
, i1
)):
68 t1
, t2
= vec
[jl
], vec
[jh
]
# the jh slot receives the pair's difference scaled by the reciprocal of coeff
# NOTE(review): the statements defining coeff and updating vec[jl] are not
# visible in this listing
71 vec
[jh
] = (t1
- t2
) * (1/coeff
)
72 print ("coeff", "ci", k
,
74 "i/n", (k
+0.5), coeff
, vec
[jl
], vec
[jh
],
75 "end", bin(jle
), bin(jhe
))
# NOTE(review): the body of this condition is not visible; presumably it
# leaves the loop - confirm
76 if jle
== 0b111: # all loops end
82 class DCTTestCase(FHDLTestCase
):
# Asserts that sim.gpr(i) equals expected[i] wrapped as a 64-bit SelectableInt.
# NOTE(review): the loop that supplies i is not visible in this listing.
84 def _check_regs(self
, sim
, expected
):
86 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64))
# Single-instruction "sv.fdmadds" check: seeds fprs, runs the program and
# compares the resulting FPRs against values computed in Python.
# NOTE(review): the name starts with tst_ rather than test_, so - assuming
# FHDLTestCase keeps unittest's default method prefix - this method is not
# collected automatically; confirm whether that is deliberate.
# NOTE(review): several statements (creation of fprs and res, the t/u/diff
# arithmetic, the end of the docstring) are not visible in this listing.
88 def tst_sv_ffadds_dct(self
):
89 """>>> lst = ["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
91 four in-place vector adds, four in-place vector mul-subs
93 SVP64 "DCT" mode will *automatically* offset FRB and an implicit
94 FRS to perform the two multiplies. one add, one subtract.
96 sv.fdadds FRT, FRA, FRC, FRB actually does:
98 fsubs FRT+vl, FRA, FRB+vl
100 lst
= SVP64Asm(["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
104 # cheat here with these values, they're selected so that
105 # rounding errors do not occur. sigh.
107 av
= [7.0, -0.8, 2.0, -2.3] # first half of array 0..3
108 bv
= [-2.0, 2.0, -0.8, 1.4] # second half of array 4..7
109 cv
= [-1.0, 0.5, 2.5, -0.25] # coefficients
111 # work out the results with the twin add-sub
# seed fprs: a at index i, b at index i+4, coefficient c at index i+8
112 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, cv
)):
113 fprs
[i
+0] = fp64toselectable(a
)
114 fprs
[i
+4] = fp64toselectable(b
)
115 fprs
[i
+8] = fp64toselectable(c
)
116 # this isn't quite a perfect replication of the
117 # FP32 mul-add-sub. better really to use FPMUL32, FPADD32
118 # and FPSUB32 directly to be honest.
121 diff
= DOUBLE2SINGLE(fp64toselectable(diff
)) # FP32 round
124 tc
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
125 uc
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
127 print ("DCT", i
, "in", a
, b
, "c", c
, "res", t
, u
)
129 # SVSTATE (in this case, VL=2)
130 svstate
= SVP64State()
# NOTE(review): the comment above says VL=2 while MAXVL is set to 4 here; the
# VL assignment itself is not visible in this listing - confirm which is meant
132 svstate
.maxvl
= 4 # MAXVL
133 print ("SVSTATE", bin(svstate
.asint()))
135 with
Program(lst
, bigendian
=False) as program
:
136 sim
= self
.run_tst_program(program
, svstate
=svstate
,
138 # confirm that the results are as expected
139 for i
, (t
, u
) in enumerate(res
):
140 a
= float(sim
.fpr(i
+0))
141 b
= float(sim
.fpr(i
+4))
144 print ("DCT", i
, "in", a
, b
, "res", t
, u
)
# FPR i must equal t and FPR i+4 must equal u, compared exactly
145 for i
, (t
, u
) in enumerate(res
):
146 self
.assertEqual(sim
.fpr(i
+0), t
)
147 self
.assertEqual(sim
.fpr(i
+4), u
)
# Runs svshape + svremap + sv.fdmadds on the simulator and compares FPRs 0..
# against transform_inner_radix2, allowing a relative error below 1e-7.
# NOTE(review): the statements defining n, the initial ri and fprs, and the
# end of the docstring, are not visible in this listing.
149 def test_sv_remap_fpmadds_dct(self
):
150 """>>> lst = ["svshape 4, 1, 1, 2, 0",
151 "svremap 31, 1, 0, 2, 0, 1, 0",
152 "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
154 runs a full in-place O(N log2 N) butterfly schedule for
157 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
158 (3 inputs, 2 outputs)
160 lst
= SVP64Asm( ["svshape 4, 1, 1, 2, 0",
161 "svremap 31, 1, 0, 2, 0, 1, 0",
162 "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
166 # array and coefficients to test
168 av
= [7.0, -9.8, 3.0, -32.3]
169 coe
= [-0.25, 0.5, 3.1, 6.2] # 4 coefficients
171 levels
= n
.bit_length() - 1
173 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
174 avi
= [7.0, -0.8, 2.0, -2.3] # first half of array 0..3
# av is rebuilt from avi here (half-swap, then bit-reversed indexing), so the
# literal assigned to av further up is not the data that gets loaded
175 av
= halfrev2(avi
, False)
176 av
= [av
[ri
[i
]] for i
in range(n
)]
# coefficients are placed at fprs[8..], the permuted input at fprs[0..]
180 for i
, c
in enumerate(coe
):
181 fprs
[i
+8] = fp64toselectable(c
)
182 for i
, a
in enumerate(av
):
183 fprs
[i
+0] = fp64toselectable(a
)
185 with
Program(lst
, bigendian
=False) as program
:
186 sim
= self
.run_tst_program(program
, initial_fprs
=fprs
)
187 print ("spr svshape0", sim
.spr
['SVSHAPE0'])
188 print (" xdimsz", sim
.spr
['SVSHAPE0'].xdimsz
)
189 print (" ydimsz", sim
.spr
['SVSHAPE0'].ydimsz
)
190 print (" zdimsz", sim
.spr
['SVSHAPE0'].zdimsz
)
191 print ("spr svshape1", sim
.spr
['SVSHAPE1'])
192 print ("spr svshape2", sim
.spr
['SVSHAPE2'])
193 print ("spr svshape3", sim
.spr
['SVSHAPE3'])
195 # work out the results with the twin mul/add-sub
# the model is given the un-permuted avi; it applies the halfrev2 and
# bit-reversed permutation itself
196 res
= transform_inner_radix2(avi
, coe
)
198 for i
, expected
in enumerate(res
):
199 print ("i", i
, float(sim
.fpr(i
)), "expected", expected
)
200 for i
, expected
in enumerate(res
):
201 # convert to Power single
202 expected
= DOUBLE2SINGLE(fp64toselectable(expected
))
203 expected
= float(expected
)
204 actual
= float(sim
.fpr(i
))
205 # approximate error calculation, good enough test
206 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
207 # and the rounding is different
# NOTE(review): relative error - this divides by expected, so an expected
# value of exactly 0.0 would raise ZeroDivisionError
208 err
= abs((actual
- expected
) / expected
)
209 print ("err", i
, err
)
210 self
.assertTrue(err
< 1e-7)
# Helper shared by the tests: forwards prog plus initial register state to
# run_tst and binds the result to simulator.
# NOTE(review): the rest of the parameter list (callers also pass svstate=
# and initial_fprs=), the remaining run_tst arguments and the tail of the
# body are not visible in this listing.
212 def run_tst_program(self
, prog
, initial_regs
=None,
# when the caller gives no integer registers, default to 32 zeroed entries
216 if initial_regs
is None:
217 initial_regs
= [0] * 32
218 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
219 initial_fprs
=initial_fprs
,
# Script entry point, taken only when the module is executed directly.
# NOTE(review): the guarded statement is not visible in this listing.
230 if __name__
== "__main__":