1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.power_decoder
import (create_pdecode
)
6 from openpower
.simulator
.program
import Program
7 from openpower
.decoder
.isa
.caller
import SVP64State
8 from openpower
.decoder
.selectable_int
import SelectableInt
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.sv
.trans
.svp64
import SVP64Asm
11 from copy
import deepcopy
12 from openpower
.decoder
.helpers
import fp64toselectable
, SINGLE
13 from openpower
.decoder
.isafunctions
.double2single
import DOUBLE2SINGLE
16 class DCTTestCase(FHDLTestCase
):
def _check_regs(self, sim, expected):
    """Assert that the simulator's GPRs match ``expected``.

    ``sim.gpr(i)`` is compared against ``expected[i]`` wrapped as a
    64-bit ``SelectableInt``; the first mismatch fails the test.
    """
    # NOTE(review): the loop header is missing from the excerpt this
    # was reviewed from.  32 registers is assumed, matching the
    # ``[0] * 32`` default register file used by run_tst_program --
    # confirm against the original file.
    for i in range(32):
        self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
                         "GPR %d mismatch" % i)
22 def test_sv_ffadds_dct(self
):
23 """>>> lst = ["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
25 four in-place vector adds, four in-place vector mul-subs
27 SVP64 "DCT" mode will *automatically* offset FRB and an implicit
28 FRS to perform the two multiplies. one add, one subtract.
30 sv.fdadds FRT, FRA, FRC, FRB actually does:
32 fsubs FRT+vl, FRA, FRB+vl
34 lst
= SVP64Asm(["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
38 # cheat here with these values, they're selected so that
39 # rounding errors do not occur. sigh.
41 av
= [7.0, -0.8, 2.0, -2.3] # first half of array 0..3
42 bv
= [-2.0, 2.0, -0.8, 1.4] # second half of array 4..7
43 cv
= [-1.0, 0.5, 2.5, -0.25] # coefficients
45 # work out the results with the twin add-sub
46 for i
, (a
, b
, c
) in enumerate(zip(av
, bv
, cv
)):
47 fprs
[i
+0] = fp64toselectable(a
)
48 fprs
[i
+4] = fp64toselectable(b
)
49 fprs
[i
+8] = fp64toselectable(c
)
50 # this isn't quite a perfect replication of the
51 # FP32 mul-add-sub. better really to use FPMUL32, FPADD32
52 # and FPSUB32 directly to be honest.
55 diff
= DOUBLE2SINGLE(fp64toselectable(diff
)) # FP32 round
58 tc
= DOUBLE2SINGLE(fp64toselectable(t
)) # convert to Power single
59 uc
= DOUBLE2SINGLE(fp64toselectable(u
)) # from double
61 print ("DCT", i
, "in", a
, b
, "c", c
, "res", t
, u
)
63 # SVSTATE (in this case, VL=2)
64 svstate
= SVP64State()
66 svstate
.maxvl
= 4 # MAXVL
67 print ("SVSTATE", bin(svstate
.asint()))
69 with
Program(lst
, bigendian
=False) as program
:
70 sim
= self
.run_tst_program(program
, svstate
=svstate
,
72 # confirm that the results are as expected
73 for i
, (t
, u
) in enumerate(res
):
74 a
= float(sim
.fpr(i
+0))
75 b
= float(sim
.fpr(i
+4))
78 print ("DCT", i
, "in", a
, b
, "res", t
, u
)
79 for i
, (t
, u
) in enumerate(res
):
80 self
.assertEqual(sim
.fpr(i
+0), t
)
81 self
.assertEqual(sim
.fpr(i
+4), u
)
def tst_sv_remap_fpmadds_dct(self):
    """>>> lst = ["svshape 4, 1, 1, 2, 0",
                  "svremap 31, 1, 0, 2, 0, 1, 0",
                  "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                 ]

    Runs a full in-place O(N log2 N) butterfly schedule: SVP64 "REMAP"
    in Butterfly Mode is applied to a twin +/- FMAC.

    The array ``av`` is loaded into FPRs 0..3 and the coefficients
    ``coe`` into FPRs 8..13, the three instructions are simulated, and
    FPRs 0.. are compared against ``transform_radix2(av, coe)``.

    The comparison is approximate: the simulator performs a fused
    multiply-add whereas the reference performs a separate multiply
    followed by an add or subtract, so the rounding differs.
    """
    lst = SVP64Asm(["svshape 4, 1, 1, 2, 0",
                    "svremap 31, 1, 0, 2, 0, 1, 0",
                    "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                    ])
    # NOTE(review): the lines between this assignment and the data below
    # are missing from the excerpt this was reviewed from; if the
    # original converts ``lst`` (e.g. to a list) before use, restore it.

    # array and coefficients to test
    av = [7.0, -9.8, 3.0, -32.3]
    coe = [-0.25, 0.5, 3.1, 6.2, 0.1, -0.2]  # 6 coefficients

    # NOTE(review): the creation of ``fprs`` is not visible in the
    # excerpt; a zeroed 32-entry register file is assumed -- confirm.
    fprs = [0] * 32
    for i, c in enumerate(coe):
        fprs[i+8] = fp64toselectable(c)
    for i, a in enumerate(av):
        fprs[i+0] = fp64toselectable(a)

    with Program(lst, bigendian=False) as program:
        sim = self.run_tst_program(program, initial_fprs=fprs)
        print("spr svshape0", sim.spr['SVSHAPE0'])
        print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
        print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
        print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
        print("spr svshape1", sim.spr['SVSHAPE1'])
        print("spr svshape2", sim.spr['SVSHAPE2'])
        print("spr svshape3", sim.spr['SVSHAPE3'])

        # work out the results with the twin mul/add-sub
        # NOTE(review): transform_radix2 is not among the imports
        # visible in this excerpt -- confirm where it comes from.
        res = transform_radix2(av, coe)

        # dump everything first so a failure still shows the full picture
        for i, expected in enumerate(res):
            print("i", i, float(sim.fpr(i)), "expected", expected)

        for i, expected in enumerate(res):
            # convert to Power single
            expected = DOUBLE2SINGLE(fp64toselectable(expected))
            expected = float(expected)
            actual = float(sim.fpr(i))
            # approximate error calculation, good enough test
            # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
            # and the rounding is different.
            #
            # The bound is scaled by abs(expected): dividing by the
            # signed value made the error negative (and the check
            # vacuous) for negative results, and divided by zero when
            # the expected result was 0.0.
            self.assertLessEqual(
                abs(actual - expected), 1e-7 * abs(expected),
                "FPR %d: actual %r expected %r" % (i, actual, expected))
139 def run_tst_program(self
, prog
, initial_regs
=None,
143 if initial_regs
is None:
144 initial_regs
= [0] * 32
145 simulator
= run_tst(prog
, initial_regs
, mem
=initial_mem
,
146 initial_fprs
=initial_fprs
,
157 if __name__
== "__main__":