from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.sim import Simulator, Delay, Settle
from nmutil.formaltest import FHDLTestCase
import unittest
from openpower.decoder.power_decoder import (create_pdecode)
from openpower.decoder.isa.test_caller import run_tst
from openpower.sv.trans.svp64 import SVP64Asm
from copy import deepcopy
-from openpower.decoder.helpers import fp64toselectable
-from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
+from openpower.decoder.helpers import fp64toselectable, SINGLE
+from openpower.decoder.isafunctions.double2single import ISACallerFnHelper
+# HACK: DOUBLE2SINGLE is auto-generated from pseudo-code and is accessed
+# here as a method of an ISACallerFnHelper instance, hence "fph" below.
+fph = ISACallerFnHelper(XLEN=64)
-def transform_radix2(vec, exptable):
+
+def transform_radix2(vec, exptable, reverse=False):
"""
# FFT and convolution test (Python), based on Project Nayuki
#
levels = n.bit_length() - 1
# Copy with bit-reversed permutation
- #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+ if reverse:
+ vec = [vec[reverse_bits(i, levels)] for i in range(n)]
size = 2
while size <= n:
return vec
-def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
+def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
"""
# FFT and convolution test (Python), based on Project Nayuki
#
levels = n.bit_length() - 1
# Copy with bit-reversed permutation
- #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+ if reverse:
+ vec = [vec[reverse_bits(i, levels)] for i in range(n)]
size = 2
while size <= n:
for i in range(32):
self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
+ def test_sv_remap_fpmadds_fft_4(self):
+     """>>> lst = ["svshape 2, 1, 1, 1, 0",
+                   "svremap 31, 1, 0, 2, 0, 1, 0",
+                   "sv.ffmadds *0, *0, *0, *8"
+                  ]
+     runs the REMAPed sv.ffmadds over a two-element array (fp0-1)
+     with a single coefficient (fp8), then compares each resulting
+     FPR against transform_radix2() run on the same inputs.
+
+     this was described as a cheap (cheating) way to run a single
+     "ffmadds." so that at least Rc=1 on sv.ffmadds gets activated.
+     the Rc=1 results are not actually tested because there's no
+     checking yet on FP Rc=1.
+     NOTE(review): the instruction assembled below has no trailing
+     "." - confirm whether Rc=1 was really intended here.
+     """
+     lst = SVP64Asm(["svshape 2, 1, 1, 1, 0",
+                     "svremap 31, 1, 0, 2, 0, 1, 0",
+                     "sv.ffmadds *0, *0, *0, *8"
+                     ])
+     lst = list(lst)
+
+     # array and coefficients to test
+     av = [7.0, -9.8]  # array 0..1
+     coe = [3.1]  # coefficients
+
+     # store in regfile: coefficients from fp8 upwards, array from fp0
+     fprs = [0] * 32
+     for i, c in enumerate(coe):
+         fprs[i+8] = fp64toselectable(c)
+     for i, a in enumerate(av):
+         fprs[i+0] = fp64toselectable(a)
+
+     with Program(lst, bigendian=False) as program:
+         sim = self.run_tst_program(program, initial_fprs=fprs)
+         print("spr svshape0", sim.spr['SVSHAPE0'])
+         print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+         print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+         print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+         print("spr svshape1", sim.spr['SVSHAPE1'])
+         print("spr svshape2", sim.spr['SVSHAPE2'])
+         print("spr svshape3", sim.spr['SVSHAPE3'])
+
+         # work out the results with the twin mul/add-sub
+         res = transform_radix2(av, coe)
+
+         for i, expected in enumerate(res):
+             print("i", i, float(sim.fpr(i)), "expected", expected)
+         for i, expected in enumerate(res):
+             # convert to Power single
+             expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
+             expected = float(expected)
+             actual = float(sim.fpr(i))
+             # approximate error calculation, good enough test
+             # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+             # and the rounding is different.
+             # normalise by the *magnitude* of the expected value: dividing
+             # by the signed value gives a negative "error" whenever the
+             # expected result is negative, and the check then passes
+             # unconditionally. fall back to the absolute error if the
+             # expected value is exactly zero.
+             scale = abs(expected) or 1.0
+             err = abs(actual - expected) / scale
+             self.assertTrue(err < 1e-7)
+
def test_sv_remap_fpmadds_fft(self):
""">>> lst = ["svshape 8, 1, 1, 1, 0",
"svremap 31, 1, 0, 2, 0, 1, 0",
- "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+ "sv.ffmadds *2, *2, *2, *10"
]
runs a full in-place O(N log2 N) butterfly schedule for
Discrete Fourier Transform.
"""
lst = SVP64Asm( ["svshape 8, 1, 1, 1, 0",
"svremap 31, 1, 0, 2, 0, 1, 0",
- "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+ "sv.ffmadds *0, *0, *0, *8"
])
lst = list(lst)
print ("i", i, float(sim.fpr(i)), "expected", expected)
for i, expected in enumerate(res):
# convert to Power single
- expected = DOUBLE2SINGLE(fp64toselectable(expected))
+ expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
expected = float(expected)
actual = float(sim.fpr(i))
# approximate error calculation, good enough test
""">>> lst = SVP64Asm( [
"svshape 8, 1, 1, 1, 1",
"svremap 31, 1, 0, 2, 0, 1, 0",
- "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
+ "sv.ffmadds *0, *0, *0, *8",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
runs a full in-place O(N log2 N) butterfly schedule for
Discrete Fourier Transform. this version however uses
lst = SVP64Asm( [
"svshape 8, 1, 1, 1, 1",
"svremap 31, 1, 0, 2, 0, 1, 0",
- "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
+ "sv.ffmadds *0, *0, *0, *8",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
lst = list(lst)
print ("i", i, float(sim.fpr(i)), "expected", expected)
for i, expected in enumerate(res):
# convert to Power single
- expected = DOUBLE2SINGLE(fp64toselectable(expected))
+ expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
expected = float(expected)
actual = float(sim.fpr(i))
# approximate error calculation, good enough test
"svshape 8, 1, 1, 1, 1",
# RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
"svremap 5, 1, 0, 2, 0, 0, 1",
- "sv.fmuls 24, 0.v, 8.v",
+ "sv.fmuls 24, *0, *8",
# RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
"svremap 26, 0, 0, 0, 0, 1, 1",
- "sv.ffadds 0.v, 24, 0.v",
+ "sv.ffadds *0, 24, *0",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
runs a full in-place O(N log2 N) butterfly schedule for
"svshape 8, 1, 1, 1, 1",
# RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
"svremap 5, 1, 0, 2, 0, 0, 0",
- "sv.fmuls 24, 0.v, 8.v",
+ "sv.fmuls 24, *0, *8",
# RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
"svremap 26, 0, 0, 0, 0, 1, 0",
- "sv.ffadds 0.v, 24, 0.v",
+ "sv.ffadds *0, 24, *0",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
lst = list(lst)
print ("i", i, float(sim.fpr(i)), "expected", expected)
for i, expected in enumerate(res):
# convert to Power single
- expected = DOUBLE2SINGLE(fp64toselectable(expected))
+ expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
expected = float(expected)
actual = float(sim.fpr(i))
# approximate error calculation, good enough test
self.assertTrue(err < 1e-7)
def test_sv_fpmadds_fft(self):
- """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+ """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
]
four in-place vector mul-adds, four in-place vector mul-subs
fnmsubs FRT+vl, FRA, FRC, FRB+vl
"""
- lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+ lst = SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
])
lst = list(lst)
mul = a * c
t = b + mul
u = b - mul
- t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
- u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+ t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+ u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
res.append((t, u))
print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
self.assertEqual(sim.fpr(i+6), u)
def test_sv_ffadds_fft(self):
- """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
+ """>>> lst = ["sv.ffadds *2, *2, *2"
]
four in-place vector adds, four in-place vector subs
fadds FRT , FRB, FRA
fsubs FRT+vl, FRA, FRB+vl
"""
- lst = SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
+ lst = SVP64Asm(["sv.ffadds *2, *2, *2"
])
lst = list(lst)
fprs[i+6] = fp64toselectable(b)
t = b + a
u = b - a
- t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
- u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+ t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+ u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
res.append((t, u))
print ("FFT", i, "in", a, b, "res", t, u)
however it turns out that they can be *merged*, and for
the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
- *ignore* their REMAPs (by definition), and for the second
- one (sv.ffads) exactly the right REMAPs are also ignored!
+ *ignore* their REMAPs (by definition, because you can't REMAP
+ scalar operands), and for the second one (sv.ffadds) exactly the
+ right REMAPs are also ignored!
+ therefore we can merge:
+ "svremap 5, 1, 0, 2, 0, 0, 1",
+ "svremap 26, 0, 0, 0, 0, 1, 1",
+ into:
"svremap 31, 1, 0, 2, 0, 1, 1",
+ and save one instruction.
"""
lst = SVP64Asm( [
- # set triple butterfly mode
+ # set triple butterfly mode with persistent "REMAP"
"svshape 8, 1, 1, 1, 1",
- # tpre
"svremap 31, 1, 0, 2, 0, 1, 1",
- "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
- "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
+ # tpre
+ "sv.fmuls 24, *0, *16", # mul1_r = r*cos_r
+ "sv.fmadds 24, *8, *20, 24", # mul2_r = i*sin_i
# tpre = mul1_r + mul2_r
# tpim
- "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i
- "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r
+ "sv.fmuls 26, *0, *20", # mul1_i = r*sin_i
+ "sv.fmsubs 26, *8, *16, 26", # mul2_i = i*cos_r
# tpim = mul2_i - mul1_i
# vec_r jh/jl
- "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre
+ "sv.ffadds *0, 24, *0", # vh/vl +/- tpre
# vec_i jh/jl
- "sv.ffadds 8.v, 26, 8.v", # vh/vl +- tpim
+ "sv.ffadds *8, 26, *8", # vh/vl +- tpim
# svstep loop
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -56"
+ "bc 6, 3, -56"
])
lst = list(lst)
"expected_i", expected_i)
for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
# convert to Power single
- expected_r = DOUBLE2SINGLE(fp64toselectable(expected_r ))
+ expected_r = fph.DOUBLE2SINGLE(fp64toselectable(expected_r ))
expected_r = float(expected_r)
actual_r = float(sim.fpr(i))
# approximate error calculation, good enough test
err = abs(actual_r - expected_r ) / expected_r
self.assertTrue(err < 1e-6)
# convert to Power single
- expected_i = DOUBLE2SINGLE(fp64toselectable(expected_i ))
+ expected_i = fph.DOUBLE2SINGLE(fp64toselectable(expected_i ))
expected_i = float(expected_i)
actual_i = float(sim.fpr(i+8))
# approximate error calculation, good enough test
self.assertTrue(err < 1e-6)
def test_sv_ffadds_fft_scalar(self):
- """>>> lst = ["sv.ffadds 2.v, 12, 13"
+ """>>> lst = ["sv.ffadds *2, 12, 13"
]
four in-place vector adds and subs, but done with a scalar
pair (fp12, fp13)
"""
- lst = SVP64Asm(["sv.ffadds 2.v, 12, 13"
+ lst = SVP64Asm(["sv.ffadds *2, 12, 13"
])
lst = list(lst)
for i in range(4):
t = scalar_b + scalar_a
u = scalar_b - scalar_a
- t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
- u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+ t = fph.DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+ u = fph.DOUBLE2SINGLE(fp64toselectable(u)) # from double
res.append((t, u))
print ("FFT", i, "res", t, u)
self.assertEqual(sim.fpr(i+2), t)
self.assertEqual(sim.fpr(i+6), u)
+ def test_sv_remap_fpmadds_fft_ldst(self):
+     """>>> lst = ["svshape 8, 1, 1, 15, 0",
+                   "svremap 1, 0, 0, 0, 0, 0, 0",
+                   "sv.lfs/els *0, 4(0)",
+                   "svshape 8, 1, 1, 1, 0",
+                   "svremap 31, 1, 0, 2, 0, 1, 0",
+                   "sv.ffmadds *0, *0, *0, *8"
+                  ]
+
+     runs a full in-place O(N log2 N) butterfly schedule for
+     Discrete Fourier Transform, using bit-reversed LD/ST
+
+     the eight input values are placed in memory as single-precision
+     words and loaded with sv.lfs/els; the four coefficients are
+     preloaded into fp8-11. each resulting FPR is compared against
+     transform_radix2() called with reverse=True.
+     """
+     lst = SVP64Asm(["svshape 8, 1, 1, 15, 0",
+                     "svremap 1, 0, 0, 0, 0, 0, 0",
+                     "sv.lfs/els *0, 4(0)",
+                     "svshape 8, 1, 1, 1, 0",
+                     "svremap 31, 1, 0, 2, 0, 1, 0",
+                     "sv.ffmadds *0, *0, *0, *8"
+                     ])
+     lst = list(lst)
+
+     # array and coefficients to test
+     av = [7.0, -9.8, 3.0, -32.3,
+           -2.0, 5.0, -9.8, 31.3]  # array 0..7
+     coe = [-0.25, 0.5, 3.1, 6.2]  # coefficients
+
+     # store in regfile
+     fprs = [0] * 32
+     for i, c in enumerate(coe):
+         fprs[i+8] = fp64toselectable(c)
+     # store in memory: two values per 8-byte entry, the even-indexed
+     # one in the low 32 bits and the odd-indexed one in the high 32 bits
+     # (presumably SINGLE(...).value is the 32-bit pattern - TODO confirm)
+     mem = {}
+     val = 0
+     for i, a in enumerate(av):
+         a = SINGLE(fp64toselectable(a)).value
+         if i % 2 == 0:
+             # low half: hold it until its partner arrives
+             val = a
+         else:
+             mem[(i//2)*8] = val | (a << 32)
+
+     with Program(lst, bigendian=False) as program:
+         sim = self.run_tst_program(program, initial_mem=mem,
+                                    initial_fprs=fprs)
+         print("spr svshape0", sim.spr['SVSHAPE0'])
+         print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+         print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+         print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+         print("spr svshape1", sim.spr['SVSHAPE1'])
+         print("spr svshape2", sim.spr['SVSHAPE2'])
+         print("spr svshape3", sim.spr['SVSHAPE3'])
+
+         print("mem dump")
+         print(sim.mem.dump())
+
+         # work out the results with the twin mul/add-sub,
+         # note bit-reverse mode requested
+         res = transform_radix2(av, coe, reverse=True)
+
+         for i, expected in enumerate(res):
+             print("i", i, float(sim.fpr(i)), "expected", expected)
+         for i, expected in enumerate(res):
+             # convert to Power single
+             expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
+             expected = float(expected)
+             actual = float(sim.fpr(i))
+             # approximate error calculation, good enough test
+             # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+             # and the rounding is different.
+             # normalise by the *magnitude* of the expected value: dividing
+             # by the signed value gives a negative "error" whenever the
+             # expected result is negative, and the check then passes
+             # unconditionally. fall back to the absolute error if the
+             # expected value is exactly zero.
+             scale = abs(expected) or 1.0
+             err = abs(actual - expected) / scale
+             self.assertTrue(err < 1e-6)
+
def run_tst_program(self, prog, initial_regs=None,
svstate=None,
initial_mem=None,