from openpower.decoder.isa.test_caller import run_tst
from openpower.sv.trans.svp64 import SVP64Asm
from copy import deepcopy
-from openpower.decoder.helpers import fp64toselectable
+from openpower.decoder.helpers import fp64toselectable, SINGLE
from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
-def transform_radix2(vec, exptable):
+def transform_radix2(vec, exptable, reverse=False):
"""
# FFT and convolution test (Python), based on Project Nayuki
#
levels = n.bit_length() - 1
# Copy with bit-reversed permutation
- #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+ if reverse:
+ vec = [vec[reverse_bits(i, levels)] for i in range(n)]
size = 2
while size <= n:
return vec
-def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
+def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
"""
# FFT and convolution test (Python), based on Project Nayuki
#
levels = n.bit_length() - 1
# Copy with bit-reversed permutation
- #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
+ if reverse:
+ vec = [vec[reverse_bits(i, levels)] for i in range(n)]
size = 2
while size <= n:
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
runs a full in-place O(N log2 N) butterfly schedule for
Discrete Fourier Transform. this version however uses
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
lst = list(lst)
"svremap 26, 0, 0, 0, 0, 1, 1",
"sv.ffadds 0.v, 24, 0.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
runs a full in-place O(N log2 N) butterfly schedule for
"svremap 26, 0, 0, 0, 0, 1, 0",
"sv.ffadds 0.v, 24, 0.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
lst = list(lst)
however it turns out that they can be *merged*, and for
the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
- *ignore* their REMAPs (by definition), and for the second
- one (sv.ffads) exactly the right REMAPs are also ignored!
+ *ignore* their REMAPs (by definition, because you can't REMAP
+ scalar operands), and for the second one (sv.ffadds) exactly the
+ right REMAPs are also ignored!
+ therefore we can merge:
+ "svremap 5, 1, 0, 2, 0, 0, 1",
+ "svremap 26, 0, 0, 0, 0, 1, 1",
+ into:
"svremap 31, 1, 0, 2, 0, 1, 1",
+ and save one instruction.
"""
lst = SVP64Asm( [
- # set triple butterfly mode
+ # set triple butterfly mode with persistent "REMAP"
"svshape 8, 1, 1, 1, 1",
- # tpre
"svremap 31, 1, 0, 2, 0, 1, 1",
+ # tpre
"sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
"sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
# tpre = mul1_r + mul2_r
# svstep loop
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -56"
+ "bc 6, 3, -56"
])
lst = list(lst)
self.assertEqual(sim.fpr(i+2), t)
self.assertEqual(sim.fpr(i+6), u)
+    def test_sv_remap_fpmadds_fft_ldst(self):
+        """>>> lst = ["svshape 8, 1, 1, 15, 0",
+                      "svremap 1, 0, 0, 0, 0, 0, 0, 0",
+                      "sv.lfssh 0.v, 4(0), 20", # shifted
+                      "svshape 8, 1, 1, 1, 0",
+                      "svremap 31, 1, 0, 2, 0, 1, 0",
+                      "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+                     ]
+
+            runs a full in-place O(N log2 N) butterfly schedule for
+            Discrete Fourier Transform, using bit-reversed LD/ST
+
+            test setup: the eight input values are converted with SINGLE
+            and packed in pairs into memory words at addresses 0, 8, 16
+            and 24 (even index in the low 32 bits, odd index in the high
+            32 bits).  the four coefficients go into FPRs 8..11.  after
+            the run, FPR i is compared with element i of
+            transform_radix2(av, coe, reverse=True).
+        """
+        lst = SVP64Asm( ["svshape 8, 1, 1, 15, 0",
+                         "svremap 1, 0, 0, 0, 0, 0, 0, 0",
+                         "sv.lfssh 0.v, 4(0), 20", # shifted
+                         "svshape 8, 1, 1, 1, 0",
+                         "svremap 31, 1, 0, 2, 0, 1, 0",
+                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+                        ])
+        lst = list(lst)
+
+        # array and coefficients to test
+        av = [7.0, -9.8, 3.0, -32.3,
+              -2.0, 5.0, -9.8, 31.3] # array 0..7
+        coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
+
+        # store in regfile
+        fprs = [0] * 32
+        for i, c in enumerate(coe):
+            fprs[i+8] = fp64toselectable(c)
+        # store in memory: two values per 8-byte-spaced entry
+        mem = {}
+        val = 0
+        for i, a in enumerate(av):
+            a = SINGLE(fp64toselectable(a)).value
+            if i % 2 == 0:
+                # even index: hold back until its odd partner arrives
+                val = a
+            else:
+                # odd index: goes in the upper 32 bits, partner below
+                mem[(i//2)*8] = val | (a << 32)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_mem=mem,
+                                       initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+
+            print ("mem dump")
+            print (sim.mem.dump())
+
+            # work out the results with the twin mul/add-sub,
+            # note bit-reverse mode requested
+            res = transform_radix2(av, coe, reverse=True)
+
+            for i, expected in enumerate(res):
+                print ("i", i, float(sim.fpr(i)), "expected", expected)
+            for i, expected in enumerate(res):
+                # convert to Power single
+                expected = DOUBLE2SINGLE(fp64toselectable(expected))
+                expected = float(expected)
+                actual = float(sim.fpr(i))
+                # approximate error calculation, good enough test
+                # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
+                # and the rounding is different.
+                # the tolerance is scaled by the *magnitude* of expected:
+                # dividing by a negative expected gives a negative error
+                # that trivially passes, and a zero expected would raise
+                # ZeroDivisionError.
+                tolerance = 1e-6 * abs(expected)
+                self.assertLessEqual(
+                    abs(actual - expected), tolerance,
+                    "FPR %d: actual %r expected %r" % (i, actual, expected))
+
def run_tst_program(self, prog, initial_regs=None,
svstate=None,
initial_mem=None,