from openpower.decoder.isa.test_caller import run_tst
from openpower.sv.trans.svp64 import SVP64Asm
from copy import deepcopy
-from openpower.decoder.helpers import fp64toselectable
+from openpower.decoder.helpers import fp64toselectable, SINGLE
from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
runs a full in-place O(N log2 N) butterfly schedule for
Discrete Fourier Transform. this version however uses
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -16"
+ "bc 6, 3, -16"
])
lst = list(lst)
"svremap 26, 0, 0, 0, 0, 1, 1",
"sv.ffadds 0.v, 24, 0.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
runs a full in-place O(N log2 N) butterfly schedule for
"svremap 26, 0, 0, 0, 0, 1, 0",
"sv.ffadds 0.v, 24, 0.v",
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -28"
+ "bc 6, 3, -28"
])
lst = list(lst)
however it turns out that they can be *merged*, and for
the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
- *ignore* their REMAPs (by definition), and for the second
- one (sv.ffads) exactly the right REMAPs are also ignored!
+ *ignore* their REMAPs (by definition, because you can't REMAP
+ scalar operands), and for the second one (sv.ffadds) exactly the
+ right REMAPs are also ignored!
+ therefore we can merge:
+ "svremap 5, 1, 0, 2, 0, 0, 1",
+ "svremap 26, 0, 0, 0, 0, 1, 1",
+ into:
"svremap 31, 1, 0, 2, 0, 1, 1",
+ and save one instruction.
"""
lst = SVP64Asm( [
# set triple butterfly mode with persistent "REMAP"
# svstep loop
"setvl. 0, 0, 1, 1, 0, 0",
- "bc 4, 2, -56"
+ "bc 6, 3, -56"
])
lst = list(lst)
self.assertEqual(sim.fpr(i+6), u)
def test_sv_remap_fpmadds_fft_ldst(self):
- """>>> lst = ["svshape 8, 1, 1, 1, 0",
- "svremap 31, 1, 0, 2, 0, 1, 0",
- "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
- ]
+ """>>> lst = ["setvl 0, 0, 8, 0, 1, 1",
+ "sv.lfssh 0.v, 4(0), 20", # bit-reversed
+ "svshape 8, 1, 1, 1, 0",
+ "svremap 31, 1, 0, 2, 0, 1, 0",
+ "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+
runs a full in-place O(N log2 N) butterfly schedule for
Discrete Fourier Transform, using bit-reversed LD/ST
"""
- lst = SVP64Asm( ["setvl 0, 0, 8, 0, 1, 1",
- "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
- #"svshape 8, 1, 1, 1, 0",
- #"svremap 31, 1, 0, 2, 0, 1, 0",
- #"sv.ffmadds 0.v, 0.v, 0.v, 8.v"
+ lst = SVP64Asm( ["svshape 8, 1, 1, 15, 0",
+ "svremap 1, 0, 0, 0, 0, 0, 0, 0",
+ "sv.lfssh 0.v, 4(0), 20", # shifted
+ "svshape 8, 1, 1, 1, 0",
+ "svremap 31, 1, 0, 2, 0, 1, 0",
+ "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
])
lst = list(lst)
fprs[i+8] = fp64toselectable(c)
# store in memory
mem = {}
+ val = 0
for i, a in enumerate(av):
+ a = SINGLE(fp64toselectable(a)).value
shift = (i % 2) == 1
if shift == 0:
- mem[(i//2)*8] = fp64toselectable(a).value
+ val = a
else:
- mem[(i//2)*8] |= fp64toselectable(a).value << 32
+ mem[(i//2)*8] = val | (a << 32)
with Program(lst, bigendian=False) as program:
sim = self.run_tst_program(program, initial_mem=mem,
print ("mem dump")
print (sim.mem.dump())
- # work out the results with the twin mul/add-sub
- res = transform_radix2(av, coe)
+ # work out the results with the twin mul/add-sub,
+ # note bit-reverse mode requested
+ res = transform_radix2(av, coe, reverse=True)
for i, expected in enumerate(res):
print ("i", i, float(sim.fpr(i)), "expected", expected)
# reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
# and the rounding is different
err = abs(actual - expected) / expected
- self.assertTrue(err < 1e-7)
+ self.assertTrue(err < 1e-6)
def run_tst_program(self, prog, initial_regs=None,
svstate=None,