SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
# for cos coefficient
SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
+ SVSHAPE2[12:17] <- 0b000000 # reset costable "striding" to 1
if (SVrm != 0b0100) & (SVrm != 0b1100) then
SVSHAPE3[28:29] <- 0b11 # size schedule
# set schedule up for (i)DCT Outer butterfly
SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
# copy
- SVSHAPE1[0:31] <- SVSHAPE0[0:31]
- SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
# for FRA and FRT
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
+ # reset costable "striding" to 1
+ SVSHAPE2[12:17] <- 0b000000
# set schedule up for DCT COS table generation
if (SVrm = 0b0101) | (SVrm = 0b1101) then
# calculate O(N log2 N)
# set VL, MVL and Vertical-First
m[0:12] <- vlen * mscale
maxvl[0:6] <- m[6:12]
- SVSTATE[0:6] <- vlen # VL
- SVSTATE[7:13] <- maxvl # MAVXL
+ SVSTATE[0:6] <- maxvl # MAXVL
+ SVSTATE[7:13] <- vlen # VL
SVSTATE[63] <- vf
Special Registers Altered:
self.assertEqual(sim.fpr(i+0), t)
self.assertEqual(sim.fpr(i+4), u)
- def test_sv_remap_fpmadds_dct_inner_4(self, stride=1):
+ # stride=2 variant: must have its own name, otherwise the stride=1
+ # definition below rebinds it and this case silently never runs
+ def test_sv_remap_fpmadds_dct_inner_4_stride_2(self):
+ self.sv_remap_fpmadds_dct_inner_4(stride=2)
+
+ def test_sv_remap_fpmadds_dct_inner_4_stride_1(self):
+ self.sv_remap_fpmadds_dct_inner_4(stride=1)
+
+ def sv_remap_fpmadds_dct_inner_4(self, stride=2):
""">>> lst = ["svshape 4, 1, 1, 2, 0",
"svremap 27, 1, 0, 2, 0, 1, 0",
"sv.fdmadds *0, *0, *0, *32"
"""
lst = SVP64Asm( ["svshape 4, 1, %d, 2, 0" % stride,
"svremap 27, 1, 0, 2, 0, 1, 0",
- "sv.fdmadds *0, *0, *0, *32"
+ "sv.fdmadds *0, *0, *0, *16"
])
lst = list(lst)
# store in regfile
fprs = [0] * 64
for i, c in enumerate(coe):
- fprs[i*stride+32] = fp64toselectable(1.0 / c) # invert
+ fprs[i+16] = fp64toselectable(1.0 / c) # invert
for i, a in enumerate(av):
fprs[i*stride+0] = fp64toselectable(a)
print ("err", i, err)
self.assertTrue(err < 1e-6)
- def test_sv_remap_fpmadds_idct_inner_4(self):
+ def test_sv_remap_fpmadds_idct_inner_4(self, stride=2):
""">>> lst = ["svshape 4, 1, 1, 10, 0",
"svremap 27, 0, 1, 2, 1, 0, 0",
"sv.ffmadds *0, *0, *0, *8"
is straight Vectorised (0123...) because DCT coefficients
cannot be shared between butterfly layers (due to +0.5)
"""
- lst = SVP64Asm( ["svshape 4, 1, 1, 10, 0",
+ lst = SVP64Asm( ["svshape 4, 1, %d, 10, 0" % stride,
"svremap 27, 0, 1, 2, 1, 0, 0",
- "sv.ffmadds *0, *0, *0, *8"
+ "sv.ffmadds *0, *0, *0, *16"
])
lst = list(lst)
av = halfrev2(avi, False)
# store in regfile
- fprs = [0] * 32
+ fprs = [0] * 64
for i, c in enumerate(coe):
- fprs[i+8] = fp64toselectable(1.0 / c) # invert
+ fprs[i+16] = fp64toselectable(1.0 / c) # invert
for i, a in enumerate(av):
- fprs[i+0] = fp64toselectable(a)
+ fprs[i*stride+0] = fp64toselectable(a)
with Program(lst, bigendian=False) as program:
sim = self.run_tst_program(program, initial_fprs=fprs)
res = transform_inner_radix2_idct(avi, coe)
for i, expected in enumerate(res):
- print ("i", i, float(sim.fpr(i)), "expected", expected)
+ print ("i", i*stride, float(sim.fpr(i*stride)),
+ "expected", expected)
for i, expected in enumerate(res):
# convert to Power single
expected = fph.DOUBLE2SINGLE(fp64toselectable(expected))
expected = float(expected)
- actual = float(sim.fpr(i))
+ actual = float(sim.fpr(i*stride))
# approximate error calculation, good enough test
# reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
# and the rounding is different