def demo():
# set the dimension sizes here
- xdim = 16
+ xdim = 8
ydim = 0 # not needed
zdim = 0 # again, not needed
SVSHAPE0 = SVSHAPE()
SVSHAPE0.lims = [xdim, ydim, zdim]
SVSHAPE0.order = [0,1,2] # experiment with different permutations, here
- SVSHAPE0.mode = 0b00
+ SVSHAPE0.mode = 0b01
+ SVSHAPE0.skip = 0b00
SVSHAPE0.offset = 0 # experiment with different offset, here
SVSHAPE0.invxyz = [0,0,0] # inversion if desired
# j+halfstep schedule
SVSHAPE1 = SVSHAPE()
SVSHAPE1.lims = [xdim, ydim, zdim]
SVSHAPE1.order = [0,1,2] # experiment with different permutations, here
- SVSHAPE1.mode = 0b01
+ SVSHAPE1.mode = 0b01 # set on SVSHAPE1 itself; skip now carries the old per-shape mode value
+ SVSHAPE1.skip = 0b01
SVSHAPE1.offset = 0 # experiment with different offset, here
SVSHAPE1.invxyz = [0,0,0] # inversion if desired
# k schedule
SVSHAPE2 = SVSHAPE()
SVSHAPE2.lims = [xdim, ydim, zdim]
SVSHAPE2.order = [0,1,2] # experiment with different permutations, here
- SVSHAPE2.mode = 0b10
+ SVSHAPE2.mode = 0b01 # set on SVSHAPE2 itself; skip now carries the old per-shape mode value
+ SVSHAPE2.skip = 0b10
SVSHAPE2.offset = 0 # experiment with different offset, here
SVSHAPE2.invxyz = [0,0,0] # inversion if desired
# enumerate over the iterator function, getting new indices
schedule = []
- for idx, (jl, jh, k) in enumerate(zip(iterate_indices(SVSHAPE0),
- iterate_indices(SVSHAPE1),
- iterate_indices(SVSHAPE2))):
+ for idx, (jl, jh, k) in enumerate(zip(iterate_butterfly_indices(SVSHAPE0),
+ iterate_butterfly_indices(SVSHAPE1),
+ iterate_butterfly_indices(SVSHAPE2))):
if idx >= VL:
break
schedule.append((jl, jh, k))
for i in range(32):
self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
- def test_sv_fpmadds_fft(self):
+ def tst_sv_fpmadds_fft(self):
""">>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
]
four in-place vector mul-adds, four in-place vector mul-subs
self.assertEqual(sim.fpr(i+2), t)
self.assertEqual(sim.fpr(i+6), u)
+ def test_sv_remap_fpmadds_fft(self):
+ """>>> lst = ["svremap 8, 1, 1, 1",
+ "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+ ]
+ four in-place vector mul-adds, four in-place vector mul-subs
+
+ this is the twin "butterfly" mul-add-sub from Cooley-Tukey
+ https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
+
+ there is the *option* to target a different location (non-in-place)
+ just in case.
+
+ SVP64 "FFT" mode will *automatically* offset FRB and an implicit
+ FRS to perform the two multiplies. one add, one subtract.
+
+ sv.ffmadds FRT, FRA, FRC, FRB actually does:
+ fmadds FRT , FRA, FRC, FRA
+ fnmsubs FRT+vl, FRA, FRC, FRB+vl
+ """
+ lst = SVP64Asm( ["svremap 8, 1, 1, 1",
+ "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
+ ])
+ lst = list(lst)
+
+ fprs = [0] * 32
+ av = [7.0, -9.8, 2.0, -32.3,
+ -2.0, 2.0, -9.8, 32.3] # array 0..7
+ coe = [-1.0, 4.0, 3.1, 6.2] # coefficients
+ # store in regfile: coefficients at fprs[10..13], data at fprs[2..9]
+ for i, c in enumerate(coe):
+ fprs[i+10] = fp64toselectable(c)
+ for i, a in enumerate(av):
+ fprs[i+2] = fp64toselectable(a)
+ # work out the results with the twin mul/add-sub
+ res = []
+ for i, (a, c) in enumerate(zip(av, coe)):
+ continue # NOTE(review): unconditional, so the rest of this loop body never runs and res stays empty
+ fprs[i+2] = fp64toselectable(a)
+ fprs[i+10] = fp64toselectable(c)
+ mul = a * c
+ t = a + mul
+ u = b - mul # NOTE(review): b is not defined anywhere in this method (unreachable for now)
+ t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+ u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+ res.append((t, u))
+ print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
+
+ # set total VL. no formula is used here yet: count it manually, one per
+ # (i, j) pair of the size/i/j loop nest below (gives VL = 12 for the 8-entry av)
+ VL = 0
+ size = 2
+ n = len(av)
+ while size <= n:
+ halfsize = size // 2
+ tablestep = n // size # NOTE(review): assigned but not used in this method
+ for i in range(0, n, size):
+ for j in range(i, i + halfsize):
+ VL += 1
+ size *= 2
+
+ # SVSTATE (calculated VL)
+ svstate = SVP64State()
+ svstate.vl[0:7] = VL # VL
+ svstate.maxvl[0:7] = VL # MAXVL
+ print ("SVSTATE", bin(svstate.spr.asint()))
+
+ with Program(lst, bigendian=False) as program:
+ sim = self.run_tst_program(program, svstate=svstate,
+ initial_fprs=fprs)
+ print ("spr svshape0", sim.spr['SVSHAPE0'])
+ print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+ print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+ print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+ print ("spr svshape1", sim.spr['SVSHAPE1'])
+ print ("spr svshape2", sim.spr['SVSHAPE2'])
+ print ("spr svshape3", sim.spr['SVSHAPE3'])
+ for i in range(8): # NOTE(review): dumps fpr 0..7, whereas av was loaded at fprs[2..9] - confirm intended range
+ print ("i", i, float(sim.fpr(i)))
+
+ return # NOTE(review): early exit - the assertions below are unreachable, so nothing is verified yet
+ # confirm that the results are as expected
+ for i, (t, u) in enumerate(res):
+ self.assertEqual(sim.fpr(i+2), t)
+ self.assertEqual(sim.fpr(i+6), u)
+
def run_tst_program(self, prog, initial_regs=None,
svstate=None,
initial_mem=None,
insn |= (fields[2]-1) << (31-20) # SVzd , bits 16-20
insn |= (fields[3]) << (31-25) # SVRM , bits 21-25
insn |= 0b00001 << (31-30) # XO , bits 26..30
+ #insn &= ((1<<32)-1)
log ("svremap", bin(insn))
yield ".long 0x%x" % insn
return
#'sv.lhzbr 5.v, 11(9.v), 15',
#'sv.lwzbr 5.v, 11(9.v), 15',
'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
- 'svremap 2, 2, 3, 0',
]
lst = [
'sv.fmadds 0.v, 8.v, 16.v, 4.v',
+ 'svremap 8, 1, 1, 1',
]
isa = SVP64Asm(lst, macros=macros)
print ("list", list(isa))