'width' bits of the integer 'val'
"""
result = 0
- width = VL.bit_length()
+ width = VL.bit_length()-1
for _ in range(width):
result = (result << 1) | (val & 1)
val >>= 1
from openpower.decoder.power_enums import SVPtype
-from openpower.decoder.helpers import exts, gtu, ltu, undefined
+from openpower.decoder.helpers import (exts, gtu, ltu, undefined, bitrev)
from openpower.consts import PIb, MSRb # big-endian (PowerISA versions)
from openpower.consts import SVP64CROffs
from openpower.decoder.power_svp64 import SVP64RM, decode_extra
# use info.form to detect
replace_d = False # update / replace constant in pseudocode
if self.is_svp64_mode:
- D = yield self.dec2.dec.fields.FormD.D[0:16]
- D = exts(D, 16) # sign-extend to integer
ldstmode = yield self.dec2.rm_dec.ldstmode
+ # bitreverse mode reads SVD (or SVDS - TODO)
+ # *BUT*... because this is "overloading" of LD operations,
+ # it gets *STORED* into D (or DS, TODO)
+ if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+ imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
+ imm = exts(imm, 11) # sign-extend to integer
+ print ("bitrev SVD", imm)
+ replace_d = True
+ else:
+ imm = yield self.dec2.dec.fields.FormD.D[0:16]
+ imm = exts(imm, 16) # sign-extend to integer
# get the right step. LD is from srcstep, ST is dststep
op = yield self.dec2.e.do.insn_type
offsmul = 0
if op == MicrOp.OP_LOAD.value:
offsmul = srcstep
- log("D-field src", D, offsmul)
+ log("D-field src", imm, offsmul)
elif op == MicrOp.OP_STORE.value:
offsmul = dststep
- log("D-field dst", D, offsmul)
+ log("D-field dst", imm, offsmul)
+ # bit-reverse mode
+ if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+ # manually look up RC, sigh
+ RC = yield self.dec2.dec.RC[0:5]
+ RC = self.gpr(RC)
+ log ("RC", RC.value, "imm", imm, "offs", bin(offsmul),
+ "rev", bin(bitrev(offsmul, vl)))
+ imm = SelectableInt((imm * bitrev(offsmul, vl)) << RC.value, 32)
# Unit-Strided LD/ST adds offset*width to immediate
- if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
+ elif ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
ldst_len = yield self.dec2.e.do.data_len
- D = SelectableInt(D + offsmul * ldst_len, 32)
+ imm = SelectableInt(imm + offsmul * ldst_len, 32)
replace_d = True
# Element-strided multiplies the immediate by element step
elif ldstmode == SVP64LDSTmode.ELSTRIDE.value:
- D = SelectableInt(D * offsmul, 32)
+ imm = SelectableInt(imm * offsmul, 32)
replace_d = True
ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec
ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in
- log("LDSTmode", ldstmode, offsmul, D, ldst_ra_vec, ldst_imz_in)
+ log("LDSTmode", ldstmode, SVP64LDSTmode.BITREVERSE.value,
+ offsmul, imm, ldst_ra_vec, ldst_imz_in)
# new replacement D
if replace_d:
- self.namespace['D'] = D
+ self.namespace['D'] = imm
# "special" registers
for special in info.special_regs:
for i in range(32):
self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
- def test_sv_load_store_elementstride(self):
+ def tst_sv_load_store_elementstride(self):
""">>> lst = ["addi 1, 0, 0x0010",
"addi 2, 0, 0x0008",
"addi 5, 0, 0x1234",
self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))
- def test_sv_load_store_unitstride(self):
+ def tst_sv_load_store_unitstride(self):
""">>> lst = ["addi 1, 0, 0x0010",
"addi 2, 0, 0x0008",
"addi 5, 0, 0x1234",
self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))
+ def test_sv_load_store_bitreverse(self):
+ """>>> lst = ["addi 1, 0, 0x0010",
+ "addi 2, 0, 0x0004",
+ "addi 3, 0, 0x0002",
+ "addi 5, 0, 0x101",
+ "addi 6, 0, 0x202",
+ "addi 7, 0, 0x303",
+ "addi 8, 0, 0x404",
+ "sv.stw 5.v, 0(1)",
+ "sv.lwzbr 9.v, 4(1), 2"]
+
+ note: bitreverse mode is... odd. it's the butterfly generator
+ from Cooley-Tukey FFT:
+ https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
+
+ bitreverse LD is computed as:
+ for i in range(VL):
+ EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
+
+ bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
+ produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
+
+ and thus creates the butterfly needed for one iteration of FFT.
+ the RC (shift) is to be able to offset the LDs by Radix-2 spans
+ """
+ lst = SVP64Asm(["addi 1, 0, 0x0010",
+ "addi 2, 0, 0x0000",
+ "addi 5, 0, 0x101",
+ "addi 6, 0, 0x202",
+ "addi 7, 0, 0x303",
+ "addi 8, 0, 0x404",
+ "sv.stw 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
+ "sv.lwzbr 9.v, 4(1), 2"]) # bit-reversed
+ lst = list(lst)
+
+ # SVSTATE (in this case, VL=4)
+ svstate = SVP64State()
+ svstate.vl[0:7] = 4 # VL
+ svstate.maxvl[0:7] = 4 # MAXVL
+ print ("SVSTATE", bin(svstate.spr.asint()))
+
+ with Program(lst, bigendian=False) as program:
+ sim = self.run_tst_program(program, svstate=svstate)
+ mem = sim.mem.dump(printout=False)
+ print (mem)
+
+ self.assertEqual(mem, [(16, 0x020200000101),
+ (24, 0x040400000303)])
+ print(sim.gpr(1))
+ # from STs
+ self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
+ self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
+ self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
+ self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
+ # r1=0x10, RC=0, offs=4: contents of memory expected at:
+ # element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
+ # element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
+ # element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
+ # element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
+ # therefore loaded from (bit-reversed indexing):
+ # r9 => mem[0x10] which was stored from r5
+ # r10 => mem[0x18] which was stored from r7
+ # r11 => mem[0x14] which was stored from r6
+ # r12 => mem[0x1c] which was stored from r8
+ self.assertEqual(sim.gpr(9), SelectableInt(0x101, 64))
+ self.assertEqual(sim.gpr(10), SelectableInt(0x303, 64))
+ self.assertEqual(sim.gpr(11), SelectableInt(0x202, 64))
+ self.assertEqual(sim.gpr(12), SelectableInt(0x404, 64))
+
def run_tst_program(self, prog, initial_regs=None,
svstate=None):
if initial_regs is None:
"RT": self.FormX.RT,
"RA": self.FormX.RA,
"RB": self.FormX.RB,
- "RC": self.FormVA.RB,
+ "RC": self.FormVA.RC,
"SI": self.FormD.SI,
"UI": self.FormD.UI,
"L": self.FormD.L,
'sv.addi win2.v, win.v, -1',
'sv.add./mrr 5.v, 2.v, 1.v',
'sv.lhzbr 5.v, 11(9.v), 15',
+ 'sv.lwzbr 5.v, 11(9.v), 15',
]
isa = SVP64Asm(lst, macros=macros)
print ("list", list(isa))