SVSHAPE1[28:29] <- 0b10 # ci schedule
SVSHAPE2[28:29] <- 0b11 # size schedule
# set schedule up for iDCT / DCT inverse of half-swapped ordering
- if (SVRM = 0b0110) | (SVRM = 0b1110) then
+ if (SVRM = 0b0110) | (SVRM = 0b1110) | (SVRM = 0b1111) then
vlen[0:6] <- (0b00 || SVxd) + 0b0000001
# set up template in SVSHAPE0
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
if (SVRM = 0b1110) then
SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
- SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
+ if (SVRM = 0b1111) then
+ SVSHAPE0[30:31] <- 0b01 # FFT mode
+ else
+ SVSHAPE0[30:31] <- 0b11 # DCT mode
SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
# set VL, MVL and Vertical-First
SVSTATE[0:6] <- vlen
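A minimal Python sketch of what this template sets up (field values taken
directly from the pseudocode above; the prints are illustrative only):
SVxd=7 gives VL=8, SVRM=0b1111 selects FFT mode 0b01, and SVRM=0b1110
selects the DCT opposite half-swap submode:

    SVxd, SVRM = 7, 0b1111
    vlen = SVxd + 1                                # vlen[0:6] <- SVxd + 1
    mode = 0b01 if SVRM == 0b1111 else 0b11        # SVSHAPE0[30:31]: FFT vs DCT
    submode2 = 0b001 if SVRM == 0b1110 else 0b000  # SVSHAPE0[18:20]
    print(vlen, bin(mode), bin(submode2))          # 8 0b1 0b0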
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 56 || MEM(EA, 1)
Special Registers Altered:
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 56 || MEM(EA, 1)
RA <- EA
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 48 || MEM(EA, 2)
Special Registers Altered:
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 48 || MEM(EA, 2)
RA <- EA
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
RT <- EXTS(MEM(EA, 2))
Special Registers Altered:
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
RT <- EXTS(MEM(EA, 2))
RA <- EA
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 32 || MEM(EA, 4)
Special Registers Altered:
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
RT <- [0] * 32 || MEM(EA, 4)
RA <- EA
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(bitrev(srcstep, VL) * EXTS(SVDS || 0b00), n)
+ EA <- b + SHL64(srcstep * EXTS(SVDS || 0b00), n)
RT <- EXTS(MEM(EA, 4))
Special Registers Altered:
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(EXTS(SVDS || 0b00), n)
+ EA <- b + SHL64(srcstep * EXTS(SVDS || 0b00), n)
RT <- MEM(EA, 8)
Special Registers Altered:
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(EXTS(SVDS || 0b00), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVDS || 0b00), n)
RT <- MEM(EA, 8)
RA <- EA
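All of the shifted forms above share one effective-address calculation. A
hedged Python model of it (a sketch of the pseudocode, not the simulator's
implementation; exts mirrors EXTS, and the 11-bit SVD width is taken from
the assembler notes further below):

    def exts(v, bits):
        # sign-extend a bits-wide field to a Python integer
        return v - (1 << bits) if v & (1 << (bits - 1)) else v

    def shifted_ea(ra, svd, shift, srcstep):
        # EA <- (RA|0) + SHL64(srcstep * EXTS(SVD), n), n = (RC)[58:63]
        return ra + ((srcstep * exts(svd, 11)) << shift)

    # VL=4, D=4, shift=0: plain sequential word addresses
    print([hex(shifted_ea(0x10, 4, 0, i)) for i in range(4)])
    # ['0x10', '0x14', '0x18', '0x1c']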
SVD-Form
-* lfsbr FRT,SVD(RA),RC
+* lfssh FRT,SVD(RA),RC
Pseudo-code:
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
FRT <- DOUBLE(MEM(EA, 4))
Special Registers Altered:
SVD-Form
-* lfsubr FRT,SVD(RA),RC
+* lfsush FRT,SVD(RA),RC
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
FRT <- DOUBLE(MEM(EA, 4))
RA <- EA
SVD-Form
-* lfdbr FRT,SVD(RA),RC
+* lfdsh FRT,SVD(RA),RC
Pseudo-code:
b <- (RA|0)
n <- (RC)[58:63]
- EA <- b + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n)
+ EA <- b + SHL64(srcstep * EXTS(SVD), n)
FRT <- MEM(EA, 8)
Special Registers Altered:
SVD-Form
-* lfdubr FRT,SVD(RA),RC
+* lfdush FRT,SVD(RA),RC
Pseudo-code:
n <- (RC)[58:63]
- EA <- (RA) + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n)
+ EA <- (RA) + SHL64(srcstep * EXTS(SVD), n)
FRT <- MEM(EA, 8)
RA <- EA
# mode bits
MOD2_MSB = 0
MOD2_LSB = 1
- LDST_BITREV = 2 # set =1 for bitreverse mode
+ LDST_SHIFT = 2 # set =1 for shift mode
# when predicate not set: 0=ignore/skip 1=zero
DZ = 3 # for destination
SZ = 4 # for source
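Requesting shifted mode from software is then just a matter of setting this
one bit in the RM mode field, exactly as the assembler below does with
mode |= 1 << SVP64MODE.LDST_SHIFT; a trivial sketch:

    LDST_SHIFT = 2                 # bit position within the mode field
    mode = 0
    mode |= 1 << LDST_SHIFT        # select shifted LD/ST mode
    assert (mode >> LDST_SHIFT) & 1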
from openpower.decoder.power_enums import SVPtype
-from openpower.decoder.helpers import (exts, gtu, ltu, undefined, bitrev)
+from openpower.decoder.helpers import (exts, gtu, ltu, undefined)
from openpower.consts import PIb, MSRb # big-endian (PowerISA versions)
from openpower.consts import SVP64CROffs
from openpower.decoder.power_svp64 import SVP64RM, decode_extra
yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff) # v3.0B suffix
yield self.dec2.sv_rm.eq(sv_rm) # svp64 prefix
yield Settle()
- # store this for use in get_src_dststeps()
- self.ldstmode = yield self.dec2.rm_dec.ldstmode
def execute_one(self):
"""execute one instruction
replace_d = False # update / replace constant in pseudocode
if self.is_svp64_mode:
ldstmode = yield self.dec2.rm_dec.ldstmode
- # bitreverse mode reads SVD (or SVDS - TODO)
+ # shift mode reads SVD (or SVDS - TODO)
# *BUT*... because this is "overloading" of LD operations,
# it gets *STORED* into D (or DS, TODO)
- if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+ if ldstmode == SVP64LDSTmode.SHIFT.value:
imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
imm = exts(imm, 11) # sign-extend to integer
- log ("bitrev SVD", imm)
+ log ("shift SVD", imm)
replace_d = True
else:
if info.form == 'DS':
offsmul = dststep
log("D-field dst", imm, offsmul)
-            # bit-reverse mode, rev already done through get_src_dst_steps()
+            # shift mode
- if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+ if ldstmode == SVP64LDSTmode.SHIFT.value:
# manually look up RC, sigh
RC = yield self.dec2.dec.RC[0:5]
RC = self.gpr(RC)
- log ("LD-BITREVERSE:", "VL", vl,
+ log ("LD-SHIFT:", "VL", vl,
"RC", RC.value, "imm", imm,
"offs", bin(offsmul),
)
log (" new dststep", dststep)
def get_src_dststeps(self):
- """gets srcstep and dststep but performs bit-reversal on srcstep if
- required. use this ONLY to perform calculations, do NOT update
- SVSTATE with the bit-reversed value of srcstep
-
- ARGH, had to store self.ldstmode and VL due to yield issues
+ """gets srcstep and dststep
"""
- srcstep, dststep = self.new_srcstep, self.new_dststep
- if self.is_svp64_mode:
- if self.ldstmode == SVP64LDSTmode.BITREVERSE.value:
- vl = self.svstate.vl
- log ("SRCSTEP-BITREVERSE:", "VL", vl, "srcstep", srcstep,
- "rev", bin(bitrev(srcstep, vl)))
- srcstep = bitrev(srcstep, vl)
-
- return (srcstep, dststep)
+ return self.new_srcstep, self.new_dststep
def update_new_svstate_steps(self):
-        # note, do not get the bit-reversed srcstep here!
ji = list(range(n))
levels = n.bit_length() - 1
- if SVSHAPE.submode2 == 0b001:
+ ri = [reverse_bits(i, levels) for i in range(n)]
+
+ if SVSHAPE.mode == 0b01: # FFT, bitrev only
+ ji = [ji[ri[i]] for i in range(n)]
+ elif SVSHAPE.submode2 == 0b001:
ji = halfrev2(ji, True)
+ ji = [ji[ri[i]] for i in range(n)]
else:
ji = halfrev2(ji, False)
-
- if False: # swap: TODO, add extra bit-reverse mode
- ri = [reverse_bits(i, levels) for i in range(n)]
ji = [ji[ri[i]] for i in range(n)]
# invert order if requested
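For the new FFT-only path (SVSHAPE.mode == 0b01) the resulting load order is
the classic bit-reversed permutation. A self-contained sketch for n=8
(reverse_bits re-stated here for illustration, assumed to behave the same as
the module's own helper):

    def reverse_bits(i, levels):
        # reverse the lowest 'levels' bits of i (0b001 -> 0b100 for levels=3)
        r = 0
        for _ in range(levels):
            r = (r << 1) | (i & 1)
            i >>= 1
        return r

    n = 8
    levels = n.bit_length() - 1              # 3
    ri = [reverse_bits(i, levels) for i in range(n)]
    ji = [list(range(n))[ri[i]] for i in range(n)]
    print(ji)                                # [0, 4, 2, 6, 1, 5, 3, 7]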
iterate_fn = iterate_dct_outer_butterfly_indices
elif self.ydimsz in [5, 13]:
iterate_fn = iterate_dct_inner_costable_indices
- elif self.ydimsz == 6:
+ elif self.ydimsz in [6, 14, 15]:
iterate_fn = iterate_dct_inner_halfswap_loadstore
# create a **NEW** iterator each time this is called
return iterate_fn(deepcopy(self))
""">>> lst = [# LOAD bit-reversed with half-swap
"svshape 8, 1, 1, 6, 0",
"svremap 1, 0, 0, 0, 0, 0, 0, 1",
- "sv.lfsbr 0.v, 4(1), 2",
+ "sv.lfssh 0.v, 4(1), 2",
# Inner butterfly, twin +/- MUL-ADD-SUB
"svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, 1, 4, 0",
lst = SVP64Asm( ["addi 1, 0, 0x000",
"svshape 8, 1, 1, 6, 0",
"svremap 1, 0, 0, 0, 0, 0, 0, 1",
- "sv.lfsbr 0.v, 4(1), 2",
+ "sv.lfssh 0.v, 4(1), 2",
"svremap 31, 1, 0, 2, 0, 1, 1",
"svshape 8, 1, 1, 4, 0",
"sv.fdmadds 0.v, 0.v, 0.v, 8.v",
however it turns out that they can be *merged*, and for
the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
- *ignore* their REMAPs (by definition), and for the second
- one (sv.ffads) exactly the right REMAPs are also ignored!
+ *ignore* their REMAPs (by definition, because you can't REMAP
+ scalar operands), and for the second one (sv.ffads) exactly the
+ right REMAPs are also ignored!
+ therefore we can merge:
+ "svremap 5, 1, 0, 2, 0, 0, 1",
+ "svremap 26, 0, 0, 0, 0, 1, 1",
+ into:
"svremap 31, 1, 0, 2, 0, 1, 1",
+ and save one instruction.
"""
lst = SVP64Asm( [
# set triple butterfly mode with persistent "REMAP"
def test_sv_remap_fpmadds_fft_ldst(self):
""">>>lst = ["setvl 0, 0, 8, 0, 1, 1",
- "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
+ "sv.lfssh 0.v, 4(0), 20", # bit-reversed
"svshape 8, 1, 1, 1, 0",
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v"
runs a full in-place O(N log2 N) butterfly schedule for
- Discrete Fourier Transform, using bit-reversed LD/ST
+ Discrete Fourier Transform, using shifted LD/ST with the
+ bit-reversal supplied by a REMAP schedule (svshape mode 15)
"""
- lst = SVP64Asm( ["setvl 0, 0, 8, 0, 1, 1",
- "sv.lfsbr 0.v, 4(0), 20", # bit-reversed
+ lst = SVP64Asm( ["svshape 8, 1, 1, 15, 0",
+ "svremap 1, 0, 0, 0, 0, 0, 0, 0",
+ "sv.lfssh 0.v, 4(0), 20", # shifted
"svshape 8, 1, 1, 1, 0",
"svremap 31, 1, 0, 2, 0, 1, 0",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v"
self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))
- def test_sv_load_store_bitreverse(self):
+ def test_sv_load_store_shifted(self):
""">>> lst = ["addi 1, 0, 0x0010",
"addi 2, 0, 0x0004",
"addi 3, 0, 0x0002",
"addi 7, 0, 0x303",
"addi 8, 0, 0x404",
"sv.stw 5.v, 0(1)",
- "sv.lwzbr 12.v, 4(1), 2"]
+ "sv.lwzsh 12.v, 4(1), 2"]
- note: bitreverse mode is... odd. it's the butterfly generator
- from Cooley-Tukey FFT:
- https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
-
- bitreverse LD is computed as:
+ shifted LD is computed as:
for i in range(VL):
- EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
-
- bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
- produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
-
- and thus creates the butterfly needed for one iteration of FFT.
- the RC (shift) is to be able to offset the LDs by Radix-2 spans
+ EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
"""
lst = SVP64Asm(["addi 1, 0, 0x0010",
"addi 2, 0, 0x0000",
"addi 7, 0, 0x303",
"addi 8, 0, 0x404",
"sv.stw 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
- "sv.lwzbr 12.v, 4(1), 2"]) # bit-reversed
+ "sv.lwzsh 12.v, 4(1), 2"]) # bit-reversed
lst = list(lst)
# SVSTATE (in this case, VL=4)
self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
# r1=0x10, RC=0, offs=4: contents of memory expected at:
- # element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
- # element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
- # element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
- # element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b10*4 => 0x1c
+ # element 0: EA = r1 + 0b00*4 => 0x10 + 0b00*4 => 0x10
+ # element 1: EA = r1 + 0b01*4 => 0x10 + 0b01*4 => 0x14
+ # element 2: EA = r1 + 0b10*4 => 0x10 + 0b10*4 => 0x18
+ # element 3: EA = r1 + 0b11*4 => 0x10 + 0b11*4 => 0x1c
- # therefore loaded from (bit-reversed indexing):
- # r9 => mem[0x10] which was stored from r5
- # r10 => mem[0x18] which was stored from r6
- # r11 => mem[0x18] which was stored from r7
- # r12 => mem[0x1c] which was stored from r8
+ # therefore loaded from (sequential indexing):
+ # r12 => mem[0x10] which was stored from r5
+ # r13 => mem[0x14] which was stored from r6
+ # r14 => mem[0x18] which was stored from r7
+ # r15 => mem[0x1c] which was stored from r8
self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
- self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
- self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
+ self.assertEqual(sim.gpr(13), SelectableInt(0x202, 64))
+ self.assertEqual(sim.gpr(14), SelectableInt(0x303, 64))
self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))
- def test_sv_load_store_bitreverse_fp(self):
+ def test_sv_load_store_shifted_fp(self):
""">>> lst = ["addi 1, 0, 0x0010",
"addi 2, 0, 0x0004",
"addi 3, 0, 0x0002",
"sv.std 5.v, 0(1)",
"sv.lfdbr 12.v, 4(1), 2"]
- note: bitreverse mode is... odd. it's the butterfly generator
- from Cooley-Tukey FFT:
- https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
-
- bitreverse LD is computed as:
+ shifted LD is computed as:
for i in range(VL):
- EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
-
- bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
- produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
-
- and thus creates the butterfly needed for one iteration of FFT.
- the RC (shift) is to be able to offset the LDs by Radix-2 spans
+ EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
"""
lst = SVP64Asm(["addi 1, 0, 0x0010",
"addi 2, 0, 0x0000",
"addi 7, 0, 0x303",
"addi 8, 0, 0x404",
"sv.std 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
- "sv.lfdbr 12.v, 8(1), 2"]) # bit-reversed
+ "sv.lfdsh 12.v, 8(1), 2"]) # shifted
lst = list(lst)
# SVSTATE (in this case, VL=4)
- # r11 => mem[0x18] which was stored from r7
- # r12 => mem[0x1c] which was stored from r8
+ # f14 => mem[0x20] which was stored from r7
+ # f15 => mem[0x28] which was stored from r8
self.assertEqual(sim.fpr(12), SelectableInt(0x101, 64))
- self.assertEqual(sim.fpr(13), SelectableInt(0x303, 64))
- self.assertEqual(sim.fpr(14), SelectableInt(0x202, 64))
+ self.assertEqual(sim.fpr(13), SelectableInt(0x202, 64))
+ self.assertEqual(sim.fpr(14), SelectableInt(0x303, 64))
self.assertEqual(sim.fpr(15), SelectableInt(0x404, 64))
- def test_sv_load_store_bitreverse2(self):
+ def test_sv_load_store_shifted2(self):
""">>> lst = ["addi 1, 0, 0x0010",
"addi 2, 0, 0x0004",
"addi 3, 0, 0x0002",
"sv.stfs 4.v, 0(1)",
- "sv.lfsbr 12.v, 4(1), 2"]
-
- note: bitreverse mode is... odd. it's the butterfly generator
- from Cooley-Tukey FFT:
- https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
+ "sv.lfssh 12.v, 4(1), 2"]
- bitreverse LD is computed as:
+ shifted LD is computed as:
for i in range(VL):
- EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
-
- bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
- produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
+ EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
- and thus creates the butterfly needed for one iteration of FFT.
- the RC (shift) is to be able to offset the LDs by Radix-2 spans
"""
lst = SVP64Asm(["addi 1, 0, 0x0010",
"addi 2, 0, 0x0000",
"sv.stfs 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
- "sv.lfsbr 12.v, 4(1), 2"]) # bit-reversed
+ "sv.lfssh 12.v, 4(1), 2"]) # shifted (by zero, but hey)
lst = list(lst)
# SVSTATE (in this case, VL=4)
- # expected results, remember that bit-reversed load has been done
+ # expected results: the load is plain sequential (shift of zero)
expected_fprs = deepcopy(fprs)
expected_fprs[12] = fprs[4] # 0b00 -> 0b00
- expected_fprs[13] = fprs[6] # 0b01 -> 0b10
- expected_fprs[14] = fprs[5] # 0b10 -> 0b01
+ expected_fprs[13] = fprs[5] # 0b01 -> 0b01
+ expected_fprs[14] = fprs[6] # 0b10 -> 0b10
expected_fprs[15] = fprs[7] # 0b11 -> 0b11
with Program(lst, bigendian=False) as program:
"svshape 3, 3, 4, 0, 0",
"svremap 1, 1, 2, 0, 0, 0, 0, 1",
"sv.lwz 20.v, 0(1)",
- #"sv.lwzbr 12.v, 4(1), 2", # bit-reversed
+ #"sv.lwzsh 12.v, 4(1), 2", # bit-reversed
])
lst = list(lst)
"sv.stw 5.v, 0(1)",
"svshape 8, 1, 1, 6, 0",
"svremap 31, 1, 2, 3, 0, 0, 0, 0",
- "sv.lwzbr 12.v, 4(1), 2"]
+ "sv.lwzsh 12.v, 4(1), 2"]
- bitreverse LD is computed as:
+ shifted LD is computed as:
for i in range(VL):
- EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
+ EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
- bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
- produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
+ the reordering formerly done by bitreverse (0 1 2 3 -> 0 2 1 3)
+ is now supplied by the svshape/svremap REMAP schedule
"svshape 8, 1, 1, 6, 0",
"svremap 1, 0, 0, 0, 0, 0, 0, 1",
#"setvl 0, 0, 8, 0, 1, 1",
- "sv.lwzbr 12.v, 4(1), 2", # bit-reversed
+ "sv.lwzsh 12.v, 4(1), 2", # bit-reversed
#"sv.lwz 12.v, 0(1)"
])
lst = list(lst)
"sv.stw 5.v, 0(1)",
"svshape 8, 1, 1, 6, 0",
"svremap 31, 1, 2, 3, 0, 0, 0, 0",
- "sv.lwzbr 12.v, 4(1), 2"]
+ "sv.lwzsh 12.v, 4(1), 2"]
- bitreverse LD is computed as:
+ shifted LD is computed as:
for i in range(VL):
- EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
+ EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
- bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
- produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
+ the reordering formerly done by bitreverse (0 1 2 3 -> 0 2 1 3)
+ is now supplied by the svshape/svremap REMAP schedule
"svshape 8, 1, 1, 14, 0",
"svremap 16, 0, 0, 0, 0, 0, 0, 1",
#"setvl 0, 0, 8, 0, 1, 1",
- "sv.lwzbr 12.v, 4(1), 2", # bit-reversed
+ "sv.lwzsh 12.v, 4(1), 2", # bit-reversed
#"sv.lwz 12.v, 0(1)"
])
lst = list(lst)
with m.If(self.is_svp64_mode & is_major_ld):
# straight-up: "it's a LD". this gives enough info
# for SVP64 RM Mode decoding to detect LD/ST, and
- # consequently detect the BITREVERSE mode. sigh
+ # consequently detect the SHIFT mode. sigh
comb += rm_dec.fn_in.eq(Function.LDST)
with m.Else():
comb += rm_dec.fn_in.eq(fn) # decode needs to know Fn type
# main PowerDecoder2 determines if different SVP64 modes enabled
if not self.final:
- # if bit-reverse mode requested
- bitrev = rm_dec.ldstmode == SVP64LDSTmode.BITREVERSE
- comb += self.use_svp64_ldst_dec.eq(bitrev)
+ # if shift mode requested
+ shiftmode = rm_dec.ldstmode == SVP64LDSTmode.SHIFT
+ comb += self.use_svp64_ldst_dec.eq(shiftmode)
# detect if SVP64 FFT mode enabled (really bad hack),
# exclude fcfids and others
# XXX this is a REALLY bad hack, REALLY has to be done better.
INDEXED = 1
ELSTRIDE = 2
UNITSTRIDE = 3
- BITREVERSE = 4
+ SHIFT = 4
# supported instructions: make sure to keep up-to-date with CSV files
with m.If(self.rc_in):
comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
- # Bit-reversed Mode
- with m.If(mode[SVP64MODE.LDST_BITREV]):
- comb += self.ldstmode.eq(SVP64LDSTmode.BITREVERSE)
+ # Shifted Mode
+ with m.If(mode[SVP64MODE.LDST_SHIFT]):
+ comb += self.ldstmode.eq(SVP64LDSTmode.SHIFT)
# RA is vectorised
with m.Elif(self.ldst_ra_vec):
comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
v30b_op = v30b_op[:-1]
- # sigh again, have to recognised LD/ST bit-reverse instructions
+ # sigh again, have to recognise LD/ST shifted instructions
- # this has to be "processed" to fit into a v3.0B without the "br"
- # e.g. ldbr is actually ld
- ldst_bitreverse = v30b_op.startswith("l") and v30b_op.endswith("br")
+ # this has to be "processed" to fit into a v3.0B without the "sh"
+ # e.g. ldsh is actually ld
+ ldst_shift = v30b_op.startswith("l") and v30b_op.endswith("sh")
if v30b_op not in isa.instr:
raise Exception("opcode %s of '%s' not supported" % \
(v30b_op, insn))
- if ldst_bitreverse:
+ if ldst_shift:
# okaay we need to process the fields and make this:
- # ldbr RT, SVD(RA), RC - 11 bits for SVD, 5 for RC
+ # ldsh RT, SVD(RA), RC - 11 bits for SVD, 5 for RC
# into this:
# ld RT, D(RA) - 16 bits
# likewise same for SVDS (9 bits for SVDS, 5 for RC, 14 bits for DS)
newfields[1] = "%d(%s)" % (immed, RA)
fields = newfields
- # and strip off "br" from end, and add "br" to opmodes, instead
+ # and strip off "sh" from end, and add "sh" to opmodes, instead
v30b_op = v30b_op[:-2]
- opmodes.append("br")
+ opmodes.append("sh")
log ("rewritten", v30b_op, opmodes, fields)
if v30b_op not in svp64.instrs:
smmode, smask = decode_predicate(encmode[3:])
mmode = smmode
has_smask = True
- # bitreverse LD/ST
- elif encmode.startswith("br"):
- ldst_bitreverse = True
+ # shifted LD/ST
+ elif encmode.startswith("sh"):
+ ldst_shift = True
# vec2/3/4
elif encmode.startswith("vec"):
subvl = decode_subvl(encmode[3:])
assert has_pmask or mask_m_specified, \
"dest zeroing requires a dest predicate"
- # check LDST bitreverse, only available in "normal" mode
- if is_ldst and ldst_bitreverse:
+ # check LDST shifted, only available in "normal" mode
+ if is_ldst and ldst_shift:
assert sv_mode is None, \
- "LD bit-reverse cannot have modes (%s) applied" % sv_mode
+ "LD shift cannot have modes (%s) applied" % sv_mode
######################################
# "normal" mode
if is_ldst:
# TODO: for now, LD/ST-indexed is ignored.
mode |= ldst_elstride << SVP64MODE.ELS_NORMAL # element-strided
- # bitreverse mode
- if ldst_bitreverse:
- mode |= 1 << SVP64MODE.LDST_BITREV
+ # shifted mode
+ if ldst_shift:
+ mode |= 1 << SVP64MODE.LDST_SHIFT
else:
# TODO, reduce and subvector mode
# 00 1 dz CRM reduce mode (mapreduce), SUBVL=1
lst = [
'sv.addi win2.v, win.v, -1',
'sv.add./mrr 5.v, 2.v, 1.v',
- #'sv.lhzbr 5.v, 11(9.v), 15',
- #'sv.lwzbr 5.v, 11(9.v), 15',
+ #'sv.lhzsh 5.v, 11(9.v), 15',
+ #'sv.lwzsh 5.v, 11(9.v), 15',
'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
]
lst = [
'svshape 8, 1, 1, 1, 1',
]
lst = [
- #'sv.lfsbr 4.v, 11(8.v), 15',
- #'sv.lwzbr 4.v, 11(8.v), 15',
+ #'sv.lfssh 4.v, 11(8.v), 15',
+ #'sv.lwzsh 4.v, 11(8.v), 15',
#'sv.svstep. 2.v, 4, 0',
#'sv.fcfids. 48.v, 64.v',
'sv.fcoss. 80.v, 0.v',