From 6e1ff2a5c6dcfa2911b64e9e5b1c90dcdd476475 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 30 May 2021 12:40:08 +0100 Subject: [PATCH] add "normal" element-strided LD/ST decode/support to ISACaller --- src/openpower/decoder/isa/caller.py | 4 +- .../decoder/isa/test_caller_svp64_ldst.py | 45 ++++++++++++++++ src/openpower/decoder/power_svp64_rm.py | 2 +- src/openpower/sv/trans/svp64.py | 53 +++++++++++++++++-- 4 files changed, 98 insertions(+), 6 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index c07003bb..d3a6fa38 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1163,7 +1163,9 @@ class ISACaller: elif ldstmode == SVP64LDSTmode.ELSTRIDE.value: D = SelectableInt(D * offsmul, 32) replace_d = True - log("LDSTmode", ldstmode, offsmul, D) + ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec + ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in + log("LDSTmode", ldstmode, offsmul, D, ldst_ra_vec, ldst_imz_in) # new replacement D if replace_d: self.namespace['D'] = D diff --git a/src/openpower/decoder/isa/test_caller_svp64_ldst.py b/src/openpower/decoder/isa/test_caller_svp64_ldst.py index a9170484..0ebccea8 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_ldst.py +++ b/src/openpower/decoder/isa/test_caller_svp64_ldst.py @@ -22,6 +22,51 @@ class DecoderTestCase(FHDLTestCase): for i in range(32): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) + def test_sv_load_store_elementstride(self): + """>>> lst = ["addi 1, 0, 0x0010", + "addi 2, 0, 0x0008", + "addi 5, 0, 0x1234", + "addi 6, 0, 0x1235", + "sv.stw/els 5.v, 16(1)", + "sv.lwz/els 9.v, 16(1)"] + + note: element stride mode is only enabled when RA is a scalar + and when the immediate is non-zero + + element stride is computed as: + for i in range(VL): + EA = (RA|0) + EXTS(D) * i + """ + lst = SVP64Asm(["addi 1, 0, 0x0010", + "addi 2, 0, 0x0008", + "addi 5, 0, 0x1234", + "addi 6, 0, 
0x1235", + "sv.stw/els 5.v, 24(1)", # scalar r1 (0x10) + 24*offs + "sv.lwz/els 9.v, 24(1)"]) # scalar r1 (0x10) + 24*offs + lst = list(lst) + + # SVSTATE (in this case, VL=2) + svstate = SVP64State() + svstate.vl[0:7] = 2 # VL + svstate.maxvl[0:7] = 2 # MAXVL + print ("SVSTATE", bin(svstate.spr.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate) + mem = sim.mem.dump(printout=False) + print (mem) + # contents of memory expected at: + # element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10) + # element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28) + # therefore, at address 0x10 ==> 0x1234 + # therefore, at address 0x28 ==> 0x1235 + expected_mem = [(16, 0x1234), + (40, 0x1235)] + self.assertEqual(mem, expected_mem) + print(sim.gpr(1)) + self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64)) + self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64)) + def test_sv_load_store_unitstride(self): """>>> lst = ["addi 1, 0, 0x0010", "addi 2, 0, 0x0008", diff --git a/src/openpower/decoder/power_svp64_rm.py b/src/openpower/decoder/power_svp64_rm.py index 90000f07..dad6853b 100644 --- a/src/openpower/decoder/power_svp64_rm.py +++ b/src/openpower/decoder/power_svp64_rm.py @@ -179,7 +179,7 @@ class SVP64RMModeDecode(Elaboratable): comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE) # but if the LD/ST immediate is zero, allow cache-inhibited # loads from same location, therefore don't do element-striding - with m.Elif(self.ldst_imz_in): + with m.Elif(~self.ldst_imz_in): comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE) # extract src/dest predicate. 
use EXTRA3.MASK because EXTRA2.MASK diff --git a/src/openpower/sv/trans/svp64.py b/src/openpower/sv/trans/svp64.py index 274016a1..00a57e82 100644 --- a/src/openpower/sv/trans/svp64.py +++ b/src/openpower/sv/trans/svp64.py @@ -4,13 +4,14 @@ """SVP64 OpenPOWER v3.0B assembly translator -This class takes raw svp64 assembly mnemonics (aliases excluded) and -creates an EXT001-encoded "svp64 prefix" followed by a v3.0B opcode. +This class takes raw svp64 assembly mnemonics (aliases excluded) and creates +an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode. It is very simple and straightforward, the only weirdness being the extraction of the register information and conversion to v3.0B numbering. Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/ +Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/ Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578 """ @@ -144,6 +145,7 @@ def decode_imm(field): else: return None, field + # decodes svp64 assembly listings and creates EXT001 svp64 prefixes class SVP64Asm: def __init__(self, lst, bigendian=False): @@ -416,6 +418,30 @@ class SVP64Asm: svp64_rm.extra3[idx].eq( SelectableInt(sv_extra, SVP64RM_EXTRA3_SPEC_SIZE)) + # identify if the op is a LD/ST. the "blegh" way. copied + # from power_enums. TODO, split the list _insns down. 
+ is_ld = v30b_op in [ + "lbarx", "lbz", "lbzu", "lbzux", "lbzx", # load byte + "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx", # load double + "lfs", "lfsx", "lfsu", "lfsux", # FP load single + "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax", # FP load double + "lha", "lharx", "lhau", "lhaux", "lhax", # load half + "lhbrx", "lhz", "lhzu", "lhzux", "lhzx", # more load half + "lwa", "lwarx", "lwaux", "lwax", "lwbrx", # load word + "lwz", "lwzcix", "lwzu", "lwzux", "lwzx", # more load word + ] + is_st = v30b_op in [ + "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx", + "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx", + "stfs", "stfsx", "stfsu", "stfsux", # FP store single + "stfd", "stfdx", "stfdu", "stfdux", "stfiwx", # FP store double + "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx", + "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx", + ] + # use this to determine if the SVP64 RM format is different. + # see https://libre-soc.org/openpower/sv/ldst/ + is_ldst = is_ld or is_st + # parts of svp64_rm mmode = 0 # bit 0 pmask = 0 # bits 1-3 @@ -440,6 +466,7 @@ class SVP64Asm: predresult = False failfirst = False + ldst_elstride = 0 # ok let's start identifying opcode augmentation fields for encmode in opmodes: @@ -470,6 +497,9 @@ class SVP64Asm: destwid = decode_elwidth(encmode[3:]) elif encmode.startswith("sw="): srcwid = decode_elwidth(encmode[3:]) + # element-strided LD/ST + elif encmode == 'els': + ldst_elstride = 1 # saturation elif encmode == 'sats': assert sv_mode is None @@ -539,7 +569,6 @@ class SVP64Asm: "dest-mask can only be specified on Twin-predicate ops" # construct the mode field, doing sanity-checking along the way - if mapreduce_svm: assert sv_mode == 0b00, "sub-vector mode in mapreduce only" assert subvl != 0, "sub-vector mode not possible on SUBVL=1" @@ -551,12 +580,22 @@ class SVP64Asm: assert has_pmask or mask_m_specified, \ "dest zeroing requires a dest predicate" + ###################################### # "normal" mode if sv_mode is None: 
mode |= src_zero << SVP64MODE.SZ # predicate zeroing mode |= dst_zero << SVP64MODE.DZ # predicate zeroing + if is_ldst: + # TODO: for now, LD/ST-indexed is ignored. + mode |= ldst_elstride << SVP64MODE.ELS_NORMAL # element-strided + else: + # TODO, reduce and subvector mode + # 00 1 dz CRM reduce mode (mapreduce), SUBVL=1 + # 00 1 SVM CRM subvector reduce mode, SUBVL>1 + pass sv_mode = 0b00 + ###################################### # "mapreduce" modes elif sv_mode == 0b00: mode |= (0b1<