From: Luke Kenneth Casson Leighton Date: Sun, 28 Mar 2021 15:48:53 +0000 (+0100) Subject: rather invasive reduction of SPR regfile size X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3473097265e97fb2e29c874bcb853ba48f35d090;p=soc.git rather invasive reduction of SPR regfile size done by dynamically creating an alternative SPR Enum --- diff --git a/src/soc/decoder/decode2execute1.py b/src/soc/decoder/decode2execute1.py index a6ac6262..6333dcdf 100644 --- a/src/soc/decoder/decode2execute1.py +++ b/src/soc/decoder/decode2execute1.py @@ -5,7 +5,8 @@ based on Anton Blanchard microwatt decode2.vhdl """ from nmigen import Signal, Record from nmutil.iocontrol import RecordObject -from soc.decoder.power_enums import MicrOp, CryIn, Function, SPR, LDSTMode +from soc.decoder.power_enums import (MicrOp, CryIn, Function, + SPRfull, SPRreduced, LDSTMode) from soc.consts import TT from soc.experiment.mem_types import LDSTException @@ -83,7 +84,13 @@ class Decode2ToOperand(IssuerDecode2ToOperand): class Decode2ToExecute1Type(RecordObject): - def __init__(self, name=None, asmcode=True, opkls=None, do=None): + def __init__(self, name=None, asmcode=True, opkls=None, do=None, + regreduce_en=False): + + if regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull if do is None and opkls is None: opkls = Decode2ToOperand diff --git a/src/soc/decoder/isa/mem.py b/src/soc/decoder/isa/mem.py index 7a806a40..a0ebcc4a 100644 --- a/src/soc/decoder/isa/mem.py +++ b/src/soc/decoder/isa/mem.py @@ -16,8 +16,6 @@ from copy import copy from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt, selectconcat) -from soc.decoder.power_enums import SPR as DEC_SPR - from soc.decoder.helpers import exts, gtu, ltu, undefined import math import sys diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index abed03ec..c0d523d6 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -27,7 +27,8 @@ from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_enums import (MicrOp, CryIn, Function, CRInSel, CROutSel, LdstLen, In1Sel, In2Sel, In3Sel, - OutSel, SPR, RC, LDSTMode, + OutSel, SPRfull, SPRreduced, + RC, LDSTMode, SVEXTRA, SVEtype, SVPtype) from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, Decode2ToOperand) @@ -65,13 +66,23 @@ class SPRMap(Elaboratable): """SPRMap: maps POWER9 SPR numbers to internal enum values, fast and slow """ - def __init__(self): + def __init__(self, regreduce_en): + self.regreduce_en = regreduce_en + if regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull + self.spr_i = Signal(10, reset_less=True) self.spr_o = Data(SPR, name="spr_o") self.fast_o = Data(3, name="fast_o") def elaborate(self, platform): m = Module() + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull with m.Switch(self.spr_i): for i, x in enumerate(SPR): with m.Case(x.value): @@ -90,7 +101,12 @@ class DecodeA(Elaboratable): decodes register RA, implicit and explicit CSRs """ - def __init__(self, dec): + def __init__(self, dec, regreduce_en): + self.regreduce_en = regreduce_en + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull self.dec = dec self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) @@ -104,7 +120,7 @@ class DecodeA(Elaboratable): comb = m.d.comb op = self.dec.op reg = self.reg_out - m.submodules.sprmap = sprmap = SPRMap() + m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en) # select Register A field, if *full 7 bits* are zero (2 more from SVP64) ra = Signal(5, reset_less=True) @@ -325,7 +341,12 @@ class DecodeOut(Elaboratable): decodes output register RA, RT or SPR """ - def __init__(self, dec): + def __init__(self, dec, regreduce_en): + self.regreduce_en = regreduce_en + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull self.dec = dec self.sel_in = Signal(OutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) @@ -336,7 +357,7 @@ class DecodeOut(Elaboratable): def elaborate(self, platform): m = Module() comb = m.d.comb - m.submodules.sprmap = sprmap = SPRMap() + m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en) op = self.dec.op reg = self.reg_out @@ -684,9 +705,10 @@ class PowerDecodeSubset(Elaboratable): only fields actually requested are copied over. hence, "subset" (duh). """ def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None, - svp64_en=True): + svp64_en=True, regreduce_en=False): self.svp64_en = svp64_en + self.regreduce_en = regreduce_en if svp64_en: self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field self.sv_a_nz = Signal(1) @@ -700,7 +722,8 @@ class PowerDecodeSubset(Elaboratable): # only needed for "main" PowerDecode2 if not self.final: - self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do) + self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do, + regreduce_en=regreduce_en) # create decoder if one not already given if dec is None: @@ -766,6 +789,10 @@ class PowerDecodeSubset(Elaboratable): return getattr(self.dec.op, op_field, None) def elaborate(self, platform): + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull m = Module() comb = m.d.comb state = self.state @@ -778,7 +805,8 @@ class PowerDecodeSubset(Elaboratable): name = "tmp" else: name = self.fn_name + "tmp" - self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls) + self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls, + regreduce_en=self.regreduce_en) # set up submodule decoders m.submodules.dec = self.dec @@ -909,8 +937,9 @@ class PowerDecode2(PowerDecodeSubset): """ def __init__(self, dec, opkls=None, fn_name=None, final=False, - state=None, svp64_en=True): - super().__init__(dec, opkls, fn_name, final, state, svp64_en) + state=None, svp64_en=True, regreduce_en=False): + super().__init__(dec, opkls, fn_name, final, state, svp64_en, + regreduce_en=False) self.exc = LDSTException("dec2_exc") if self.svp64_en: @@ -968,10 +997,10 @@ class PowerDecode2(PowerDecodeSubset): # copy over if non-exception, non-privileged etc. is detected # set up submodule decoders - m.submodules.dec_a = dec_a = DecodeA(self.dec) + m.submodules.dec_a = dec_a = DecodeA(self.dec, self.regreduce_en) m.submodules.dec_b = dec_b = DecodeB(self.dec) m.submodules.dec_c = dec_c = DecodeC(self.dec) - m.submodules.dec_o = dec_o = DecodeOut(self.dec) + m.submodules.dec_o = dec_o = DecodeOut(self.dec, self.regreduce_en) m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) diff --git a/src/soc/decoder/power_enums.py b/src/soc/decoder/power_enums.py index faa8cfc1..6c10306b 100644 --- a/src/soc/decoder/power_enums.py +++ b/src/soc/decoder/power_enums.py @@ -8,6 +8,10 @@ Note: for SV, from v3.1B p12: The designated SPR sandbox consists of non-privileged SPRs 704-719 and privileged SPRs 720-735. + +Note: the option exists to select a much shorter list of SPRs, to reduce +regfile size in HDL. this is SPRreduced and the supported list is in +get_spr_enum """ from enum import Enum, unique @@ -430,19 +434,35 @@ class CROutSel(Enum): # http://libre-riscv.org/openpower/isatables/sprs.csv # http://bugs.libre-riscv.org/show_bug.cgi?id=261 -spr_csv = get_csv("sprs.csv") -spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx') -spr_dict = {} -spr_byname = {} -for row in spr_csv: - info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'], - priv_mfspr=row['priv_mfspr'], length=int(row['len']), - idx=int(row['Idx'])) - spr_dict[int(row['Idx'])] = info - spr_byname[row['SPR']] = info -fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv] -SPR = Enum('SPR', fields) - +def get_spr_enum(full_file): + """get_spr_enum - creates an Enum of SPRs, dynamically + has the option to reduce the enum to a much shorter list. + this saves drastically on the size of the regfile + """ + short_list = {'PIDR', 'DAR', 'PRTBL', 'DSIRS', 'SVSRR', 'SVSTATE', + 'SPRG0_priv', 'SPRG1_priv', 'SPRG2_priv', 'SPRG3_priv', + 'SPRG3' + } + spr_csv = [] + for row in get_csv("sprs.csv"): + if full_file or row['SPR'] in short_list: + spr_csv.append(row) + + spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx') + spr_dict = {} + spr_byname = {} + for row in spr_csv: + info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'], + priv_mfspr=row['priv_mfspr'], length=int(row['len']), + idx=int(row['Idx'])) + spr_dict[int(row['Idx'])] = info + spr_byname[row['SPR']] = info + fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv] + SPR = Enum('SPR', fields) + return SPR, spr_dict, spr_byname + +SPRfull, spr_dict, spr_byname = get_spr_enum(full_file=True) +SPRreduced, _, _ = get_spr_enum(full_file=False) XER_bits = { 'SO': 32, @@ -454,11 +474,13 @@ XER_bits = { if __name__ == '__main__': # find out what the heck is in SPR enum :) - print("sprs", len(SPR)) - print(dir(SPR)) + print("sprs full", len(SPRfull)) + print(dir(SPRfull)) + print("sprs reduced", len(SPRreduced)) + print(dir(SPRreduced)) print(dir(Enum)) - print(SPR.__members__['TAR']) - for x in SPR: + print(SPRfull.__members__['TAR']) + for x in SPRfull: print(x, x.value, str(x), x.name) print("function", Function.ALU.name) diff --git a/src/soc/fu/mmu/fsm.py b/src/soc/fu/mmu/fsm.py index bd0c36ee..76cff311 100644 --- a/src/soc/fu/mmu/fsm.py +++ b/src/soc/fu/mmu/fsm.py @@ -10,7 +10,7 @@ from soc.experiment.dcache import DCache from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange from soc.decoder.power_decoder2 import decode_spr_num -from soc.decoder.power_enums import MicrOp, SPR, XER_bits +from soc.decoder.power_enums import MicrOp, XER_bits from soc.experiment.pimem import PortInterface from soc.experiment.pimem import PortInterfaceBase diff --git a/src/soc/fu/spr/main_stage.py b/src/soc/fu/spr/main_stage.py index 1a1d5d4e..cca0c24e 100644 --- a/src/soc/fu/spr/main_stage.py +++ b/src/soc/fu/spr/main_stage.py @@ -7,7 +7,7 @@ from nmigen import (Module, Signal, Cat) from nmutil.pipemodbase import PipeModBase from soc.fu.spr.pipe_data import SPRInputData, SPROutputData -from soc.decoder.power_enums import MicrOp, SPR, XER_bits +from soc.decoder.power_enums import MicrOp, SPRfull, SPRreduced, XER_bits from soc.decoder.power_fields import DecodeFields from soc.decoder.power_fieldsn import SignalBitRange @@ -17,6 +17,10 @@ from soc.decoder.power_decoder2 import decode_spr_num class SPRMainStage(PipeModBase): def __init__(self, pspec): super().__init__(pspec, "spr_main") + # test if regfiles are reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn]) self.fields.create_specs() @@ -27,6 +31,10 @@ class SPRMainStage(PipeModBase): return SPROutputData(self.pspec) def elaborate(self, platform): + if self.regreduce_en: + SPR = SPRreduced + else: + SPR = SPRfull m = Module() comb = m.d.comb op = self.i.ctx.op diff --git a/src/soc/regfile/regfiles.py b/src/soc/regfile/regfiles.py index 50286b4a..fa899461 100644 --- a/src/soc/regfile/regfiles.py +++ b/src/soc/regfile/regfiles.py @@ -26,7 +26,7 @@ Links: from soc.regfile.regfile import RegFile, RegFileArray, RegFileMem from soc.regfile.virtual_port import VirtualRegPort -from soc.decoder.power_enums import SPR +from soc.decoder.power_enums import SPRfull, SPRreduced # "State" Regfile @@ -177,7 +177,10 @@ class SPRRegs(RegFileMem): * write-through capability (read on same cycle as write) """ def __init__(self, svp64_en=False, regreduce_en=False): - n_sprs = len(SPR) + if regreduce_en: + n_sprs = len(SPRreduced) + else: + n_sprs = len(SPRfull) super().__init__(width=64, depth=n_sprs) self.w_ports = {'spr1': self.write_port("spr1")} self.r_ports = {'spr1': self.read_port("spr1")} diff --git a/src/soc/regfile/util.py b/src/soc/regfile/util.py index 5ccc4c54..e5f095dc 100644 --- a/src/soc/regfile/util.py +++ b/src/soc/regfile/util.py @@ -1,6 +1,8 @@ from soc.regfile.regfiles import FastRegs -from soc.decoder.power_enums import SPR, spr_dict +from soc.decoder.power_enums import SPRfull as SPR, spr_dict +# note that we can get away with using SPRfull here because the values +# (numerical values) are what is used for lookup. spr_to_fast = { SPR.CTR: FastRegs.CTR, SPR.LR: FastRegs.LR, SPR.TAR: FastRegs.TAR, diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index 78654ee0..7b55939d 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -158,6 +158,10 @@ class TestIssuerInternal(Elaboratable): # test is SVP64 is to be enabled self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + # and if regfiles are reduced + self.regreduce_en = (hasattr(pspec, "regreduce") and + (pspec.regreduce == True)) + # JTAG interface. add this right at the start because if it's # added it *modifies* the pspec, by adding enable/disable signals # for parts of the rest of the core @@ -207,7 +211,8 @@ class TestIssuerInternal(Elaboratable): self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE) self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state, opkls=IssuerDecode2ToOperand, - svp64_en=self.svp64_en) + svp64_en=self.svp64_en, + regreduce_en=self.regreduce_en) if self.svp64_en: self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix diff --git a/src/soc/simple/test/test_core.py b/src/soc/simple/test/test_core.py index 0bf1ee42..a7387e85 100644 --- a/src/soc/simple/test/test_core.py +++ b/src/soc/simple/test/test_core.py @@ -14,7 +14,9 @@ from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_decoder2 import PowerDecode2 from soc.decoder.selectable_int import SelectableInt from soc.decoder.isa.all import ISA -from soc.decoder.power_enums import SPR, spr_dict, Function, XER_bits + +# note that for testing using SPRfull should be ok here +from soc.decoder.power_enums import SPRfull as SPR, spr_dict, Function, XER_bits from soc.config.test.test_loadstore import TestMemPspec from soc.config.endian import bigendian diff --git a/src/soc/simple/test/test_runner.py b/src/soc/simple/test/test_runner.py index fd912bed..41b5a76a 100644 --- a/src/soc/simple/test/test_runner.py +++ b/src/soc/simple/test/test_runner.py @@ -155,10 +155,11 @@ class TestRunner(FHDLTestCase): dmi = issuer.dbg.dmi pdecode2 = issuer.pdecode2 l0 = core.l0 + regreduce_en = pspec.regreduce_en == True # copy of the decoder for simulator simdec = create_pdecode() - simdec2 = PowerDecode2(simdec) + simdec2 = PowerDecode2(simdec, regreduce_en=regreduce_en) m.submodules.simdec2 = simdec2 # pain in the neck # run core clock at same rate as test clock diff --git a/src/soc/simulator/test_sim.py b/src/soc/simulator/test_sim.py index 1fe38e73..4d586fa7 100644 --- a/src/soc/simulator/test_sim.py +++ b/src/soc/simulator/test_sim.py @@ -6,7 +6,7 @@ from soc.decoder.power_decoder import (create_pdecode) from soc.decoder.power_enums import (Function, MicrOp, In1Sel, In2Sel, In3Sel, OutSel, RC, LdstLen, CryIn, - single_bit_flags, Form, SPR, + single_bit_flags, Form, get_signal_name, get_csv) from soc.decoder.power_decoder2 import (PowerDecode2) from soc.simulator.program import Program