X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fsimple%2Fissuer.py;h=b13b5ebeaa4a9a09934c6e02a0017c7cc0bfbdd5;hb=HEAD;hp=e2f66a50fa1142bad706ea8a1210dcf1cbf89b05;hpb=94bbe976e7a4efcc7091b9485d6c0bed892afe8c;p=soc.git diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index e2f66a50..15bd1760 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -21,33 +21,38 @@ from nmigen.cli import rtlil from nmigen.cli import main import sys +from nmutil.singlepipe import ControlBase +from soc.simple.core_data import FetchOutput, FetchInput + from nmigen.lib.coding import PriorityEncoder -from soc.decoder.power_decoder import create_pdecode -from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder -from soc.decoder.decode2execute1 import IssuerDecode2ToOperand -from soc.decoder.decode2execute1 import Data -from soc.experiment.testmem import TestMemory # test only for instructions +from openpower.decoder.power_decoder import create_pdecode +from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder +from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand +from openpower.decoder.decode2execute1 import Data +from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR, + SVP64PredMode) +from openpower.state import CoreState +from openpower.consts import (CR, SVP64CROffs, MSR) +from soc.experiment.testmem import TestMemory # test only for instructions from soc.regfile.regfiles import StateRegs, FastRegs from soc.simple.core import NonProductionCore from soc.config.test.test_loadstore import TestMemPspec from soc.config.ifetch import ConfigFetchUnit -from soc.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR, - SVP64PredMode) from soc.debug.dmi import CoreDebug, DMIInterface from soc.debug.jtag import JTAG from soc.config.pinouts import get_pinspecs -from soc.config.state import CoreState from soc.interrupts.xics import XICS_ICP, XICS_ICS from soc.bus.simple_gpio import SimpleGPIO from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W from soc.clock.select import ClockSelect from soc.clock.dummypll import DummyPLL -from soc.sv.svstate import SVSTATERec - +from openpower.sv.svstate import SVSTATERec +from soc.experiment.icache import ICache from nmutil.util import rising_edge + def get_insn(f_instr_o, pc): if f_instr_o.width == 32: return f_instr_o @@ -56,23 +61,25 @@ def get_insn(f_instr_o, pc): return f_instr_o.word_select(pc[2], 32) # gets state input or reads from state regfile -def state_get(m, state_i, name, regfile, regnum): + + +def state_get(m, res, core_rst, state_i, name, regfile, regnum): comb = m.d.comb sync = m.d.sync - # read the PC - res = Signal(64, reset_less=True, name=name) + # read the {insert state variable here} res_ok_delay = Signal(name="%s_ok_delay" % name) - sync += res_ok_delay.eq(~state_i.ok) - with m.If(state_i.ok): - # incoming override (start from pc_i) - comb += res.eq(state_i.data) - with m.Else(): - # otherwise read StateRegs regfile for PC... - comb += regfile.ren.eq(1<> srcstep) + sync += self.dstmask.eq(new_dstmask >> dststep) m.next = "FETCH_PRED_DONE" with m.State("FETCH_PRED_DONE"): - comb += pred_mask_valid_o.eq(1) - with m.If(pred_mask_ready_i): + comb += pred_mask_o_valid.eq(1) + with m.If(pred_mask_i_ready): m.next = "FETCH_PRED_IDLE" - def issue_fsm(self, m, core, pc_changed, sv_changed, nia, + def issue_fsm(self, m, core, nia, dbg, core_rst, is_svp64_mode, - fetch_pc_ready_o, fetch_pc_valid_i, - fetch_insn_valid_o, fetch_insn_ready_i, - pred_insn_valid_i, pred_insn_ready_o, - pred_mask_valid_o, pred_mask_ready_i, - exec_insn_valid_i, exec_insn_ready_o, - exec_pc_valid_o, exec_pc_ready_i): + fetch_pc_o_ready, fetch_pc_i_valid, + fetch_insn_o_valid, fetch_insn_i_ready, + pred_insn_i_valid, pred_insn_o_ready, + pred_mask_o_valid, pred_mask_i_ready, + exec_insn_i_valid, exec_insn_o_ready, + exec_pc_o_valid, exec_pc_i_ready): """issue FSM decode / issue FSM. this interacts with the "fetch" FSM @@ -500,13 +1179,12 @@ class TestIssuerInternal(Elaboratable): sync = m.d.sync pdecode2 = self.pdecode2 cur_state = self.cur_state + new_svstate = self.new_svstate # temporaries - dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode + dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode # for updating svstate (things like srcstep etc.) - update_svstate = Signal() # set this (below) if updating - new_svstate = SVSTATERec("new_svstate") comb += new_svstate.eq(cur_state.svstate) # precalculate srcstep+1 and dststep+1 @@ -517,62 +1195,98 @@ class TestIssuerInternal(Elaboratable): comb += next_srcstep.eq(cur_state.svstate.srcstep+1) comb += next_dststep.eq(cur_state.svstate.dststep+1) + # note if an exception happened. in a pipelined or OoO design + # this needs to be accompanied by "shadowing" (or stalling) + exc_happened = self.core.o.exc_happened + # also note instruction fetch failed + if hasattr(core, "icache"): + fetch_failed = core.icache.i_out.fetch_failed + flush_needed = True + # set to fault in decoder + # update (highest priority) instruction fault + rising_fetch_failed = rising_edge(m, fetch_failed) + with m.If(rising_fetch_failed): + sync += pdecode2.instr_fault.eq(1) + else: + fetch_failed = Const(0, 1) + flush_needed = False + + sync += fetch_pc_i_valid.eq(0) + with m.FSM(name="issue_fsm"): + with m.State("PRE_IDLE"): + with m.If(~dbg.core_stop_o & ~core_rst): + m.next = "ISSUE_START" + # sync with the "fetch" phase which is reading the instruction # at this point, there is no instruction running, that # could inadvertently update the PC. with m.State("ISSUE_START"): + # reset instruction fault + sync += pdecode2.instr_fault.eq(0) # wait on "core stop" release, before next fetch # need to do this here, in case we are in a VL==0 loop with m.If(~dbg.core_stop_o & ~core_rst): - comb += fetch_pc_valid_i.eq(1) # tell fetch to start - with m.If(fetch_pc_ready_o): # fetch acknowledged us + sync += fetch_pc_i_valid.eq(1) # tell fetch to start + sync += cur_state.pc.eq(dbg.state.pc) + sync += cur_state.svstate.eq(dbg.state.svstate) + sync += cur_state.msr.eq(dbg.state.msr) + with m.If(fetch_pc_o_ready): # fetch acknowledged us m.next = "INSN_WAIT" with m.Else(): # tell core it's stopped, and acknowledge debug handshake - comb += core.core_stopped_i.eq(1) comb += dbg.core_stopped_i.eq(1) - # while stopped, allow updating the PC and SVSTATE - with m.If(self.pc_i.ok): - comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.data_i.eq(self.pc_i.data) - sync += pc_changed.eq(1) + # while stopped, allow updating SVSTATE with m.If(self.svstate_i.ok): comb += new_svstate.eq(self.svstate_i.data) - comb += update_svstate.eq(1) - sync += sv_changed.eq(1) + comb += self.update_svstate.eq(1) + sync += self.sv_changed.eq(1) # wait for an instruction to arrive from Fetch with m.State("INSN_WAIT"): - comb += fetch_insn_ready_i.eq(1) - with m.If(fetch_insn_valid_o): - # loop into ISSUE_START if it's a SVP64 instruction - # and VL == 0. this because VL==0 is a for-loop - # from 0 to 0 i.e. always, always a NOP. - cur_vl = cur_state.svstate.vl - with m.If(is_svp64_mode & (cur_vl == 0)): - # update the PC before fetching the next instruction - # since we are in a VL==0 loop, no instruction was - # executed that we could be overwriting - comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.data_i.eq(nia) - comb += self.insn_done.eq(1) - m.next = "ISSUE_START" - with m.Else(): - if self.svp64_en: - m.next = "PRED_START" # start fetching predicate - else: - m.next = "DECODE_SV" # skip predication + # when using "single-step" mode, checking dbg.stopping_o + # prevents progress. allow issue to proceed once started + stopping = Const(0) + #if self.allow_overlap: + # stopping = dbg.stopping_o + with m.If(stopping): + # stopping: jump back to idle + m.next = "ISSUE_START" + if flush_needed: + # request the icache to stop asserting "failed" + comb += core.icache.flush_in.eq(1) + # stop instruction fault + sync += pdecode2.instr_fault.eq(0) + with m.Else(): + comb += fetch_insn_i_ready.eq(1) + with m.If(fetch_insn_o_valid): + # loop into ISSUE_START if it's a SVP64 instruction + # and VL == 0. this because VL==0 is a for-loop + # from 0 to 0 i.e. always, always a NOP. + cur_vl = cur_state.svstate.vl + with m.If(is_svp64_mode & (cur_vl == 0)): + # update the PC before fetching the next instruction + # since we are in a VL==0 loop, no instruction was + # executed that we could be overwriting + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.i_data.eq(nia) + comb += self.insn_done.eq(1) + m.next = "ISSUE_START" + with m.Else(): + if self.svp64_en: + m.next = "PRED_START" # fetching predicate + else: + m.next = "DECODE_SV" # skip predication with m.State("PRED_START"): - comb += pred_insn_valid_i.eq(1) # tell fetch_pred to start - with m.If(pred_insn_ready_o): # fetch_pred acknowledged us + comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start + with m.If(pred_insn_o_ready): # fetch_pred acknowledged us m.next = "MASK_WAIT" with m.State("MASK_WAIT"): - comb += pred_mask_ready_i.eq(1) # ready to receive the masks - with m.If(pred_mask_valid_o): # predication masks are ready + comb += pred_mask_i_ready.eq(1) # ready to receive the masks + with m.If(pred_mask_o_valid): # predication masks are ready m.next = "PRED_SKIP" # skip zeros in predicate @@ -594,12 +1308,15 @@ class TestIssuerInternal(Elaboratable): # append guard bit, in case the mask is all zeros pri_enc_src = PriorityEncoder(65) m.submodules.pri_enc_src = pri_enc_src - comb += pri_enc_src.i.eq(Cat(self.srcmask, 1)) + comb += pri_enc_src.i.eq(Cat(self.srcmask, + Const(1, 1))) comb += src_delta.eq(pri_enc_src.o) # apply delta to srcstep comb += skip_srcstep.eq(cur_srcstep + src_delta) # shift-out all leading zeros from the mask # plus the leading "one" bit + # TODO count leading zeros and shift-out the zero + # bits, in the same step, in hardware sync += self.srcmask.eq(self.srcmask >> (src_delta+1)) # same as above, but for dststep @@ -608,7 +1325,8 @@ class TestIssuerInternal(Elaboratable): with m.If(~pred_dst_zero): pri_enc_dst = PriorityEncoder(65) m.submodules.pri_enc_dst = pri_enc_dst - comb += pri_enc_dst.i.eq(Cat(self.dstmask, 1)) + comb += pri_enc_dst.i.eq(Cat(self.dstmask, + Const(1, 1))) comb += dst_delta.eq(pri_enc_dst.o) comb += skip_dststep.eq(cur_dststep + dst_delta) sync += self.dstmask.eq(self.dstmask >> (dst_delta+1)) @@ -618,104 +1336,163 @@ class TestIssuerInternal(Elaboratable): (skip_dststep >= cur_vl)): # end of VL loop. Update PC and reset src/dst step comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.data_i.eq(nia) + comb += self.state_w_pc.i_data.eq(nia) comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) - comb += update_svstate.eq(1) + comb += self.update_svstate.eq(1) + # synchronize with the simulator + comb += self.insn_done.eq(1) # go back to Issue m.next = "ISSUE_START" with m.Else(): # update new src/dst step comb += new_svstate.srcstep.eq(skip_srcstep) comb += new_svstate.dststep.eq(skip_dststep) - comb += update_svstate.eq(1) + comb += self.update_svstate.eq(1) # proceed to Decode m.next = "DECODE_SV" + # pass predicate mask bits through to satellite decoders + # TODO: for SIMD this will be *multiple* bits + sync += core.i.sv_pred_sm.eq(self.srcmask[0]) + sync += core.i.sv_pred_dm.eq(self.dstmask[0]) + # after src/dst step have been updated, we are ready # to decode the instruction with m.State("DECODE_SV"): # decode the instruction - sync += core.e.eq(pdecode2.e) - sync += core.state.eq(cur_state) - sync += core.raw_insn_i.eq(dec_opcode_i) - sync += core.bigendian_i.eq(self.core_bigendian_i) - # set RA_OR_ZERO detection in satellite decoders - sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) + with m.If(~fetch_failed): + sync += pdecode2.instr_fault.eq(0) + sync += core.i.e.eq(pdecode2.e) + sync += core.i.state.eq(cur_state) + sync += core.i.raw_insn_i.eq(dec_opcode_i) + sync += core.i.bigendian_i.eq(self.core_bigendian_i) + if self.svp64_en: + sync += core.i.sv_rm.eq(pdecode2.sv_rm) + # set RA_OR_ZERO detection in satellite decoders + sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz) + # and svp64 detection + sync += core.i.is_svp64_mode.eq(is_svp64_mode) + # and svp64 bit-rev'd ldst mode + ldst_dec = pdecode2.use_svp64_ldst_dec + sync += core.i.use_svp64_ldst_dec.eq(ldst_dec) + # after decoding, reset any previous exception condition, + # allowing it to be set again during the next execution + sync += pdecode2.ldst_exc.eq(0) + m.next = "INSN_EXECUTE" # move to "execute" # handshake with execution FSM, move to "wait" once acknowledged with m.State("INSN_EXECUTE"): - comb += exec_insn_valid_i.eq(1) # trigger execute - with m.If(exec_insn_ready_o): # execute acknowledged us - m.next = "EXECUTE_WAIT" + # when using "single-step" mode, checking dbg.stopping_o + # prevents progress. allow execute to proceed once started + stopping = Const(0) + #if self.allow_overlap: + # stopping = dbg.stopping_o + with m.If(stopping): + # stopping: jump back to idle + m.next = "ISSUE_START" + if flush_needed: + # request the icache to stop asserting "failed" + comb += core.icache.flush_in.eq(1) + # stop instruction fault + sync += pdecode2.instr_fault.eq(0) + with m.Else(): + comb += exec_insn_i_valid.eq(1) # trigger execute + with m.If(exec_insn_o_ready): # execute acknowledged us + m.next = "EXECUTE_WAIT" with m.State("EXECUTE_WAIT"): - # wait on "core stop" release, at instruction end - # need to do this here, in case we are in a VL>1 loop - with m.If(~dbg.core_stop_o & ~core_rst): - comb += exec_pc_ready_i.eq(1) - with m.If(exec_pc_valid_o): + comb += exec_pc_i_ready.eq(1) + # see https://bugs.libre-soc.org/show_bug.cgi?id=636 + # the exception info needs to be blatted into + # pdecode.ldst_exc, and the instruction "re-run". + # when ldst_exc.happened is set, the PowerDecoder2 + # reacts very differently: it re-writes the instruction + # with a "trap" (calls PowerDecoder2.trap()) which + # will *overwrite* whatever was requested and jump the + # PC to the exception address, as well as alter MSR. + # nothing else needs to be done other than to note + # the change of PC and MSR (and, later, SVSTATE) + with m.If(exc_happened): + mmu = core.fus.get_exc("mmu0") + ldst = core.fus.get_exc("ldst0") + if mmu is not None: + with m.If(fetch_failed): + # instruction fetch: exception is from MMU + # reset instr_fault (highest priority) + sync += pdecode2.ldst_exc.eq(mmu) + sync += pdecode2.instr_fault.eq(0) + if flush_needed: + # request icache to stop asserting "failed" + comb += core.icache.flush_in.eq(1) + with m.If(~fetch_failed): + # otherwise assume it was a LDST exception + sync += pdecode2.ldst_exc.eq(ldst) + + with m.If(exec_pc_o_valid): + + # was this the last loop iteration? + is_last = Signal() + cur_vl = cur_state.svstate.vl + comb += is_last.eq(next_srcstep == cur_vl) - # was this the last loop iteration? - is_last = Signal() - cur_vl = cur_state.svstate.vl - comb += is_last.eq(next_srcstep == cur_vl) + with m.If(pdecode2.instr_fault): + # reset instruction fault, try again + sync += pdecode2.instr_fault.eq(0) + m.next = "ISSUE_START" - # if either PC or SVSTATE were changed by the previous - # instruction, go directly back to Fetch, without - # updating either PC or SVSTATE - with m.If(pc_changed | sv_changed): - m.next = "ISSUE_START" + # return directly to Decode if Execute generated an + # exception. + with m.Elif(pdecode2.ldst_exc.happened): + m.next = "DECODE_SV" - # also return to Fetch, when no output was a vector - # (regardless of SRCSTEP and VL), or when the last - # instruction was really the last one of the VL loop - with m.Elif((~pdecode2.loop_continue) | is_last): - # before going back to fetch, update the PC state - # register with the NIA. - # ok here we are not reading the branch unit. - # TODO: this just blithely overwrites whatever - # pipeline updated the PC - comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.data_i.eq(nia) - # reset SRCSTEP before returning to Fetch + # if MSR, PC or SVSTATE were changed by the previous + # instruction, go directly back to Fetch, without + # updating either MSR PC or SVSTATE + with m.Elif(self.msr_changed | self.pc_changed | + self.sv_changed): + m.next = "ISSUE_START" + + # also return to Fetch, when no output was a vector + # (regardless of SRCSTEP and VL), or when the last + # instruction was really the last one of the VL loop + with m.Elif((~pdecode2.loop_continue) | is_last): + # before going back to fetch, update the PC state + # register with the NIA. + # ok here we are not reading the branch unit. + # TODO: this just blithely overwrites whatever + # pipeline updated the PC + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.i_data.eq(nia) + # reset SRCSTEP before returning to Fetch + if self.svp64_en: with m.If(pdecode2.loop_continue): comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) - comb += update_svstate.eq(1) - m.next = "ISSUE_START" + comb += self.update_svstate.eq(1) + else: + comb += new_svstate.srcstep.eq(0) + comb += new_svstate.dststep.eq(0) + comb += self.update_svstate.eq(1) + m.next = "ISSUE_START" - # returning to Execute? then, first update SRCSTEP - with m.Else(): - comb += new_svstate.srcstep.eq(next_srcstep) - comb += new_svstate.dststep.eq(next_dststep) - comb += update_svstate.eq(1) - # return to mask skip loop - m.next = "PRED_SKIP" + # returning to Execute? then, first update SRCSTEP + with m.Else(): + comb += new_svstate.srcstep.eq(next_srcstep) + comb += new_svstate.dststep.eq(next_dststep) + comb += self.update_svstate.eq(1) + # return to mask skip loop + m.next = "PRED_SKIP" - with m.Else(): - comb += core.core_stopped_i.eq(1) - comb += dbg.core_stopped_i.eq(1) - # while stopped, allow updating the PC and SVSTATE - with m.If(self.pc_i.ok): - comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.data_i.eq(self.pc_i.data) - sync += pc_changed.eq(1) - with m.If(self.svstate_i.ok): - comb += new_svstate.eq(self.svstate_i.data) - comb += update_svstate.eq(1) - sync += sv_changed.eq(1) # check if svstate needs updating: if so, write it to State Regfile - with m.If(update_svstate): - comb += self.state_w_sv.wen.eq(1<