X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fsoc%2Fsimple%2Fissuer.py;h=ecc78d97bcb56523da17610765f794501f77535f;hb=133821d5dace2a6184b38bb8dac0fc095fef303f;hp=f6daa834adeb23f2e8aa922baec5e3d1f5d0c082;hpb=809f17813341f4bbf71fb9bab5f01746ef9eb857;p=soc.git diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index f6daa834..ecc78d97 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -60,6 +60,9 @@ class TestIssuerInternal(Elaboratable): """ def __init__(self, pspec): + # test is SVP64 is to be enabled + self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) + # JTAG interface. add this right at the start because if it's # added it *modifies* the pspec, by adding enable/disable signals # for parts of the rest of the core @@ -85,7 +88,8 @@ class TestIssuerInternal(Elaboratable): if self.sram4x4k: self.sram4k = [] for i in range(4): - self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i)) + self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i, + features={'err'})) # add interrupt controller? self.xics = hasattr(pspec, "xics") and pspec.xics == True @@ -107,8 +111,10 @@ class TestIssuerInternal(Elaboratable): pdecode = create_pdecode() self.cur_state = CoreState("cur") # current state (MSR/PC/EINT/SVSTATE) self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state, - opkls=IssuerDecode2ToOperand) - self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix + opkls=IssuerDecode2ToOperand, + svp64_en=self.svp64_en) + if self.svp64_en: + self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix # Test Instruction memory self.imem = ConfigFetchUnit(pspec).fu @@ -122,6 +128,7 @@ class TestIssuerInternal(Elaboratable): # instruction go/monitor self.pc_o = Signal(64, reset_less=True) self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me" + self.svstate_i = Data(32, "svstate_i") # ditto self.core_bigendian_i = Signal() self.busy_o = Signal(reset_less=True) self.memerr_o = Signal(reset_less=True) @@ -146,11 +153,17 @@ class TestIssuerInternal(Elaboratable): self.state_nia = self.core.regs.rf['state'].w_ports['nia'] self.state_nia.wen.name = 'state_nia_wen' - def fetch_fsm(self, m, core, dbg, pc, nia, - core_rst, cur_state, + # pulse to synchronize the simulator at instruction end + self.insn_done = Signal() + + if self.svp64_en: + # store copies of predicate masks + self.srcmask = Signal(64) + self.dstmask = Signal(64) + + def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode, fetch_pc_ready_o, fetch_pc_valid_i, - exec_insn_valid_o, exec_insn_ready_i, - fetch_insn_o): + fetch_insn_valid_o, fetch_insn_ready_i): """fetch FSM this FSM performs fetch of raw instruction data, partial-decodes it 32-bit at a time to detect SVP64 prefixes, and will optionally @@ -159,37 +172,32 @@ class TestIssuerInternal(Elaboratable): comb = m.d.comb sync = m.d.sync pdecode2 = self.pdecode2 - svp64 = self.svp64 + cur_state = self.cur_state + dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode msr_read = Signal(reset=1) - sv_read = Signal(reset=1) with m.FSM(name='fetch_fsm'): # waiting (zzz) with m.State("IDLE"): - with m.If(~dbg.core_stop_o & ~core_rst): - comb += fetch_pc_ready_o.eq(1) - with m.If(fetch_pc_valid_i): - # instruction allowed to go: start by reading the PC - # capture the PC and also drop it into Insn Memory - # we have joined a pair of combinatorial memory - # lookups together. this is Generally Bad. - comb += self.imem.a_pc_i.eq(pc) - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) - sync += cur_state.pc.eq(pc) - - # initiate read of MSR/SVSTATE. arrives one clock later - comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR) - comb += self.state_r_sv.ren.eq(1 << StateRegs.SVSTATE) - sync += msr_read.eq(0) - sync += sv_read.eq(0) - - m.next = "INSN_READ" # move to "wait for bus" phase - with m.Else(): - comb += core.core_stopped_i.eq(1) - comb += dbg.core_stopped_i.eq(1) + comb += fetch_pc_ready_o.eq(1) + with m.If(fetch_pc_valid_i): + # instruction allowed to go: start by reading the PC + # capture the PC and also drop it into Insn Memory + # we have joined a pair of combinatorial memory + # lookups together. this is Generally Bad. + comb += self.imem.a_pc_i.eq(pc) + comb += self.imem.a_valid_i.eq(1) + comb += self.imem.f_valid_i.eq(1) + sync += cur_state.pc.eq(pc) + sync += cur_state.svstate.eq(svstate) # and svstate + + # initiate read of MSR. arrives one clock later + comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR) + sync += msr_read.eq(0) + + m.next = "INSN_READ" # move to "wait for bus" phase # dummy pause to find out why simulation is not keeping up with m.State("INSN_READ"): @@ -197,9 +205,6 @@ class TestIssuerInternal(Elaboratable): with m.If(~msr_read): sync += msr_read.eq(1) # yeah don't read it again sync += cur_state.msr.eq(self.state_r_msr.data_o) - with m.If(~sv_read): - sync += sv_read.eq(1) # yeah don't read it again - sync += cur_state.svstate.eq(self.state_r_sv.data_o) with m.If(self.imem.f_busy_o): # zzz... # busy: stay in wait-read comb += self.imem.a_valid_i.eq(1) @@ -207,25 +212,35 @@ class TestIssuerInternal(Elaboratable): with m.Else(): # not busy: instruction fetched insn = get_insn(self.imem.f_instr_o, cur_state.pc) - # decode the SVP64 prefix, if any - comb += svp64.raw_opcode_in.eq(insn) - comb += svp64.bigendian.eq(self.core_bigendian_i) - # pass the decoded prefix (if any) to PowerDecoder2 - sync += pdecode2.sv_rm.eq(svp64.svp64_rm) - # calculate the address of the following instruction - insn_size = Mux(svp64.is_svp64_mode, 8, 4) - sync += nia.eq(cur_state.pc + insn_size) - with m.If(~svp64.is_svp64_mode): - # with no prefix, store the instruction - # and hand it directly to the next FSM - sync += fetch_insn_o.eq(insn) + if self.svp64_en: + svp64 = self.svp64 + # decode the SVP64 prefix, if any + comb += svp64.raw_opcode_in.eq(insn) + comb += svp64.bigendian.eq(self.core_bigendian_i) + # pass the decoded prefix (if any) to PowerDecoder2 + sync += pdecode2.sv_rm.eq(svp64.svp64_rm) + # remember whether this is a prefixed instruction, so + # the FSM can readily loop when VL==0 + sync += is_svp64_mode.eq(svp64.is_svp64_mode) + # calculate the address of the following instruction + insn_size = Mux(svp64.is_svp64_mode, 8, 4) + sync += nia.eq(cur_state.pc + insn_size) + with m.If(~svp64.is_svp64_mode): + # with no prefix, store the instruction + # and hand it directly to the next FSM + sync += dec_opcode_i.eq(insn) + m.next = "INSN_READY" + with m.Else(): + # fetch the rest of the instruction from memory + comb += self.imem.a_pc_i.eq(cur_state.pc + 4) + comb += self.imem.a_valid_i.eq(1) + comb += self.imem.f_valid_i.eq(1) + m.next = "INSN_READ2" + else: + # not SVP64 - 32-bit only + sync += nia.eq(cur_state.pc + 4) + sync += dec_opcode_i.eq(insn) m.next = "INSN_READY" - with m.Else(): - # fetch the rest of the instruction from memory - comb += self.imem.a_pc_i.eq(cur_state.pc + 4) - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) - m.next = "INSN_READ2" with m.State("INSN_READ2"): with m.If(self.imem.f_busy_o): # zzz... @@ -235,82 +250,253 @@ class TestIssuerInternal(Elaboratable): with m.Else(): # not busy: instruction fetched insn = get_insn(self.imem.f_instr_o, cur_state.pc+4) - sync += fetch_insn_o.eq(insn) + sync += dec_opcode_i.eq(insn) m.next = "INSN_READY" + # TODO: probably can start looking at pdecode2.rm_dec + # here (or maybe even in INSN_READ state, if svp64_mode + # detected, in order to trigger - and wait for - the + # predicate reading. with m.State("INSN_READY"): # hand over the instruction, to be decoded - comb += exec_insn_valid_o.eq(1) - with m.If(exec_insn_ready_i): + comb += fetch_insn_valid_o.eq(1) + with m.If(fetch_insn_ready_i): m.next = "IDLE" - def execute_fsm(self, m, core, insn_done, pc_changed, - cur_state, fetch_insn_o, - fetch_pc_ready_o, fetch_pc_valid_i, - exec_insn_valid_o, exec_insn_ready_i): - """execute FSM + def fetch_predicate_fsm(self, m, core, TODO): + """fetch_predicate_fsm - obtains (constructs in the case of CR) + src/dest predicate masks - decode / issue / execute FSM. this interacts with the "fetch" FSM - through fetch_pc_ready/valid (incoming) and exec_insn_ready/valid - (outgoing). SVP64 RM prefixes have already been set up by the + https://bugs.libre-soc.org/show_bug.cgi?id=617 + the predicates can be read here, by using IntRegs r_ports['pred'] + or CRRegs r_ports['pred']. in the case of CRs it will have to + be done through multiple reads, extracting one relevant at a time. + later, a faster way would be to use the 32-bit-wide CR port but + this is more complex decoding, here. + """ + comb = m.d.comb + sync = m.d.sync + pdecode2 = self.pdecode2 + rm_dec = pdecode2.rm_dec # SVP64RMModeDecode + predmode = rm_dec.predmode + srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred + + def issue_fsm(self, m, core, pc_changed, sv_changed, nia, + dbg, core_rst, is_svp64_mode, + fetch_pc_ready_o, fetch_pc_valid_i, + fetch_insn_valid_o, fetch_insn_ready_i, + exec_insn_valid_i, exec_insn_ready_o, + exec_pc_valid_o, exec_pc_ready_i): + """issue FSM + + decode / issue FSM. this interacts with the "fetch" FSM + through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid + (outgoing). also interacts with the "execute" FSM + through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid + (incoming). + SVP64 RM prefixes have already been set up by the "fetch" phase, so execute is fairly straightforward. """ comb = m.d.comb sync = m.d.sync pdecode2 = self.pdecode2 - svp64 = self.svp64 + cur_state = self.cur_state # temporaries dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode - core_busy_o = core.busy_o # core is busy - core_ivalid_i = core.ivalid_i # instruction is valid - core_issue_i = core.issue_i # instruction is issued - insn_type = core.e.do.insn_type # instruction MicroOp type - with m.FSM(): + # for updating svstate (things like srcstep etc.) + update_svstate = Signal() # set this (below) if updating + new_svstate = SVSTATERec("new_svstate") + comb += new_svstate.eq(cur_state.svstate) + + with m.FSM(name="issue_fsm"): - # go fetch the instruction at the current PC + # sync with the "fetch" phase which is reading the instruction # at this point, there is no instruction running, that # could inadvertently update the PC. - with m.State("INSN_FETCH"): - comb += fetch_pc_valid_i.eq(1) - with m.If(fetch_pc_ready_o): - m.next = "INSN_WAIT" + with m.State("ISSUE_START"): + # wait on "core stop" release, before next fetch + # need to do this here, in case we are in a VL==0 loop + with m.If(~dbg.core_stop_o & ~core_rst): + comb += fetch_pc_valid_i.eq(1) # tell fetch to start + with m.If(fetch_pc_ready_o): # fetch acknowledged us + m.next = "INSN_WAIT" + with m.Else(): + # tell core it's stopped, and acknowledge debug handshake + comb += core.core_stopped_i.eq(1) + comb += dbg.core_stopped_i.eq(1) + # while stopped, allow updating the PC and SVSTATE + with m.If(self.pc_i.ok): + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.data_i.eq(self.pc_i.data) + sync += pc_changed.eq(1) + with m.If(self.svstate_i.ok): + comb += new_svstate.eq(self.svstate_i.data) + comb += update_svstate.eq(1) + sync += sv_changed.eq(1) # decode the instruction when it arrives with m.State("INSN_WAIT"): - comb += exec_insn_ready_i.eq(1) - with m.If(exec_insn_valid_o): + comb += fetch_insn_ready_i.eq(1) + with m.If(fetch_insn_valid_o): # decode the instruction - comb += dec_opcode_i.eq(fetch_insn_o) # actual opcode sync += core.e.eq(pdecode2.e) sync += core.state.eq(cur_state) sync += core.raw_insn_i.eq(dec_opcode_i) sync += core.bigendian_i.eq(self.core_bigendian_i) - # also drop PC and MSR into decode "state" - m.next = "INSN_START" # move to "start" + # set RA_OR_ZERO detection in satellite decoders + sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) + # loop into ISSUE_START if it's a SVP64 instruction + # and VL == 0. this because VL==0 is a for-loop + # from 0 to 0 i.e. always, always a NOP. + cur_vl = cur_state.svstate.vl + with m.If(is_svp64_mode & (cur_vl == 0)): + # update the PC before fetching the next instruction + # since we are in a VL==0 loop, no instruction was + # executed that we could be overwriting + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.data_i.eq(nia) + comb += self.insn_done.eq(1) + m.next = "ISSUE_START" + with m.Else(): + m.next = "INSN_EXECUTE" # move to "execute" + + # handshake with execution FSM, move to "wait" once acknowledged + with m.State("INSN_EXECUTE"): + comb += exec_insn_valid_i.eq(1) # trigger execute + with m.If(exec_insn_ready_o): # execute acknowledged us + m.next = "EXECUTE_WAIT" + + with m.State("EXECUTE_WAIT"): + # wait on "core stop" release, at instruction end + # need to do this here, in case we are in a VL>1 loop + with m.If(~dbg.core_stop_o & ~core_rst): + comb += exec_pc_ready_i.eq(1) + with m.If(exec_pc_valid_o): + # precalculate srcstep+1 and dststep+1 + # TODO these need to "skip" over predicated-out src/dst + # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3 + # but still without exceeding VL in either case + next_srcstep = Signal.like(cur_state.svstate.srcstep) + next_dststep = Signal.like(cur_state.svstate.dststep) + comb += next_srcstep.eq(cur_state.svstate.srcstep+1) + comb += next_dststep.eq(cur_state.svstate.dststep+1) + + # was this the last loop iteration? + is_last = Signal() + cur_vl = cur_state.svstate.vl + comb += is_last.eq(next_srcstep == cur_vl) + + # if either PC or SVSTATE were changed by the previous + # instruction, go directly back to Fetch, without + # updating either PC or SVSTATE + with m.If(pc_changed | sv_changed): + m.next = "ISSUE_START" + + # also return to Fetch, when no output was a vector + # (regardless of SRCSTEP and VL), or when the last + # instruction was really the last one of the VL loop + with m.Elif((~pdecode2.loop_continue) | is_last): + # before going back to fetch, update the PC state + # register with the NIA. + # ok here we are not reading the branch unit. + # TODO: this just blithely overwrites whatever + # pipeline updated the PC + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.data_i.eq(nia) + # reset SRCSTEP before returning to Fetch + with m.If(pdecode2.loop_continue): + comb += new_svstate.srcstep.eq(0) + comb += new_svstate.dststep.eq(0) + comb += update_svstate.eq(1) + m.next = "ISSUE_START" + + # returning to Execute? then, first update SRCSTEP + with m.Else(): + comb += new_svstate.srcstep.eq(next_srcstep) + comb += new_svstate.dststep.eq(next_dststep) + comb += update_svstate.eq(1) + m.next = "DECODE_SV" + + with m.Else(): + comb += core.core_stopped_i.eq(1) + comb += dbg.core_stopped_i.eq(1) + # while stopped, allow updating the PC and SVSTATE + with m.If(self.pc_i.ok): + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.data_i.eq(self.pc_i.data) + sync += pc_changed.eq(1) + with m.If(self.svstate_i.ok): + comb += new_svstate.eq(self.svstate_i.data) + comb += update_svstate.eq(1) + sync += sv_changed.eq(1) + + # need to decode the instruction again, after updating SRCSTEP + # in the previous state. + # mostly a copy of INSN_WAIT, but without the actual wait + with m.State("DECODE_SV"): + # decode the instruction + sync += core.e.eq(pdecode2.e) + sync += core.state.eq(cur_state) + sync += core.bigendian_i.eq(self.core_bigendian_i) + sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) + m.next = "INSN_EXECUTE" # move to "execute" + + # check if svstate needs updating: if so, write it to State Regfile + with m.If(update_svstate): + comb += self.state_w_sv.wen.eq(1<