From 85bb4722393d54c2e30239757a5229441fc89616 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 28 Jan 2022 03:19:40 +0000 Subject: [PATCH] sort out misaligned store in LoadStore1 --- src/soc/experiment/pimem.py | 2 +- src/soc/experiment/test/test_loadstore1.py | 66 +++++++++++++--------- src/soc/fu/ldst/loadstore.py | 16 +++--- 3 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/soc/experiment/pimem.py b/src/soc/experiment/pimem.py index d7e97eba..498ecb8f 100644 --- a/src/soc/experiment/pimem.py +++ b/src/soc/experiment/pimem.py @@ -296,7 +296,7 @@ class PortInterfaceBase(Elaboratable): with m.If(st_active.q & pi.st.ok): # shift data up before storing. lenexp *bit* version of mask is # passed straight through as byte-level "write-enable" lines. - stdata = Signal(self.regwid, reset_less=True) + stdata = Signal(self.regwid*2, reset_less=True) comb += stdata.eq(pi.st.data << (lenexp.addr_i*8)) # TODO: replace with link to LoadStoreUnitInterface.x_store_data # and also handle the ready/stall/busy protocol diff --git a/src/soc/experiment/test/test_loadstore1.py b/src/soc/experiment/test/test_loadstore1.py index a21a4e08..e79e0c12 100644 --- a/src/soc/experiment/test/test_loadstore1.py +++ b/src/soc/experiment/test/test_loadstore1.py @@ -20,7 +20,7 @@ from soc.experiment.test import pagetables from nmigen.compat.sim import run_simulation from random import random -from openpower.test.wb_get import wb_get +from openpower.test.wb_get import wb_get_classic from openpower.test import wb_get as wbget from openpower.exceptions import LDSTExceptionTuple @@ -540,15 +540,25 @@ def _test_loadstore1_microwatt_mmu_bin_test5(dut, mem): wbget.stop = True -def test_pi_ld_misalign(pi,addr,data_len,msr): +def test_pi_ld_misalign(pi, addr, data_len, msr): for i in range(0,data_len): ld_data, exctype, exc = yield from pi_ld(pi, addr+i, data_len, msr=msr) yield - if i == 0: - assert exc is None # use "is None" not "== None" - print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data)) - else: - assert exc.alignment == 1 + assert exc is None # use "is None" not "== None" + print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data)) + + +def test_pi_st_ld_misalign(pi, addr, data_len, msr): + data = 0x0102030405060708 + for i in range(0, data_len): + exctype, exc = yield from pi_st(pi, addr+i, data, data_len, msr=msr) + print (exctype, exc) + assert exc is None # use "is None" not "== None" + ld_data, exctype, exc = yield from pi_ld(pi, addr+i, data_len, msr=msr) + yield + assert exc is None # use "is None" not "== None" + print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data)) + assert ld_data == data def _test_loadstore1_misalign(dut, mem): @@ -566,6 +576,8 @@ def _test_loadstore1_misalign(dut, mem): yield from test_pi_ld_misalign(pi,0,8,msr) + yield from test_pi_st_ld_misalign(pi,0,8,msr) + wbget.stop = True @@ -855,10 +867,10 @@ def test_loadstore1_ifetch_unit_iface(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1_ifetch_iface(m, mem))) - # add two wb_get processes onto the *same* memory dictionary. + # add two wb_get_classic processes onto the *same* memory dictionary. # this shouuuld work.... cross-fingers... - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) - sim.add_sync_process(wrap(wb_get(icache.ibus, mem))) # ibus not bus + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(icache.ibus, mem))) # ibus not bus with sim.write_vcd('test_loadstore1_ifetch_iface.vcd', traces=[m.debug_status]): # include extra debug sim.run() @@ -876,10 +888,10 @@ def test_loadstore1_ifetch(): icache = m.submodules.ldst.icache sim.add_sync_process(wrap(_test_loadstore1_ifetch(m, mem))) - # add two wb_get processes onto the *same* memory dictionary. + # add two wb_get_classic processes onto the *same* memory dictionary. # this shouuuld work.... cross-fingers... - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) - sim.add_sync_process(wrap(wb_get(icache.bus, mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(icache.bus, mem))) with sim.write_vcd('test_loadstore1_ifetch.vcd', traces=[m.debug_status]): # include extra debug sim.run() @@ -896,7 +908,7 @@ def test_loadstore1(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1(m, mem))) - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) with sim.write_vcd('test_loadstore1.vcd'): sim.run() @@ -912,7 +924,7 @@ def test_loadstore1_microwatt_mmu_bin_test2(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1_microwatt_mmu_bin_test2(m, mem))) - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) with sim.write_vcd('test_microwatt_mmu_test2.vcd'): sim.run() @@ -928,7 +940,7 @@ def test_loadstore1_microwatt_mmu_bin_test5(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1_microwatt_mmu_bin_test5(m, mem))) - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) with sim.write_vcd('test_microwatt_mmu_test5.vcd'): sim.run() @@ -945,11 +957,13 @@ def test_loadstore1_misalign(): ###########1122334455667788 mem[0] = 0x0102030405060708 + mem[8] = 0xffffffffffffffff sim.add_sync_process(wrap(_test_loadstore1_misalign(m, mem))) - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) with sim.write_vcd('test_loadstore1_misalign.vcd'): sim.run() + print ("mem", mem) def test_loadstore1_invalid(): @@ -963,7 +977,7 @@ def test_loadstore1_invalid(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1_invalid(m, mem))) - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) with sim.write_vcd('test_loadstore1_invalid.vcd'): sim.run() @@ -981,10 +995,10 @@ def test_loadstore1_ifetch_invalid(): icache = m.submodules.ldst.icache sim.add_sync_process(wrap(_test_loadstore1_ifetch_invalid(m, mem))) - # add two wb_get processes onto the *same* memory dictionary. + # add two wb_get_classic processes onto the *same* memory dictionary. # this shouuuld work.... cross-fingers... - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) - sim.add_sync_process(wrap(wb_get(icache.bus, mem))) + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(icache.bus, mem))) with sim.write_vcd('test_loadstore1_ifetch_invalid.vcd', traces=[m.debug_status]): # include extra debug sim.run() @@ -1009,10 +1023,10 @@ def test_loadstore1_ifetch_multi(): sim.add_clock(1e-6) sim.add_sync_process(wrap(_test_loadstore1_ifetch_multi(m, mem))) - # add two wb_get processes onto the *same* memory dictionary. + # add two wb_get_classic processes onto the *same* memory dictionary. # this shouuuld work.... cross-fingers... - sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem))) - sim.add_sync_process(wrap(wb_get(icache.ibus, mem))) # ibus not bus + sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem))) + sim.add_sync_process(wrap(wb_get_classic(icache.ibus, mem))) # ibus not bus with sim.write_vcd('test_loadstore1_ifetch_multi.vcd', traces=[m.debug_status]): # include extra debug sim.run() @@ -1020,10 +1034,10 @@ def test_loadstore1_ifetch_multi(): if __name__ == '__main__': #test_loadstore1() #test_loadstore1_microwatt_mmu_bin_test2() - test_loadstore1_microwatt_mmu_bin_test5() + #test_loadstore1_microwatt_mmu_bin_test5() #test_loadstore1_invalid() #test_loadstore1_ifetch() #FIXME #test_loadstore1_ifetch_invalid() #test_loadstore1_ifetch_unit_iface() # guess: should be working #test_loadstore1_ifetch_multi() - #test_loadstore1_misalign() + test_loadstore1_misalign() diff --git a/src/soc/fu/ldst/loadstore.py b/src/soc/fu/ldst/loadstore.py index 500024aa..6af17736 100644 --- a/src/soc/fu/ldst/loadstore.py +++ b/src/soc/fu/ldst/loadstore.py @@ -112,8 +112,9 @@ class LoadStore1(PortInterfaceBase): self.dcbz = Signal() self.raddr = Signal(64) self.maddr = Signal(64) - self.store_data = Signal(128) # 128-bit to cope with - self.load_data = Signal(128) # misalignment + self.store_data = Signal(64) # first half (aligned) + self.store_data2 = Signal(64) # second half (misaligned) + self.load_data = Signal(128) # 128 to cope with misalignment self.load_data_delay = Signal(128) # perform 2 LD/STs self.byte_sel = Signal(16) # also for misaligned, 16-bit self.alignstate = Signal(Misalign) # progress of alignment request @@ -208,6 +209,7 @@ class LoadStore1(PortInterfaceBase): # put data into comb which is picked up in main elaborate() m.d.comb += self.d_w_valid.eq(1) m.d.comb += self.store_data.eq(data) + m.d.sync += self.store_data2.eq(data[64:128]) st_ok = self.done # TODO indicates write data is valid m.d.comb += self.pi.store_done.data.eq(self.d_in.store_done) m.d.comb += self.pi.store_done.ok.eq(1) @@ -258,6 +260,7 @@ class LoadStore1(PortInterfaceBase): # fsm skeleton with m.Switch(self.state): with m.Case(State.IDLE): + sync += self.load_data_delay.eq(0) # clear out with m.If((self.d_validblip | self.instr_fault) & ~exc.happened): comb += self.busy.eq(1) @@ -321,6 +324,8 @@ class LoadStore1(PortInterfaceBase): with m.If(ldst_r.load): m.d.comb += self.load_data[0:63].eq(d_in.data) sync += self.load_data_delay[0:64].eq(d_in.data) + with m.Else(): + m.d.sync += d_out.data.eq(self.store_data2) # mmm kinda cheating, make a 2nd blip. # use an aligned version of the address addr_aligned, z3 = Signal(64), Const(0, 3) @@ -435,12 +440,9 @@ class LoadStore1(PortInterfaceBase): if hasattr(dbus, "stall"): comb += dcache.bus.stall.eq(dbus.stall) - # update out d data when flag set + # update out d data when flag set, for first half (second done in FSM) with m.If(self.d_w_valid): - with m.If(ldst_r.alignstate == Misalign.WAITSECOND): - m.d.sync += d_out.data.eq(self.store_data[64:128]) - with m.Else(): - m.d.sync += d_out.data.eq(self.store_data[0:64]) + m.d.sync += d_out.data.eq(self.store_data) #with m.Else(): # m.d.sync += d_out.data.eq(0) # unit test passes with that change -- 2.30.2