* https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
* https://bugs.libre-soc.org/show_bug.cgi?id=469
+* https://libre-soc.org/irclog-microwatt/%23microwatt.2021-12-07.log.html
+ (discussion about brams for ECP5)
"""
WBIOMasterOut, WBIOSlaveOut)
from soc.experiment.cache_ram import CacheRam
-#from soc.experiment.plru import PLRU
-from nmutil.plru import PLRU, PLRUs
+from soc.experiment.plru import PLRU, PLRUs
+#from nmutil.plru import PLRU, PLRUs
# for test
from soc.bus.sram import SRAM
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64 # Line size in bytes
-NUM_LINES = 16 # Number of lines in a set
+NUM_LINES = 32 # Number of lines in a set
NUM_WAYS = 4 # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2 # L1 DTLB number of sets
print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print (" TAG_WIDTH", TAG_WIDTH)
print (" NUM_WAYS", NUM_WAYS)
+print (" NUM_LINES", NUM_LINES)
def CacheTagArray():
- tag_layout = [('valid', 1),
+ tag_layout = [('valid', NUM_WAYS),
('tag', TAG_RAM_WIDTH),
]
return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))
with m.If(virt_mode):
for j in range(TLB_NUM_WAYS): # tlb_num_way_t
s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
- s_hit = Signal()
- s_pte = Signal(TLB_PTE_BITS)
- s_ra = Signal(REAL_ADDR_BITS)
+ s_hit = Signal(name="s_hit%d" % j)
+ s_pte = Signal(TLB_PTE_BITS, name="s_pte%d" % j)
+ s_ra = Signal(REAL_ADDR_BITS, name="s_ra%d" % j)
+ # read the PTE, calc the Real Address, get the tag
comb += s_pte.eq(read_tlb_pte(j, tlb_way.pte))
comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
comb += s_tag.eq(get_tag(s_ra))
-
+ # for each way check the tag against the cache tag set
for i in range(NUM_WAYS): # way_t
is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
comb += is_tag_hit.eq(go & cache_i_validdx[i] &
comb += hit_way_set[j].eq(i)
comb += s_hit.eq(1)
comb += hit_set[j].eq(s_hit)
- with m.If(s_tag == reload_tag):
- comb += rel_matches[j].eq(1)
+ comb += rel_matches[j].eq(s_tag == reload_tag)
with m.If(tlb_hit.valid):
comb += is_hit.eq(hit_set[tlb_hit.way])
comb += hit_way.eq(hit_way_set[tlb_hit.way])
at the end of line (this requires dealing with requests coming in
while not idle...)
"""
- def __init__(self):
+ def __init__(self, pspec=None):
self.d_in = LoadStore1ToDCacheType("d_in")
self.d_out = DCacheToLoadStore1Type("d_out")
self.log_out = Signal(20)
+ # test if microwatt compatibility is to be enabled
+ self.microwatt_compat = (hasattr(pspec, "microwatt_compat") and
+ (pspec.microwatt_compat == True))
+
def stage_0(self, m, r0, r1, r0_full):
"""Latch the request in r0.req as long as we're not stalling
"""
comb += r.doall.eq(0)
comb += r.tlbld.eq(0)
comb += r.mmu_req.eq(0)
+
with m.If((~r1.full & ~d_in.hold) | ~r0_full):
sync += r0.eq(r)
sync += r0_full.eq(r.req.valid)
d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t
way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
- setattr(m.submodules, "cacheram_%d" % i, way)
+ m.submodules["cacheram_%d" % i] = way
comb += way.rd_en.eq(do_read)
comb += way.rd_addr.eq(rd_addr)
sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
- with m.If(~r0.mmu_req):
- sync += r1.ls_valid.eq(1)
- with m.Else():
+ with m.If(r0.mmu_req):
sync += r1.mmu_done.eq(1)
+ with m.Else():
+ sync += r1.ls_valid.eq(1)
with m.If(r1.write_tag):
# Store new tag in selected way
sync += r1.full.eq(0)
sync += r1.slow_valid.eq(1)
- with m.If(~req.mmu_req):
- sync += r1.ls_valid.eq(1)
- with m.Else():
+ with m.If(req.mmu_req):
sync += r1.mmu_done.eq(1)
+ with m.Else():
+ sync += r1.ls_valid.eq(1)
with m.If(req.op == Op.OP_STORE_HIT):
sync += r1.write_bram.eq(1)
(r1.store_row == get_row(req.real_addr))):
sync += r1.full.eq(0)
sync += r1.slow_valid.eq(1)
- with m.If(~r1.mmu_req):
- sync += r1.ls_valid.eq(1)
- with m.Else():
+ with m.If(r1.mmu_req):
sync += r1.mmu_done.eq(1)
+ with m.Else():
+ sync += r1.ls_valid.eq(1)
sync += r1.forward_sel.eq(~0) # all 1s
sync += r1.use_forward1.eq(1)
with m.Case(State.STORE_WAIT_ACK):
st_stbs_done = Signal()
- acks = Signal(3)
adjust_acks = Signal(3)
comb += st_stbs_done.eq(~r1.wb.stb)
- comb += acks.eq(r1.acks_pending)
with m.If(r1.inc_acks != r1.dec_acks):
with m.If(r1.inc_acks):
- comb += adjust_acks.eq(acks + 1)
+ comb += adjust_acks.eq(r1.acks_pending + 1)
with m.Else():
- comb += adjust_acks.eq(acks - 1)
+ comb += adjust_acks.eq(r1.acks_pending - 1)
with m.Else():
- comb += adjust_acks.eq(acks)
+ comb += adjust_acks.eq(r1.acks_pending)
sync += r1.acks_pending.eq(adjust_acks)
comb += st_stbs_done.eq(1)
# Got ack ? See if complete.
+ sync += Display("got ack %d %d stbs %d adjust_acks %d",
+ bus.ack, bus.ack, st_stbs_done, adjust_acks)
with m.If(bus.ack):
with m.If(st_stbs_done & (adjust_acks == 1)):
sync += r1.state.eq(State.IDLE)
sync += r1.full.eq(0)
sync += r1.slow_valid.eq(1)
- with m.If(~r1.mmu_req):
- sync += r1.ls_valid.eq(1)
- with m.Else():
+ with m.If(r1.mmu_req):
sync += r1.mmu_done.eq(1)
+ with m.Else():
+ sync += r1.ls_valid.eq(1)
sync += r1.forward_sel.eq(~0) # all 1s
sync += r1.use_forward1.eq(1)
# deal with litex not doing wishbone pipeline mode
# XXX in wrong way. FIFOs are needed in the SRAM test
# so that stb/ack match up. same thing done in icache.py
- comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)
+ if not self.microwatt_compat:
+ comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)
# Wire up wishbone request latch out of stage 1
comb += self.bus.we.eq(r1.wb.we)