from enum import Enum, unique
from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
from random import randint
from nmigen.cli import main
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from soc.experiment.mem_types import (LoadStore1ToDCacheType,
                                      DCacheToLoadStore1Type,
                                      MMUToDCacheType,
                                      DCacheToMMUType)
if not cxxsim:  # "cxxsim" flag assumed; selects the simulator backend
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle
print ("ROW_SIZE", ROW_SIZE)
print ("ROW_PER_LINE", ROW_PER_LINE)
print ("BRAM_ROWS", BRAM_ROWS)
print ("ROW_SIZE", ROW_SIZE)
print ("ROW_PER_LINE", ROW_PER_LINE)
print ("BRAM_ROWS", BRAM_ROWS)
self.write_bram = Signal()
self.write_tag = Signal()
self.slow_valid = Signal()
self.wb = WBMasterOut("wb")
self.reload_tag = Signal(TAG_BITS)
self.store_way = Signal(WAY_BITS)
sync += cache_tag_set.eq(cache_tags[index])
def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
- r0_valid, r1, cache_valid_bits, replace_way,
+ r0_valid, r1, cache_valids, replace_way,
use_forward1_next, use_forward2_next,
req_hit_way, plru_victim, rc_ok, perm_attr,
valid_ra, perm_ok, access_ok, req_op, req_go,
# Extract line, row and tag from request
comb += req_index.eq(get_index(r0.req.addr))
comb += req_row.eq(get_row(r0.req.addr))
comb += req_tag.eq(get_tag(ra))
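# A sketch of the slicing helpers used above (bit layout assumed from the
# usual microwatt-style geometry; dcache.py defines the real ones):
#
#     def get_index(addr):    # cache-line index
#         return addr[LINE_OFF_BITS:SET_SIZE_BITS]
#     def get_row(addr):      # data-BRAM row for the address
#         return addr[ROW_OFF_BITS:SET_SIZE_BITS]
#     def get_tag(addr):      # tag bits compared against the tag RAM
#         return addr[SET_SIZE_BITS:REAL_ADDR_BITS]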
- comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
- r0.req.addr, ra, req_index, req_tag, req_row)
+ if False: # display on comb is a bit... busy.
+ comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
+ r0.req.addr, ra, req_index, req_tag, req_row)
m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
tlb_valid_way, tlb_hit_way,
# For a store, consider this a hit even if the row isn't
# valid since it will be by the time we perform the store.
# For a load, check the appropriate row valid bit.
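# A hedged sketch of that hit test (signal names assumed, not the exact
# dcache.py code):
#
#     with m.If(r0.req.load):
#         comb += is_hit.eq(tag_match & row_valid)   # loads need the row
#     with m.Else():
#         comb += is_hit.eq(tag_match)               # stores: tag match only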
# All wishbone request generation is done here.
# This machine operates at stage 1.
def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
- cache_valid_bits, r0, replace_way,
+ cache_valids, r0, replace_way,
req_hit_way, req_same_tag,
r0_valid, req_op, cache_tags, req_go, ra):
sync += r1.wb.sel.eq(req.byte_sel)
sync += r1.wb.dat.eq(req.data)
sync += r1.dcbz.eq(req.dcbz)
# Clear stb and set ld_stbs_done
# so we can handle an eventual
# last ack on the same cycle.
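# A sketch of what that comment describes (assumed logic; the real code
# also qualifies this with the wishbone stall signal):
#
#     with m.If(is_last_row_addr(r1.real_adr, r1.end_row_ix)):
#         sync += r1.wb.stb.eq(0)        # no more addresses to send
#         comb += ld_stbs_done.eq(1)     # an ack this cycle can still finish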
- rarange = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
- comb += rarange.eq(r1.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS]+1)
- sync += r1.wb.adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(rarange)
+ row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
+ comb += row.eq(r1.real_adr[ROW_OFF_BITS:])
+ sync += r1.real_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(row+1)
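# The new code increments only bits [ROW_OFF_BITS:LINE_OFF_BITS] of the
# real address, so a line reload wraps within its own cache line. A plain
# python model of the same arithmetic (widths assumed: ROW_OFF_BITS=3,
# LINE_OFF_BITS=6):
#
#     def next_reload_addr(addr, row_off=3, line_off=6):
#         nrows = 1 << (line_off - row_off)
#         row = ((addr >> row_off) + 1) & (nrows - 1)   # wrap in the line
#         return (addr & ~((nrows - 1) << row_off)) | (row << row_off)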
# If this is the data we were looking for,
# we can complete the request next cycle.
sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
stall_out, req_op[:3], d_out.valid, d_out.error,
r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
# Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
cache_tags = CacheTagArray()
cache_tag_set = Signal(TAG_RAM_WIDTH)
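# CacheTagArray is an Array of per-line tag Signals; a sketch, assuming
# the NUM_LINES and TAG_RAM_WIDTH geometry used elsewhere in dcache.py:
#
#     def CacheTagArray():
#         return Array(Signal(TAG_RAM_WIDTH, name="tag%d" % x)
#                      for x in range(NUM_LINES))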
# TODO attribute ram_style : string;
# TODO attribute ram_style of cache_tags : signal is "distributed";
comb += self.wb_out.eq(r1.wb)
# call sub-functions putting everything together, using shared
self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
- r0_valid, r1, cache_valid_bits, replace_way,
+ r0_valid, r1, cache_valids, replace_way,
use_forward1_next, use_forward2_next,
req_hit_way, plru_victim, rc_ok, perm_attr,
valid_ra, perm_ok, access_ok, req_op, req_go,
req_hit_way, req_index, req_tag, access_ok,
tlb_hit, tlb_hit_way, tlb_req_index)
self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
- cache_valid_bits, r0, replace_way,
+ cache_valids, r0, replace_way,
req_hit_way, req_same_tag,
r0_valid, req_op, cache_tags, req_go, ra)
#self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)
assert data == sim_data, \
"check %x data %x != %x" % (addr, data, sim_data)
data = yield from dcache_load(dut, addr*8)
assert data == sim_mem[addr], \
"final check %x data %x != %x" % (addr*8, data, sim_mem[addr])
m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
m.d.comb += sram.bus.we.eq(dut.wb_out.we)
m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
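# The rest of the hookup presumably mirrors these lines (assumed names):
# address and write data out to the SRAM model, read data and ack back in:
#
#     m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
#     m.d.comb += sram.bus.adr.eq(dut.wb_out.adr)
#     m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)
#     m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
#     m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)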
mem.append((i*2)| ((i*2+1)<<32))
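# Each 64-bit word packs two consecutive 32-bit values: word i holds 2*i
# in the low half and 2*i+1 in the high half, e.g. mem[3] == 0x00000007_00000006.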
test_dcache(mem, dcache_sim, "")