+# Cache and L1 DTLB geometry.
+# TODO: make these parameters of DCache at some point
+LINE_SIZE = 64 # Cache line size in bytes
+NUM_LINES = 32 # Number of lines in a set (i.e. lines per way)
+NUM_WAYS = 4 # Number of cache ways
+TLB_SET_SIZE = 64 # L1 DTLB entries per set (i.e. entries per way)
+TLB_NUM_WAYS = 2 # L1 DTLB number of sets (each "set" here is one way)
+TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size), i.e. 4 KiB pages
+LOG_LENGTH = 0 # Non-zero to enable log data collection
+
+# BRAM organisation: We never access more than
+# -- wishbone_data_bits at a time so to save
+# -- resources we make the array only that wide, and
+# -- use consecutive indices for to make a cache "line"
+# --
+# -- ROW_SIZE is the width in bytes of the BRAM
+# -- (based on WB, so 64-bits)
+ROW_SIZE = WB_DATA_BITS // 8;
+
+# ROW_PER_LINE is the number of row (wishbone
+# transactions) in a line
+ROW_PER_LINE = LINE_SIZE // ROW_SIZE
+
+# BRAM_ROWS is the number of rows in BRAM needed
+# to represent the full dcache
+BRAM_ROWS = NUM_LINES * ROW_PER_LINE
+
+
+# Bit fields counts in the address
+
+# REAL_ADDR_BITS is the number of real address
+# bits that we store
+REAL_ADDR_BITS = 56
+
+# ROW_BITS is the number of bits to select a row
+ROW_BITS = log2_int(BRAM_ROWS)
+
+# ROW_LINE_BITS is the number of bits to select
+# a row within a line
+ROW_LINE_BITS = log2_int(ROW_PER_LINE)
+
+# LINE_OFF_BITS is the number of bits for
+# the offset in a cache line
+LINE_OFF_BITS = log2_int(LINE_SIZE)
+
+# ROW_OFF_BITS is the number of bits for
+# the offset in a row
+ROW_OFF_BITS = log2_int(ROW_SIZE)
+
+# INDEX_BITS is the number if bits to
+# select a cache line
+INDEX_BITS = log2_int(NUM_LINES)
+
+# SET_SIZE_BITS is the log base 2 of the set size
+SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
+
+# TAG_BITS is the number of bits of
+# the tag part of the address
+TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
+
+# TAG_WIDTH is the width in bits of each way of the tag RAM
+TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
+
+# WAY_BITS is the number of bits to select a way
+WAY_BITS = log2_int(NUM_WAYS)
+
+# Example of layout for 32 lines of 64 bytes:
+#
+# ..  tag    |index|  line  |
+# ..         |   row   |    |
+# ..         |     |---|    | ROW_LINE_BITS  (3)
+# ..         |     |--- - --| LINE_OFF_BITS  (6)
+# ..         |         |- --| ROW_OFF_BITS   (3)
+# ..         |----- ---|    | ROW_BITS       (8)
+# ..         |-----|        | INDEX_BITS     (5)
+# .. --------|              | TAG_BITS       (45)
+
+# TAG_RAM_WIDTH is the width in bits of one tag RAM row:
+# one TAG_WIDTH-bit slot per way, for all NUM_WAYS ways
+TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
+
+def CacheTagArray():
+ return Array(CacheTagSet() for x in range(NUM_LINES))
+
+def CacheValidBitsArray():
+ return Array(CacheWayValidBits() for x in range(NUM_LINES))
+
+def RowPerLineValidArray():
+ return Array(Signal() for x in range(ROW_PER_LINE))
+
+# L1 TLB
+TLB_SET_BITS = log2_int(TLB_SET_SIZE)
+TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
+TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
+TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
+TLB_PTE_BITS = 64
+TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS;
+
+assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
+assert (LINE_SIZE % 2) == 0, "LINE_SIZE not power of 2"
+assert (NUM_LINES % 2) == 0, "NUM_LINES not power of 2"
+assert (ROW_PER_LINE % 2) == 0, "ROW_PER_LINE not power of 2"
+assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
+assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS), \
+ "geometry bits don't add up"
+assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
+ "geometry bits don't add up"
+assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
+ "geometry bits don't add up"
+assert 64 == wishbone_data_bits, "Can't yet handle wb width that isn't 64-bits"
+assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
+
+
+def TLBValidBitsArray():
+ return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))
+
+def TLBTagsArray():
+ return Array(Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE))
+
+def TLBPtesArray():
+ return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))
+
+def HitWaySet():
+ return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
+
+# Cache RAM interface
+def CacheRamOut():
+ return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
+
+# PLRU output interface
+def PLRUOut():
+ return Array(Signal(WAY_BITS) for x in range(Index()))
+
+# TLB PLRU output interface
+def TLBPLRUOut():
+ return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))
+
+# Helper functions to decode incoming requests
+#
+# Return the cache line index (tag index) for an address
+def get_index(addr):
+ return addr[LINE_OFF_BITS:SET_SIZE_BITS]
+
+# Return the cache row index (data memory) for an address
+def get_row(addr):
+ return addr[ROW_OFF_BITS:SET_SIZE_BITS]
+
+# Return the index of a row within a line
+def get_row_of_line(row):
+ row_v = Signal(ROW_BITS)
+ row_v = Signal(row)
+ return row_v[0:ROW_LINE_BITS]
+
+# Returns whether this is the last row of a line
+def is_last_row_addr(addr, last):
+ return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
+
+# Returns whether this is the last row of a line
+def is_last_row(row, last):
+ return get_row_of_line(row) == last
+
+# Return the address of the next row in the current cache line
+def next_row_addr(addr):
+ row_idx = Signal(ROW_LINE_BITS)
+ result = WBAddrType()
+ # Is there no simpler way in VHDL to
+ # generate that 3 bits adder ?
+ row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS]
+ row_idx = Signal(row_idx + 1)
+ result = addr
+ result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx
+ return result
+
+# Return the next row in the current cache line. We use a
+# dedicated function in order to limit the size of the
+# generated adder to be only the bits within a cache line
+# (3 bits with default settings)
+def next_row(row)
+ row_v = row[0:ROW_LINE_BITS] + 1
+ return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
+
+# Get the tag value from the address
+def get_tag(addr):
+ return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
+
+# Read a tag from a tag memory row
+def read_tag(way, tagset):
+ return tagset[way *TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
+
+# Read a TLB tag from a TLB tag memory row
+def read_tlb_tag(way, tags):
+ j = way * TLB_EA_TAG_BITS
+ return tags[j:j + TLB_EA_TAG_BITS]
+
+# Write a TLB tag to a TLB tag memory row
+def write_tlb_tag(way, tags), tag):
+ j = way * TLB_EA_TAG_BITS
+ tags[j:j + TLB_EA_TAG_BITS] = tag
+
+# Read a PTE from a TLB PTE memory row
+def read_tlb_pte(way, ptes):
+ j = way * TLB_PTE_BITS
+ return ptes[j:j + TLB_PTE_BITS]
+
+def write_tlb_pte(way, ptes,newpte):
+ j = way * TLB_PTE_BITS
+ return ptes[j:j + TLB_PTE_BITS].eq(newpte)
+