--- /dev/null
+from nmigen import Module, Signal
+from nmigen.lib.coding import Encoder, PriorityEncoder
+
+class AddressEncoder():
+ """Address Encoder
+
+ The purpose of this module is to take in a vector and
+ encode the bits that are one hot into an address. This module
+ combines both nmigen's Encoder and PriorityEncoder and will state
+ whether the input line has a single bit hot, multiple bits hot,
+ or no bits hot. The output line will always have the lowest value
+ address output.
+
+ Usage:
+ The output is valid when either single or multiple match is high.
+ Otherwise output is 0.
+ """
+ def __init__(self, width):
+ """ Arguments:
+ * width: The desired length of the input vector
+ """
+ # Internal
+ self.encoder = Encoder(width)
+ self.p_encoder = PriorityEncoder(width)
+
+ # Input
+ self.i = Signal(width)
+
+ # Output
+ self.single_match = Signal(1)
+ self.multiple_match = Signal(1)
+ self.o = Signal(max=width)
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ # Add internal submodules
+ m.submodules.encoder = self.encoder
+ m.submodules.p_encoder = self.p_encoder
+
+ m.d.comb += [
+ self.encoder.i.eq(self.i),
+ self.p_encoder.i.eq(self.i)
+ ]
+
+ # Steps:
+ # 1. check if the input vector is non-zero
+ # 2. if non-zero, check if single match or multiple match
+ # 3. set output line to be lowest value address output
+
+ # If the priority encoder recieves an input of 0
+ # If n is 1 then the output is not valid
+ with m.If(self.p_encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.o.eq(0)
+ ]
+ # If the priority encoder recieves an input > 0
+ with m.Else():
+ # Multiple Match if encoder n is invalid
+ with m.If(self.encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(1)
+ ]
+ # Single Match if encoder n is valid
+ with m.Else():
+ m.d.comb += [
+ self.single_match.eq(1),
+ self.multiple_match.eq(0)
+ ]
+ # Always set output based on priority encoder output
+ m.d.comb += self.o.eq(self.p_encoder.o)
+ return m
--- /dev/null
+from nmigen import Array, Cat, Module, Signal
+from nmigen.lib.coding import Decoder
+from nmigen.cli import main #, verilog
+
+from CamEntry import CamEntry
+from AddressEncoder import AddressEncoder
+
+class Cam():
+ """ Content Addressable Memory (CAM)
+
+ The purpose of this module is to quickly look up whether an
+ entry exists given a data key.
+ This module will search for the given data in all internal entries
+ and output whether a single or multiple match was found.
+ If an single entry is found the address be returned and single_match
+ is set HIGH. If multiple entries are found the lowest address is
+ returned and multiple_match is set HIGH. If neither single_match or
+ multiple_match are HIGH this implies no match was found. To write
+ to the CAM set the address bus to the desired entry and set write_enable
+ HIGH. Entry managment should be performed one level above this block
+ as lookup is performed within.
+
+ Notes:
+ The read and write operations take one clock cycle to complete.
+ Currently the read_warning line is present for interfacing but
+ is not necessary for this design. This module is capable of writing
+ in the first cycle, reading on the second, and output the correct
+ address on the third.
+ """
+
+ def __init__(self, data_size, cam_size):
+ """ Arguments:
+ * data_size: (bits) The bit size of the data
+ * cam_size: (number) The number of entries in the CAM
+ """
+
+ # Internal
+ self.cam_size = cam_size
+ self.encoder = AddressEncoder(cam_size)
+ self.decoder = Decoder(cam_size)
+ self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
+
+ # Input
+ self.enable = Signal(1)
+ self.write_enable = Signal(1)
+ self.data_in = Signal(data_size) # The data to be written
+ self.data_mask = Signal(data_size) # mask for ternary writes
+ self.address_in = Signal(max=cam_size) # address of CAM Entry to write
+
+ # Output
+ self.read_warning = Signal(1) # High when a read interrupts a write
+ self.single_match = Signal(1) # High when there is only one match
+ self.multiple_match = Signal(1) # High when there at least two matches
+ self.match_address = Signal(max=cam_size) # The lowest address matched
+
+ def elaborate(self, platform=None):
+ m = Module()
+ # AddressEncoder for match types and output address
+ m.submodules.AddressEncoder = self.encoder
+ # Decoder is used to select which entry will be written to
+ m.submodules.Decoder = self.decoder
+ # CamEntry Array Submodules
+ # Note these area added anonymously
+ entry_array = self.entry_array
+ m.submodules += entry_array
+
+ # Decoder logic
+ m.d.comb += [
+ self.decoder.i.eq(self.address_in),
+ self.decoder.n.eq(0)
+ ]
+
+ encoder_vector = []
+ with m.If(self.enable):
+ # Set the key value for every CamEntry
+ for index in range(self.cam_size):
+
+ # Write Operation
+ with m.If(self.write_enable):
+ with m.If(self.decoder.o[index]):
+ m.d.comb += entry_array[index].command.eq(2)
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(0)
+
+ # Read Operation
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(1)
+
+ # Send data input to all entries
+ m.d.comb += entry_array[index].data_in.eq(self.data_in)
+ # Send all entry matches to encoder
+ ematch = entry_array[index].match
+ encoder_vector.append(ematch)
+
+ # Give input to and accept output from encoder module
+ m.d.comb += [
+ self.encoder.i.eq(Cat(*encoder_vector)),
+ self.single_match.eq(self.encoder.single_match),
+ self.multiple_match.eq(self.encoder.multiple_match),
+ self.match_address.eq(self.encoder.o)
+ ]
+
+ # If the CAM is not enabled set all outputs to 0
+ with m.Else():
+ m.d.comb += [
+ self.read_warning.eq(0),
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.match_address.eq(0)
+ ]
+
+ return m
+
+ def ports(self):
+ return [self.enable, self.write_enable,
+ self.data_in, self.data_mask,
+ self.read_warning, self.single_match,
+ self.multiple_match, self.match_address]
+
+
+if __name__ == '__main__':
+ cam = Cam(4, 4)
+ main(cam, ports=cam.ports())
+
--- /dev/null
+from nmigen import Module, Signal
+
+class CamEntry:
+ """ Content Addressable Memory (CAM) Entry
+
+ The purpose of this module is to represent an entry within a CAM.
+ This module when given a read command will compare the given data
+ and output whether a match was found or not. When given a write
+ command it will write the given data into internal registers.
+ """
+
+ def __init__(self, data_size):
+ """ Arguments:
+ * data_size: (bit count) The size of the data
+ """
+ # Input
+ self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
+ self.data_in = Signal(data_size) # Data input when writing
+
+ # Output
+ self.match = Signal(1) # Result of the internal/input key comparison
+ self.data = Signal(data_size)
+
+ def elaborate(self, platform=None):
+ m = Module()
+ with m.Switch(self.command):
+ with m.Case("00"):
+ m.d.sync += self.match.eq(0)
+ with m.Case("01"):
+ with m.If(self.data == self.data_in):
+ m.d.sync += self.match.eq(1)
+ with m.Else():
+ m.d.sync += self.match.eq(0)
+ with m.Case("10"):
+ m.d.sync += [
+ self.data.eq(self.data_in),
+ self.match.eq(0)
+ ]
+ with m.Case():
+ m.d.sync += [
+ self.match.eq(0),
+ self.data.eq(0)
+ ]
+
+ return m
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen.cli import verilog, rtlil
+
+
+class LFSRPolynomial(set):
+ """ implements a polynomial for use in LFSR
+ """
+ def __init__(self, exponents=()):
+ for e in exponents:
+ assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
+ assert (e >= 0), ValueError("%d must not be negative" % e)
+ set.__init__(self, set(exponents).union({0})) # must contain zero
+
+ @property
+ def max_exponent(self):
+ return max(self) # derived from set, so this returns the max exponent
+
+ @property
+ def exponents(self):
+ exponents = list(self) # get elements of set as a list
+ exponents.sort(reverse=True)
+ return exponents
+
+ def __str__(self):
+ expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
+ retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
+ return " + ".join(retval)
+
+ def __repr__(self):
+ return "LFSRPolynomial(%s)" % self.exponents
+
+
+# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
+LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
+LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
+LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
+LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
+LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
+LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
+LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
+LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
+LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
+LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
+LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
+LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
+LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
+LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
+LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
+LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
+LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
+LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
+LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
+LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
+LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
+LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
+LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
+
+
+class LFSR(LFSRPolynomial, Elaboratable):
+ """ implements a Linear Feedback Shift Register
+ """
+ def __init__(self, polynomial):
+ """ Inputs:
+ ------
+ :polynomial: the polynomial to feedback on. may be a LFSRPolynomial
+ instance or an iterable of ints (list/tuple/generator)
+ :enable: enable (set LO to disable. NOTE: defaults to HI)
+
+ Outputs:
+ -------
+ :state: the LFSR state. bitwidth is taken from the polynomial
+ maximum exponent.
+
+ Note: if an LFSRPolynomial is passed in as the input, because
+ LFSRPolynomial is derived from set() it's ok:
+ LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
+ """
+ LFSRPolynomial.__init__(self, polynomial)
+ self.state = Signal(self.max_exponent, reset=1)
+ self.enable = Signal(reset=1)
+
+ def elaborate(self, platform):
+ m = Module()
+ # do absolutely nothing if the polynomial is empty (always has a zero)
+ if self.max_exponent <= 1:
+ return m
+
+ # create XOR-bunch, select bits from state based on exponent
+ feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
+ for exponent in self:
+ if exponent > 0: # don't have to skip, saves CPU cycles though
+ feedback ^= self.state[exponent - 1]
+
+ # if enabled, shift-and-feedback
+ with m.If(self.enable):
+ # shift up lower bits by Cat'ing in a new bit zero (feedback)
+ newstate = Cat(feedback, self.state[:-1])
+ m.d.sync += self.state.eq(newstate)
+
+ return m
+
+
+# example: Poly24
+if __name__ == '__main__':
+ p24 = rtlil.convert(LFSR(LFSR_POLY_24))
+ with open("lfsr2_p24.il", "w") as f:
+ f.write(p24)
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Module
+from typing import Iterable, Optional, Iterator, Any, Union
+from typing_extensions import final
+
+
+@final
+class LFSRPolynomial(set):
+ def __init__(self, exponents: Iterable[int] = ()):
+ def elements() -> Iterable[int]: ...
+ @property
+ def exponents(self) -> list[int]: ...
+ def __str__(self) -> str: ...
+ def __repr__(self) -> str: ...
+
+
+@final
+class LFSR:
+ def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
+ @property
+ def width(self) -> int: ...
+ def elaborate(self, platform: Any) -> Module: ...
--- /dev/null
+verilog:
+ python3 Cam.py generate -t v > Cam.v
--- /dev/null
+from nmigen import Cat, Memory, Module, Signal, Elaboratable
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+
+class MemorySet(Elaboratable):
+ def __init__(self, data_size, tag_size, set_count, active):
+ self.active = active
+ input_size = tag_size + data_size # Size of the input data
+ memory_width = input_size + 1 # The width of the cache memory
+ self.active = active
+ self.data_size = data_size
+ self.tag_size = tag_size
+
+ # XXX TODO, use rd-enable and wr-enable?
+ self.mem = Memory(memory_width, set_count)
+ self.r = self.mem.read_port()
+ self.w = self.mem.write_port()
+
+ # inputs (address)
+ self.cset = Signal(max=set_count) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # Incoming data
+
+ # outputs
+ self.valid = Signal()
+ self.data_o = Signal(data_size) # Outgoing data (excludes tag)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.mem = self.mem
+ m.submodules.r = self.r
+ m.submodules.w = self.w
+
+ # temporaries
+ active_bit = Signal()
+ tag_valid = Signal()
+ data_start = self.active + 1
+ data_end = data_start + self.data_size
+ tag_start = data_end
+ tag_end = tag_start + self.tag_size
+
+ # connect the read port address to the set/entry
+ read_port = self.r
+ m.d.comb += read_port.addr.eq(self.cset)
+ # Pull out active bit from data
+ data = read_port.data
+ m.d.comb += active_bit.eq(data[self.active])
+ # Validate given tag vs stored tag
+ tag = data[tag_start:tag_end]
+ m.d.comb += tag_valid.eq(self.tag == tag)
+ # An entry is only valid if the tags match AND
+ # is marked as a valid entry
+ m.d.comb += self.valid.eq(tag_valid & active_bit)
+
+ # output data: TODO, check rd-enable?
+ m.d.comb += self.data_o.eq(data[data_start:data_end])
+
+ # connect the write port addr to the set/entry (only if write enabled)
+ # (which is only done on a match, see SAC.write_entry below)
+ write_port = self.w
+ with m.If(write_port.en):
+ m.d.comb += write_port.addr.eq(self.cset)
+ m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
+
+ return m
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.cli import main
+
+from PteEntry import PteEntry
+
+class PermissionValidator():
+ """ The purpose of this Module is to check the Permissions of a given PTE
+ against the requested access permissions.
+
+ This module will either validate (by setting the valid bit HIGH)
+ the request or find a permission fault and invalidate (by setting
+ the valid bit LOW) the request
+ """
+
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+
+ Return:
+ * valid HIGH when permissions are correct
+ """
+ # Internal
+ self.pte_entry = PteEntry(asid_size, pte_size)
+
+ # Input
+ self.data = Signal(asid_size + pte_size);
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.super_mode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+ self.asid = Signal(15) # Address Space IDentifier (ASID)
+
+ # Output
+ self.valid = Signal(1) # Denotes if the permissions are correct
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ m.submodules.pte_entry = self.pte_entry
+
+ m.d.comb += self.pte_entry.i.eq(self.data)
+
+ # Check if the entry is valid
+ with m.If(self.pte_entry.v):
+ # ASID match or Global Permission
+ # Note that the MSB bound is exclusive
+ with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
+ # Check Execute, Write, Read (XWR) Permissions
+ with m.If(self.pte_entry.xwr == self.xwr):
+ # Supervisor Logic
+ with m.If(self.super_mode):
+ # Valid if entry is not in user mode or supervisor
+ # has Supervisor User Memory (SUM) access via the
+ # SUM bit in the sstatus register
+ m.d.comb += self.valid.eq((~self.pte_entry.u) \
+ | self.super_access)
+ # User logic
+ with m.Else():
+ # Valid if the entry is in user mode only
+ m.d.comb += self.valid.eq(self.pte_entry.u)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ return m
\ No newline at end of file
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.cli import main
+
+class PteEntry():
+ """ The purpose of this Module is to centralize the parsing of Page
+ Table Entries (PTE) into one module to prevent common mistakes
+ and duplication of code. The control bits are parsed out for
+ ease of use.
+
+ This module parses according to the standard PTE given by the
+ Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
+ The Address Space IDentifier (ASID) is appended to the MSB of the input
+ and is parsed out as such.
+
+ An valid input Signal would be:
+ ASID PTE
+ Bits:[78-64][63-0]
+
+ The output PTE value will include the control bits.
+ """
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+
+ Return:
+ * d The Dirty bit from the PTE portion of i
+ * a The Accessed bit from the PTE portion of i
+ * g The Global bit from the PTE portion of i
+ * u The User Mode bit from the PTE portion of i
+ * xwr The Execute/Write/Read bit from the PTE portion of i
+ * v The Valid bit from the PTE portion of i
+ * asid The asid portion of i
+ * pte The pte portion of i
+ """
+ # Internal
+ self.asid_start = pte_size
+ self.asid_end = pte_size + asid_size
+
+ # Input
+ self.i = Signal(asid_size + pte_size)
+
+ # Output
+ self.d = Signal(1) # Dirty bit (From pte)
+ self.a = Signal(1) # Accessed bit (From pte)
+ self.g = Signal(1) # Global Access (From pte)
+ self.u = Signal(1) # User Mode (From pte)
+ self.xwr = Signal(3) # Execute Read Write (From pte)
+ self.v = Signal(1) # Valid (From pte)
+ self.asid = Signal(asid_size) # Associated Address Space IDentifier
+ self.pte = Signal(pte_size) # Full Page Table Entry
+
+ def elaborate(self, platform=None):
+ m = Module()
+ # Pull out all control bites from PTE
+ m.d.comb += [
+ self.d.eq(self.i[7]),
+ self.a.eq(self.i[6]),
+ self.g.eq(self.i[5]),
+ self.u.eq(self.i[4]),
+ self.xwr.eq(self.i[1:4]),
+ self.v.eq(self.i[0])
+ ]
+ m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
+ m.d.comb += self.pte.eq(self.i[0:self.asid_start])
+ return m
\ No newline at end of file
--- /dev/null
+"""
+
+Online simulator of 4-way set-associative cache:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
+
+Python simulator of a N-way set-associative cache:
+https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
+"""
+import sys
+sys.path.append("ariane/src/")
+
+from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
+from nmigen.compat.genlib import fsm
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+from AddressEncoder import AddressEncoder
+from MemorySet import MemorySet
+
+# TODO: use a LFSR that advances continuously and picking the bottom
+# few bits from it to select which cache line to replace, instead of PLRU
+# http://bugs.libre-riscv.org/show_bug.cgi?id=71
+from plru import PLRU
+from LFSR import LFSR, LFSR_POLY_24
+
+SA_NA = "00" # no action (none)
+SA_RD = "01" # read
+SA_WR = "10" # write
+
+
+class SetAssociativeCache(Elaboratable):
+ """ Set Associative Cache Memory
+
+ The purpose of this module is to generate a memory cache given the
+ constraints passed in. This will create a n-way set associative cache.
+ It is expected for the SV TLB that the VMA will provide the set number
+ while the ASID provides the tag (still to be decided).
+
+ """
+ def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
+ """ Arguments
+ * tag_size (bits): The bit count of the tag
+ * data_size (bits): The bit count of the data to be stored
+ * set_count (number): The number of sets/entries in the cache
+ * way_count (number): The number of slots a data can be stored
+ in one set
+ * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
+ set/entry to write to. otherwise, use a PLRU
+ """
+ # Internals
+ self.lfsr_mode = lfsr
+ self.way_count = way_count # The number of slots in one set
+ self.tag_size = tag_size # The bit count of the tag
+ self.data_size = data_size # The bit count of the data to be stored
+
+ # set up Memory array
+ self.mem_array = Array() # memory array
+ for i in range(way_count):
+ ms = MemorySet(data_size, tag_size, set_count, active=0)
+ self.mem_array.append(ms)
+
+ # Finds valid entries
+ self.encoder = AddressEncoder(way_count)
+
+ # setup PLRU or LFSR
+ if lfsr:
+ # LFSR mode
+ self.lfsr = LFSR(LFSR_POLY_24)
+ else:
+ # PLRU mode
+ self.plru = PLRU(way_count) # One block to handle plru calculations
+ self.plru_array = Array() # PLRU data on each set
+ for i in range(set_count):
+ name="plru%d" % i
+ self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
+
+ # Input
+ self.enable = Signal(1) # Whether the cache is enabled
+ self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX)
+ self.cset = Signal(max=set_count) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # The input data
+
+ # Output
+ self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
+ self.hit = Signal(1) # Tag matched one way in the given set
+ self.multiple_hit = Signal(1) # Tag matched many ways in the given set
+ self.data_o = Signal(data_size) # The data linked to the matched tag
+
+ def check_tags(self, m):
+ """ Validate the tags in the selected set. If one and only one
+ tag matches set its state to zero and increment all others
+ by one. We only advance to next state if a single hit is found.
+ """
+ # Vector to store way valid results
+ # A zero denotes a way is invalid
+ valid_vector = []
+ # Loop through memory to prep read/write ports and set valid_vector
+ for i in range(self.way_count):
+ valid_vector.append(self.mem_array[i].valid)
+
+ # Pass encoder the valid vector
+ m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
+
+ # Only one entry should be marked
+ # This is due to already verifying the tags
+ # matched and the valid bit is high
+ with m.If(self.hit):
+ m.next = "FINISHED_READ"
+ # Pull out data from the read port
+ data = self.mem_array[self.encoder.o].data_o
+ m.d.comb += self.data_o.eq(data)
+ if not self.lfsr_mode:
+ self.access_plru(m)
+
+ # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k
+ with m.Elif(self.multiple_hit):
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+
+ # No tag matches means no data
+ with m.Else():
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+
+ def access_plru(self, m):
+ """ An entry was accessed and the plru tree must now be updated
+ """
+ # Pull out the set's entry being edited
+ plru_entry = self.plru_array[self.cset]
+ m.d.comb += [
+ # Set the plru data to the current state
+ self.plru.plru_tree.eq(plru_entry),
+ # Set that the cache was accessed
+ self.plru.lu_access_i.eq(1)
+ ]
+
+ def read(self, m):
+ """ Go through the read process of the cache.
+ This takes two cycles to complete. First it checks for a valid tag
+ and secondly it updates the LRU values.
+ """
+ with m.FSM() as fsm_read:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ # check_tags will set the state if the conditions are met
+ self.check_tags(m)
+ with m.State("FINISHED_READ"):
+ m.next = "READY"
+ m.d.comb += self.ready.eq(1)
+ if not self.lfsr_mode:
+ plru_tree_o = self.plru.plru_tree_o
+ m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
+
+ def write_entry(self, m):
+ if not self.lfsr_mode:
+ m.d.comb += [# set cset (mem address) into PLRU
+ self.plru.plru_tree.eq(self.plru_array[self.cset]),
+ # and connect plru to encoder for write
+ self.encoder.i.eq(self.plru.replace_en_o)
+ ]
+ write_port = self.mem_array[self.encoder.o].w
+ else:
+ # use the LFSR to generate a random(ish) one of the mem array
+ lfsr_output = Signal(max=self.way_count)
+ lfsr_random = Signal(max=self.way_count)
+ m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
+ # address too big, limit to range of array
+ m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count,
+ lfsr_output - self.way_count,
+ lfsr_output))
+ write_port = self.mem_array[lfsr_random].w
+
+ # then if there is a match from the encoder, enable the selected write
+ with m.If(self.encoder.single_match):
+ m.d.comb += write_port.en.eq(1)
+
+ def write(self, m):
+ """ Go through the write process of the cache.
+ This takes two cycles to complete. First it writes the entry,
+ and secondly it updates the PLRU (in plru mode)
+ """
+ with m.FSM() as fsm_write:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ self.write_entry(m)
+ m.next ="FINISHED_WRITE"
+ with m.State("FINISHED_WRITE"):
+ m.d.comb += self.ready.eq(1)
+ if not self.lfsr_mode:
+ plru_entry = self.plru_array[self.cset]
+ m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
+ m.next = "READY"
+
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ # ----
+ # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
+ # ----
+
+ m.submodules.AddressEncoder = self.encoder
+ if self.lfsr_mode:
+ m.submodules.LFSR = self.lfsr
+ else:
+ m.submodules.PLRU = self.plru
+
+ for i, mem in enumerate(self.mem_array):
+ setattr(m.submodules, "mem%d" % i, mem)
+
+ # ----
+ # select mode: PLRU connect to encoder, LFSR do... something
+ # ----
+
+ if not self.lfsr_mode:
+ # Set what entry was hit
+ m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
+ else:
+ # enable LFSR
+ m.d.comb += self.lfsr.enable.eq(self.enable)
+
+ # ----
+ # connect hit/multiple hit to encoder output
+ # ----
+
+ m.d.comb += [
+ self.hit.eq(self.encoder.single_match),
+ self.multiple_hit.eq(self.encoder.multiple_match),
+ ]
+
+ # ----
+ # connect incoming data/tag/cset(addr) to mem_array
+ # ----
+
+ for mem in self.mem_array:
+ write_port = mem.w
+ m.d.comb += [mem.cset.eq(self.cset),
+ mem.tag.eq(self.tag),
+ mem.data_i.eq(self.data_i),
+ write_port.en.eq(0), # default: disable write
+ ]
+ # ----
+ # Commands: READ/WRITE/TODO
+ # ----
+
+ with m.If(self.enable):
+ with m.Switch(self.command):
+ # Search all sets at a particular tag
+ with m.Case(SA_RD):
+ self.read(m)
+ with m.Case(SA_WR):
+ self.write(m)
+ # Maybe catch multiple tags write here?
+ # TODO
+ # TODO: invalidate/flush, flush-all?
+
+ return m
+
+ def ports(self):
+ return [self.enable, self.command, self.cset, self.tag, self.data_i,
+ self.ready, self.hit, self.multiple_hit, self.data_o]
+
+
+if __name__ == '__main__':
+ sac = SetAssociativeCache(4, 8, 4, 6)
+ vl = rtlil.convert(sac, ports=sac.ports())
+ with open("SetAssociativeCache.il", "w") as f:
+ f.write(vl)
+
+ sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
+ vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
+ with open("SetAssociativeCacheLFSR.il", "w") as f:
+ f.write(vl)
--- /dev/null
+""" TLB Module
+
+ The expected form of the data is:
+ * Item (Bits)
+ * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
+"""
+
+from nmigen import Memory, Module, Signal, Cat
+from nmigen.cli import main
+
+from PermissionValidator import PermissionValidator
+from Cam import Cam
+
+class TLB():
+ def __init__(self, asid_size, vma_size, pte_size, L1_size):
+ """ Arguments
+ * asid_size: Address Space IDentifier (ASID) typically 15 bits
+ * vma_size: Virtual Memory Address (VMA) typically 36 bits
+ * pte_size: Page Table Entry (PTE) typically 64 bits
+
+ Notes:
+ These arguments should represent the largest possible size
+ defined by the MODE settings. See
+ Volume II: RISC-V Privileged Architectures V1.10 Page 57
+ """
+
+ # Internal
+ self.state = 0
+ # L1 Cache Modules
+ L1_size = 8 # XXX overridden incoming argument?
+ self.cam_L1 = Cam(vma_size, L1_size)
+ self.mem_L1 = Memory(asid_size + pte_size, L1_size)
+
+ # Permission Validator
+ self.perm_validator = PermissionValidator(asid_size, pte_size)
+
+ # Inputs
+ self.supermode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+ self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+ self.address_L1 = Signal(max=L1_size)
+ self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+ self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+ self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+
+ # Outputs
+ self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+ self.perm_valid = Signal(1) # Denotes if the permissions are correct
+ self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
+
+ def search(self, m, read_L1, write_L1):
+ """ searches the TLB
+ """
+ m.d.comb += [
+ write_L1.en.eq(0),
+ self.cam_L1.write_enable.eq(0),
+ self.cam_L1.data_in.eq(self.vma)
+ ]
+ # Match found in L1 CAM
+ match_found = Signal(reset_less=True)
+ m.d.comb += match_found.eq(self.cam_L1.single_match
+ | self.cam_L1.multiple_match)
+ with m.If(match_found):
+ # Memory shortcut variables
+ mem_address = self.cam_L1.match_address
+ # Memory Logic
+ m.d.comb += read_L1.addr.eq(mem_address)
+ # Permission Validator Logic
+ m.d.comb += [
+ self.hit.eq(1),
+ # Set permission validator data to the correct
+ # register file data according to CAM match
+ # address
+ self.perm_validator.data.eq(read_L1.data),
+ # Execute, Read, Write
+ self.perm_validator.xwr.eq(self.xwr),
+ # Supervisor Mode
+ self.perm_validator.super_mode.eq(self.supermode),
+ # Supverisor Access
+ self.perm_validator.super_access.eq(self.super_access),
+ # Address Space IDentifier (ASID)
+ self.perm_validator.asid.eq(self.asid),
+ # Output result of permission validation
+ self.perm_valid.eq(self.perm_validator.valid)
+ ]
+ # Only output PTE if permissions are valid
+ with m.If(self.perm_validator.valid):
+ # XXX TODO - dummy for now
+ reg_data = Signal.like(self.pte_out)
+ m.d.comb += [
+ self.pte_out.eq(reg_data)
+ ]
+ with m.Else():
+ m.d.comb += [
+ self.pte_out.eq(0)
+ ]
+ # Miss Logic
+ with m.Else():
+ m.d.comb += [
+ self.hit.eq(0),
+ self.perm_valid.eq(0),
+ self.pte_out.eq(0)
+ ]
+
+ def write_l1(self, m, read_L1, write_L1):
+ """ writes to the L1 cache
+ """
+ # Memory_L1 Logic
+ m.d.comb += [
+ write_L1.en.eq(1),
+ write_L1.addr.eq(self.address_L1),
+ # The Cat places arguments from LSB -> MSB
+ write_L1.data.eq(Cat(self.pte_in, self.asid))
+ ]
+ # CAM_L1 Logic
+ m.d.comb += [
+ self.cam_L1.write_enable.eq(1),
+ self.cam_L1.data_in.eq(self.vma),
+ ]
+
+ def elaborate(self, platform):
+ m = Module()
+ # Add submodules
+ # Submodules for L1 Cache
+ m.d.submodules.cam_L1 = self.cam_L1
+ m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port()
+ m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port()
+ # Permission Validator Submodule
+ m.d.submodules.perm_valididator = self.perm_validator
+
+ # When MODE specifies translation
+ # TODO add in different bit length handling ie prefix 0s
+ tlb_enable = Signal(reset_less=True)
+ m.d.comb += tlb_enable.eq(self.mode != 0)
+
+ with m.If(tlb_enable):
+ m.d.comb += [
+ self.cam_L1.enable.eq(1)
+ ]
+ with m.Switch(self.command):
+ # Search
+ with m.Case("01"):
+ self.search(m, read_L1, write_L1)
+
+ # Write L1
+ # Expected that the miss will be handled in software
+ with m.Case("10"):
+ self.write_l1(m, read_L1, write_L1)
+
+ # TODO
+ #with m.Case("11"):
+
+ # When disabled
+ with m.Else():
+ m.d.comb += [
+ self.cam_L1.enable.eq(0),
+ # XXX TODO - self.reg_file.enable.eq(0),
+ self.hit.eq(0),
+ self.perm_valid.eq(0), # XXX TODO, check this
+ self.pte_out.eq(0)
+ ]
+ return m
+
+
+if __name__ == '__main__':
+ tlb = TLB(15, 36, 64, 4)
+ main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command,
+ tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
+ tlb.vma, tlb.pte_in,
+ tlb.hit, tlb.perm_valid, tlb.pte_out,
+ ] + tlb.cam_L1.ports())
--- /dev/null
+#include <cstdint>
+#include <iostream>
+#include <cmath>
+
+
+#define NWAY 4
+#define NLINE 256
+#define HIT 0
+#define MISS 1
+#define MS 1000
+/*
+Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
+Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ 8-way set associative - 7 = 1+2+4 bits
+16-way set associative - 15 = 1+2+4+8 bits
+32-way set associative - 31 = 1+2+4+8+16 bits
+64-way set associative - 63 = 1+2+4+8+16+32 bits
+*/
+using namespace std;
+struct AddressField {
+ uint64_t wd_idx : 2;//Unused
+ uint64_t offset : 4;//Unused
+ uint64_t index : 8;//NLINE = 256 = 2^8
+ uint64_t tag : 50;
+};
+
+union Address {
+ uint32_t* p;
+ AddressField fields;
+};
+
+struct Cell {
+ bool v;
+ uint64_t tag;
+
+ Cell() : v(false), tag(0) {}
+
+ bool isHit(uint64_t tag) {
+ return v && (tag == this->tag);
+ }
+
+ void fetch(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ addr.fields.offset = 0;
+ addr.fields.wd_idx = 0;
+ tag = addr.fields.tag;
+ v = true;
+ }
+};
+
+ostream& operator<<(ostream & out, const Cell& cell) {
+ out << " v:" << cell.v << " tag:" << hex << cell.tag;
+ return out;
+}
+
+struct Block {
+ Cell cell[NWAY];
+ uint32_t state;
+ uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
+ uint64_t *value;
+ uint64_t *next_value;
+
+ Block() : state(0) {
+ switch (NWAY) {
+ case 4:
+ mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
+ value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
+ next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
+ break;
+ case 8:
+ mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
+ 0b1010001};
+ value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
+ 0b1010001};
+ next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
+ 0b0000001, 0b0000000};
+ break;
+ //TODO - more NWAY goes here.
+ default:
+ std::cout << "Error definition NWAY = " << NWAY << std::endl;
+ }
+ }
+
+ uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
+ for (int i = 0; i < NWAY; ++i) {
+ if (cell[i].isHit(tag)) {
+ *pway = i;
+ return pway;
+ }
+ }
+ return NULL;
+ }
+
+ void setLRU(uint32_t *address) {
+ int way = 0;
+ uint32_t st = state;
+ for (int i = 0; i < NWAY; ++i) {
+ if ((state & mask[i]) == value[i]) {
+ state ^= mask[i];
+ way = i;
+ break;
+ }
+ }
+ cell[way].fetch(address);
+ cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
+ }
+
+ uint32_t *get(uint32_t *address, uint32_t *pway) {
+ Address addr;
+ addr.p = address;
+ uint32_t *d = getByTag(addr.fields.tag, pway);
+ if (d != NULL) {
+ return &d[addr.fields.offset];
+ }
+ return d;
+ }
+
+ int set(uint32_t *address) {
+ uint32_t way = 0;
+ uint32_t *p = get(address, &way);
+ if (p != NULL) {
+ printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
+ state &= ~mask[way];
+ printf("%X --> ", state);
+ state |= next_value[way];
+ printf("%X\n", state);
+ // *p = *address; //skip since address is fake.
+ return HIT;
+ } else {
+ setLRU(address);
+ return MISS;
+ }
+ }
+};
+
+ostream& operator<<(ostream & out, const Block& block) {
+ out << "state:" << block.state << " ";
+ for (int i = 0; i<NWAY; i++) {
+ out << block.cell[i];
+ }
+ return out;
+}
+
+struct Cache {
+ Block block[NLINE];
+ uint32_t count[2];
+ Cache() { count[HIT] = 0; count[MISS] = 0; }
+
+ void access(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ Block& b = block[addr.fields.index];
+ ++count[b.set(address)];
+ }
+
+};
+ostream& operator<<(ostream & out, const Cache& cache) {
+ out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
+ for (int i = 0; i < NLINE; i++) {
+ out << cache.block[i] << endl;
+ }
+ return out;
+}
+
+Cache cache;
+void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
+{
+ int x, i, j;
+ for (i = 0; i < MS; i++) {
+ for (j = 0; j < MS; j++) {
+ cache.access(res + i*MS +j);
+ for (x = 0; x < MS; x++) {
+ cache.access(m1 + i*MS + x);
+ cache.access(m2 + x*MS + j);
+ cache.access(res + i*MS +j);
+ // res[i][j] += m1[i][x] * m2[x][j];
+ cache.access(res + i*MS +j);
+ }
+ }
+ }
+}
+
+int main()
+{
+ uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
+ uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
+ uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
+ multiply(m1, m2, res);
+ cout << cache << endl;
+ return 0;
+}
--- /dev/null
+pseudo-LRU
+
+two-way set associative - one bit
+
+ indicates which line of the two has been reference more recently
+
+
+four-way set associative - three bits
+
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+
+ (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
+ Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
+
+
+note that there is a 6-bit encoding for true LRU for four-way set associative
+
+ bit 0: bank[1] more recently used than bank[0]
+ bit 1: bank[2] more recently used than bank[0]
+ bit 2: bank[2] more recently used than bank[1]
+ bit 3: bank[3] more recently used than bank[0]
+ bit 4: bank[3] more recently used than bank[1]
+ bit 5: bank[3] more recently used than bank[2]
+
+ this results in 24 valid bit patterns within the 64 possible bit patterns
+ (4! possible valid traces for bank references)
+
+ e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
+
+ you can implement a state machine with a 256x6 ROM (6-bit state encoding
+ appended with a 2-bit bank reference input will yield a new 6-bit state),
+ and you can implement an LRU bank indicator with a 64x2 ROM
+
--- /dev/null
+from nmigen import Const
+
+INSTR_ADDR_MISALIGNED = Const(0, 64)
+INSTR_ACCESS_FAULT = Const(1, 64)
+ILLEGAL_INSTR = Const(2, 64)
+BREAKPOINT = Const(3, 64)
+LD_ADDR_MISALIGNED = Const(4, 64)
+LD_ACCESS_FAULT = Const(5, 64)
+ST_ADDR_MISALIGNED = Const(6, 64)
+ST_ACCESS_FAULT = Const(7, 64)
+ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
+ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
+ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
+INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
+LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
+STORE_PAGE_FAULT = Const(15, 64) # Store page fault
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: Florian Zaruba, ETH Zurich
+# Date: 19/04/2017
+# Description: Memory Management Unit for Ariane, contains TLB and
+# address translation unit. SV39 as defined in RISC-V
+# privilege specification 1.11-WIP
+
+import ariane_pkg::*;
+"""
+
+from nmigen import Const, Signal, Cat, Module, Mux
+from nmigen.cli import verilog, rtlil
+
+from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
+from tlb import TLB
+from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
+ LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
+
+PRIV_LVL_M = Const(0b11, 2)
+PRIV_LVL_S = Const(0b01, 2)
+PRIV_LVL_U = Const(0b00, 2)
+
+
+class RVException:
+ def __init__(self):
+ self.cause = Signal(64) # cause of exception
+ self.tval = Signal(64) # more info of causing exception
+ # (e.g.: instruction causing it),
+ # address of LD/ST fault
+ self.valid = Signal()
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def __iter__(self):
+ yield self.cause
+ yield self.tval
+ yield self.valid
+
+ def ports(self):
+ return list(self)
+
+
+class ICacheReqI:
+ def __init__(self):
+ self.fetch_valid = Signal() # address translation valid
+ self.fetch_paddr = Signal(64) # physical address in
+ self.fetch_exception = RVException() # exception occurred during fetch
+
+ def __iter__(self):
+ yield self.fetch_valid
+ yield self.fetch_paddr
+ yield from self.fetch_exception
+
+ def ports(self):
+ return list(self)
+
+
+class ICacheReqO:
+ def __init__(self):
+ self.fetch_req = Signal() # address translation request
+ self.fetch_vaddr = Signal(64) # virtual address out
+
+ def __iter__(self):
+ yield self.fetch_req
+ yield self.fetch_vaddr
+
+ def ports(self):
+ return list(self)
+
+
+class MMU:
+ def __init__(self, instr_tlb_entries = 4,
+ data_tlb_entries = 4,
+ asid_width = 1):
+ self.instr_tlb_entries = instr_tlb_entries
+ self.data_tlb_entries = data_tlb_entries
+ self.asid_width = asid_width
+
+ self.flush_i = Signal()
+ self.enable_translation_i = Signal()
+ self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
+ # IF interface
+ self.icache_areq_i = ICacheReqO()
+ self.icache_areq_o = ICacheReqI()
+ # LSU interface
+ # this is a more minimalistic interface because the actual addressing
+ # logic is handled in the LSU as we distinguish load and stores,
+ # what we do here is simple address translation
+ self.misaligned_ex_i = RVException()
+ self.lsu_req_i = Signal() # request address translation
+ self.lsu_vaddr_i = Signal(64) # virtual address in
+ self.lsu_is_store_i = Signal() # the translation is requested by a store
+ # if we need to walk the page table we can't grant in the same cycle
+
+ # Cycle 0
+ self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
+ # if translation hits in the DTLB
+ # Cycle 1
+ self.lsu_valid_o = Signal() # translation is valid
+ self.lsu_paddr_o = Signal(64) # translated address
+ self.lsu_exception_o = RVException() # addr translate threw exception
+
+ # General control signals
+ self.priv_lvl_i = Signal(2)
+ self.ld_st_priv_lvl_i = Signal(2)
+ self.sum_i = Signal()
+ self.mxr_i = Signal()
+ # input logic flag_mprv_i,
+ self.satp_ppn_i = Signal(44)
+ self.asid_i = Signal(self.asid_width)
+ self.flush_tlb_i = Signal()
+ # Performance counters
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+ # PTW memory interface
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ iaccess_err = Signal() # insufficient priv to access instr page
+ daccess_err = Signal() # insufficient priv to access data page
+ ptw_active = Signal() # PTW is currently walking a page table
+ walking_instr = Signal() # PTW is walking because of an ITLB miss
+ ptw_error = Signal() # PTW threw an exception
+
+ update_vaddr = Signal(39)
+ uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
+ update_ptw_itlb = TLBUpdate(self.asid_width)
+ update_ptw_dtlb = TLBUpdate(self.asid_width)
+
+ itlb_lu_access = Signal()
+ itlb_content = PTE()
+ itlb_is_2M = Signal()
+ itlb_is_1G = Signal()
+ itlb_lu_hit = Signal()
+
+ dtlb_lu_access = Signal()
+ dtlb_content = PTE()
+ dtlb_is_2M = Signal()
+ dtlb_is_1G = Signal()
+ dtlb_lu_hit = Signal()
+
+ # Assignments
+ m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
+ dtlb_lu_access.eq(self.lsu_req_i)
+ ]
+
+ # ITLB
+ m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
+ self.asid_width)
+ m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
+ i_tlb.update_i.eq(update_ptw_itlb),
+ i_tlb.lu_access_i.eq(itlb_lu_access),
+ i_tlb.lu_asid_i.eq(self.asid_i),
+ i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+ itlb_content.eq(i_tlb.lu_content_o),
+ itlb_is_2M.eq(i_tlb.lu_is_2M_o),
+ itlb_is_1G.eq(i_tlb.lu_is_1G_o),
+ itlb_lu_hit.eq(i_tlb.lu_hit_o),
+ ]
+
+ # DTLB
+ m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
+ self.asid_width)
+ m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
+ d_tlb.update_i.eq(update_ptw_dtlb),
+ d_tlb.lu_access_i.eq(dtlb_lu_access),
+ d_tlb.lu_asid_i.eq(self.asid_i),
+ d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
+ dtlb_content.eq(d_tlb.lu_content_o),
+ dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
+ dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
+ dtlb_lu_hit.eq(d_tlb.lu_hit_o),
+ ]
+
+ # PTW
+ m.submodules.ptw = ptw = PTW(self.asid_width)
+ m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
+ walking_instr.eq(ptw.walking_instr_o),
+ ptw_error.eq(ptw.ptw_error_o),
+ ptw.enable_translation_i.eq(self.enable_translation_i),
+
+ update_vaddr.eq(ptw.update_vaddr_o),
+ update_ptw_itlb.eq(ptw.itlb_update_o),
+ update_ptw_dtlb.eq(ptw.dtlb_update_o),
+
+ ptw.itlb_access_i.eq(itlb_lu_access),
+ ptw.itlb_hit_i.eq(itlb_lu_hit),
+ ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+
+ ptw.dtlb_access_i.eq(dtlb_lu_access),
+ ptw.dtlb_hit_i.eq(dtlb_lu_hit),
+ ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
+
+ ptw.req_port_i.eq(self.req_port_i),
+ self.req_port_o.eq(ptw.req_port_o),
+ ]
+
+ # ila_1 i_ila_1 (
+ # .clk(clk_i), # input wire clk
+ # .probe0({req_port_o.address_tag, req_port_o.address_index}),
+ # .probe1(req_port_o.data_req), # input wire [63:0] probe1
+ # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
+ # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
+ # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
+ # .probe5(ptw_error), # input wire [1:0] probe5
+ # .probe6(update_vaddr), # input wire [0:0] probe6
+ # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
+ # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
+ # .probe9(dtlb_lu_access), # input wire [0:0] probe9
+ # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
+ # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
+ # .probe12(itlb_lu_access), # input wire [0:0] probe12
+ # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
+ # .probe14(itlb_lu_hit) # input wire [0:0] probe13
+ # );
+
+ #-----------------------
+ # Instruction Interface
+ #-----------------------
+ # The instruction interface is a simple request response interface
+
+ # MMU disabled: just pass through
+ m.d.comb += [self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req),
+ # play through in case we disabled address translation
+ self.icache_areq_o.fetch_paddr.eq(
+ self.icache_areq_i.fetch_vaddr)
+ ]
+ # two potential exception sources:
+ # 1. HPTW threw an exception -> signal with a page fault exception
+ # 2. We got an access error because of insufficient permissions ->
+ # throw an access exception
+ m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
+ # Check whether we are allowed to access this memory region
+ # from a fetch perspective
+
+ # XXX TODO: use PermissionValidator instead [we like modules]
+ m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
+ (((self.priv_lvl_i == PRIV_LVL_U) & \
+ ~itlb_content.u) | \
+ ((self.priv_lvl_i == PRIV_LVL_S) & \
+ itlb_content.u)))
+
+ # MMU enabled: address from TLB, request delayed until hit.
+ # Error when TLB hit and no access right or TLB hit and
+ # translated address not valid (e.g. AXI decode error),
+ # or when PTW performs walk due to ITLB miss and raises
+ # an error.
+ with m.If (self.enable_translation_i):
+ # we work with SV39, so if VM is enabled, check that
+ # all bits [63:38] are equal
+ with m.If (self.icache_areq_i.fetch_req & \
+ ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \
+ (self.icache_areq_i.fetch_vaddr[38:64]) == 0)):
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
+
+ # 4K page
+ paddr = Signal.like(self.icache_areq_o.fetch_paddr)
+ paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
+ itlb_content.ppn)
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(itlb_is_2M):
+ m.d.comb += paddr[12:21].eq(
+ self.icache_areq_i.fetch_vaddr[12:21])
+ # Giga page
+ with m.If(itlb_is_1G):
+ m.d.comb += paddr[12:30].eq(
+ self.icache_areq_i.fetch_vaddr[12:30])
+ m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
+
+ # ---------
+ # ITLB Hit
+ # --------
+ # if we hit the ITLB output the request signal immediately
+ with m.If(itlb_lu_hit):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req)
+ # we got an access error
+ with m.If (iaccess_err):
+ # throw a page fault
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+ # ---------
+ # ITLB Miss
+ # ---------
+ # watch out for exceptions happening during walking the page table
+ with m.Elif(ptw_active & walking_instr):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
+ fe.tval.eq(uaddr64),
+ fe.valid.eq(1)
+ ]
+
+ #-----------------------
+ # Data Interface
+ #-----------------------
+
+ lsu_vaddr = Signal(64)
+ dtlb_pte = PTE()
+ misaligned_ex = RVException()
+ lsu_req = Signal()
+ lsu_is_store = Signal()
+ dtlb_hit = Signal()
+ dtlb_is_2M = Signal()
+ dtlb_is_1G = Signal()
+
+ # check if we need to do translation or if we are always
+ # ready (e.g.: we are not translating anything)
+ m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
+ dtlb_lu_hit, 1))
+
+ # The data interface is simpler and only consists of a
+ # request/response interface
+ m.d.comb += [
+ # save request and DTLB response
+ lsu_vaddr.eq(self.lsu_vaddr_i),
+ lsu_req.eq(self.lsu_req_i),
+ misaligned_ex.eq(self.misaligned_ex_i),
+ dtlb_pte.eq(dtlb_content),
+ dtlb_hit.eq(dtlb_lu_hit),
+ lsu_is_store.eq(self.lsu_is_store_i),
+ dtlb_is_2M.eq(dtlb_is_2M),
+ dtlb_is_1G.eq(dtlb_is_1G),
+ ]
+ m.d.sync += [
+ self.lsu_paddr_o.eq(lsu_vaddr),
+ self.lsu_valid_o.eq(lsu_req),
+ self.lsu_exception_o.eq(misaligned_ex),
+ ]
+
+ sverr = Signal()
+ usrerr = Signal()
+
+ m.d.comb += [
+ # mute misaligned exceptions if there is no request
+ # otherwise they will throw accidental exceptions
+ misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
+
+ # SUM is not set and we are trying to access a user
+ # page in supervisor mode
+ sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
+ dtlb_pte.u),
+ # this is not a user page but we are in user mode and
+ # trying to access it
+ usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
+
+ # Check if the User flag is set, then we may only
+ # access it in supervisor mode if SUM is enabled
+ daccess_err.eq(sverr | usrerr),
+ ]
+
+ # translation is enabled and no misaligned exception occurred
+ with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
+ m.d.comb += lsu_req.eq(0)
+ # 4K page
+ paddr = Signal.like(lsu_vaddr)
+ paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(dtlb_is_2M):
+ m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
+ # Giga page
+ with m.If(dtlb_is_1G):
+ m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
+ m.d.sync += self.lsu_paddr_o.eq(paddr)
+
+ # ---------
+ # DTLB Hit
+ # --------
+ with m.If(dtlb_hit & lsu_req):
+ m.d.comb += lsu_req.eq(1)
+ # this is a store
+ with m.If (lsu_is_store):
+ # check if the page is write-able and
+ # we are not violating privileges
+ # also check if the dirty flag is set
+ with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+
+ # this is a load, check for sufficient access
+ # privileges - throw a page fault if necessary
+ with m.Elif(daccess_err):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+ # ---------
+ # DTLB Miss
+ # ---------
+ # watch out for exceptions
+ with m.Elif (ptw_active & ~walking_instr):
+ # page table walker threw an exception
+ with m.If (ptw_error):
+ # an error makes the translation valid
+ m.d.comb += lsu_req.eq(1)
+ # the page table walker can only throw page faults
+ with m.If (lsu_is_store):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+ with m.Else():
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+
+ return m
+
+ def ports(self):
+ return [self.flush_i, self.enable_translation_i,
+ self.en_ld_st_translation_i,
+ self.lsu_req_i,
+ self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
+ self.lsu_valid_o, self.lsu_paddr_o,
+ self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
+ self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
+ self.itlb_miss_o, self.dtlb_miss_o] + \
+ self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
+ self.req_port_i.ports() + self.req_port_o.ports() + \
+ self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
+
+if __name__ == '__main__':
+ mmu = MMU()
+ vl = rtlil.convert(mmu, ports=mmu.ports())
+ with open("test_mmu.il", "w") as f:
+ f.write(vl)
+
--- /dev/null
+from nmigen import Signal, Module, Cat, Const
+from nmigen.hdl.ir import Elaboratable
+from math import log2
+
+from ptw import TLBUpdate, PTE, ASID_WIDTH
+
+class PLRU(Elaboratable):
+ """ PLRU - Pseudo Least Recently Used Replacement
+
+ PLRU-tree indexing:
+ lvl0 0
+ / \
+ / \
+ lvl1 1 2
+ / \ / \
+ lvl2 3 4 5 6
+ / \ /\/\ /\
+ ... ... ... ...
+ """
+ def __init__(self, entries):
+ self.entries = entries
+ self.lu_hit = Signal(entries)
+ self.replace_en_o = Signal(entries)
+ self.lu_access_i = Signal()
+ # Tree (bit per entry)
+ self.TLBSZ = 2*(self.entries-1)
+ self.plru_tree = Signal(self.TLBSZ)
+ self.plru_tree_o = Signal(self.TLBSZ)
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ # Just predefine which nodes will be set/cleared
+ # E.g. for a TLB with 8 entries, the for-loop is semantically
+ # equivalent to the following pseudo-code:
+ # unique case (1'b1)
+ # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
+ # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
+ # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
+ # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
+ # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
+ # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
+ # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
+ # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
+ # default: begin /* No hit */ end
+ # endcase
+ LOG_TLB = int(log2(self.entries))
+ print(LOG_TLB)
+ for i in range(self.entries):
+ # we got a hit so update the pointer as it was least recently used
+ hit = Signal(reset_less=True)
+ m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
+ with m.If(hit):
+ # Set the nodes to the values we would expect
+ for lvl in range(LOG_TLB):
+ idx_base = (1<<lvl)-1
+ # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+ shift = LOG_TLB - lvl;
+ new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
+ plru_idx = idx_base + (i >> shift)
+ print ("plru", i, lvl, hex(idx_base),
+ plru_idx, shift, new_idx)
+ m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
+
+ # Decode tree to write enable signals
+ # Next for-loop basically creates the following logic for e.g.
+ # an 8 entry TLB (note: pseudo-code obviously):
+ # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
+ # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
+ # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
+ # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
+ # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
+ # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
+ # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
+ # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
+ # For each entry traverse the tree. If every tree-node matches
+ # the corresponding bit of the entry's index, this is
+ # the next entry to replace.
+ replace = []
+ for i in range(self.entries):
+ en = []
+ for lvl in range(LOG_TLB):
+ idx_base = (1<<lvl)-1
+ # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+ shift = LOG_TLB - lvl;
+ new_idx = (i >> (shift-1)) & 1;
+ plru_idx = idx_base + (i>>shift)
+ plru = Signal(reset_less=True,
+ name="plru-%d-%d-%d" % (i, lvl, plru_idx))
+ m.d.comb += plru.eq(self.plru_tree[plru_idx])
+ # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
+ if new_idx:
+ en.append(~plru) # yes inverted (using bool())
+ else:
+ en.append(plru) # yes inverted (using bool())
+ print ("plru", i, en)
+ # boolean logic manipulation:
+ # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
+ replace.append(~Cat(*en).bool())
+ m.d.comb += self.replace_en_o.eq(Cat(*replace))
+
+ return m
+
+ def ports(self):
+ return [self.entries, self.lu_hit, self.replace_en_o,
+ self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 24.4.2017
+# Description: Hardware-PTW
+
+/* verilator lint_off WIDTH */
+import ariane_pkg::*;
+
+see linux kernel source:
+
+* "arch/riscv/include/asm/page.h"
+* "arch/riscv/include/asm/mmu_context.h"
+* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
+
+"""
+
+from nmigen import Const, Signal, Cat, Module
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.cli import verilog, rtlil
+from math import log2
+
+
+DCACHE_SET_ASSOC = 8
+CONFIG_L1D_SIZE = 32*1024
+DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
+DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
+
+ASID_WIDTH = 8
+
+
+class DCacheReqI:
+ def __init__(self):
+ self.address_index = Signal(DCACHE_INDEX_WIDTH)
+ self.address_tag = Signal(DCACHE_TAG_WIDTH)
+ self.data_wdata = Signal(64)
+ self.data_req = Signal()
+ self.data_we = Signal()
+ self.data_be = Signal(8)
+ self.data_size = Signal(2)
+ self.kill_req = Signal()
+ self.tag_valid = Signal()
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def ports(self):
+ return [self.address_index, self.address_tag,
+ self.data_wdata, self.data_req,
+ self.data_we, self.data_be, self.data_size,
+ self.kill_req, self.tag_valid,
+ ]
+
+class DCacheReqO:
+ def __init__(self):
+ self.data_gnt = Signal()
+ self.data_rvalid = Signal()
+ self.data_rdata = Signal(64) # actually in PTE object format
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def ports(self):
+ return [self.data_gnt, self.data_rvalid, self.data_rdata]
+
+
+class PTE: #(RecordObject):
+ def __init__(self):
+ self.v = Signal()
+ self.r = Signal()
+ self.w = Signal()
+ self.x = Signal()
+ self.u = Signal()
+ self.g = Signal()
+ self.a = Signal()
+ self.d = Signal()
+ self.rsw = Signal(2)
+ self.ppn = Signal(44)
+ self.reserved = Signal(10)
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ if isinstance(x, ArrayProxy):
+ res = []
+ for o in self.ports():
+ i = getattr(x, o.name)
+ res.append(i)
+ x = Cat(*res)
+ else:
+ x = x.flatten()
+ return self.flatten().eq(x)
+
+ def __iter__(self):
+ """ order is critical so that flatten creates LSB to MSB
+ """
+ yield self.v
+ yield self.r
+ yield self.w
+ yield self.x
+ yield self.u
+ yield self.g
+ yield self.a
+ yield self.d
+ yield self.rsw
+ yield self.ppn
+ yield self.reserved
+
+ def ports(self):
+ return list(self)
+
+
+class TLBUpdate:
+ def __init__(self, asid_width):
+ self.valid = Signal() # valid flag
+ self.is_2M = Signal()
+ self.is_1G = Signal()
+ self.vpn = Signal(27)
+ self.asid = Signal(asid_width)
+ self.content = PTE()
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ return self.flatten().eq(x.flatten())
+
+ def ports(self):
+ return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
+ self.content.ports()
+
+
+# SV39 defines three levels of page tables
+LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
+LVL2 = Const(1, 2)
+LVL3 = Const(2, 2)
+
+
+class PTW:
+ def __init__(self, asid_width=8):
+ self.asid_width = asid_width
+
+ self.flush_i = Signal() # flush everything, we need to do this because
+ # actually everything we do is speculative at this stage
+ # e.g.: there could be a CSR instruction that changes everything
+ self.ptw_active_o = Signal(reset=1) # active if not IDLE
+ self.walking_instr_o = Signal() # set when walking for TLB
+ self.ptw_error_o = Signal() # set when an error occurred
+ self.enable_translation_i = Signal() # CSRs indicate to enable SV39
+ self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
+
+ self.lsu_is_store_i = Signal() # translation triggered by store
+ # PTW memory interface
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+
+ # to TLBs, update logic
+ self.itlb_update_o = TLBUpdate(asid_width)
+ self.dtlb_update_o = TLBUpdate(asid_width)
+
+ self.update_vaddr_o = Signal(39)
+
+ self.asid_i = Signal(self.asid_width)
+ # from TLBs
+ # did we miss?
+ self.itlb_access_i = Signal()
+ self.itlb_hit_i = Signal()
+ self.itlb_vaddr_i = Signal(64)
+
+ self.dtlb_access_i = Signal()
+ self.dtlb_hit_i = Signal()
+ self.dtlb_vaddr_i = Signal(64)
+ # from CSR file
+ self.satp_ppn_i = Signal(44) # ppn from satp
+ self.mxr_i = Signal()
+ # Performance counters
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+
+ def ports(self):
+ return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
+ ]
+ return [
+ self.enable_translation_i, self.en_ld_st_translation_i,
+ self.lsu_is_store_i, self.req_port_i, self.req_port_o,
+ self.update_vaddr_o,
+ self.asid_i,
+ self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
+ self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
+ self.satp_ppn_i, self.mxr_i,
+ self.itlb_miss_o, self.dtlb_miss_o
+ ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ # input registers
+ data_rvalid = Signal()
+ data_rdata = Signal(64)
+
+ # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
+ # is spec'd in 64-bit binary-format: better to spec as Record?
+ pte = PTE()
+ m.d.comb += pte.flatten().eq(data_rdata)
+
+ # SV39 defines three levels of page tables
+ ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
+ ptw_lvl1 = Signal()
+ ptw_lvl2 = Signal()
+ ptw_lvl3 = Signal()
+ m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
+ ptw_lvl2.eq(ptw_lvl == LVL2),
+ ptw_lvl3.eq(ptw_lvl == LVL3)]
+
+ # is this an instruction page table walk?
+ is_instr_ptw = Signal()
+ global_mapping = Signal()
+ # latched tag signal
+ tag_valid = Signal()
+ # register the ASID
+ tlb_update_asid = Signal(self.asid_width)
+ # register VPN we need to walk, SV39 defines a 39 bit virtual addr
+ vaddr = Signal(64)
+ # 4 byte aligned physical pointer
+ ptw_pptr = Signal(56)
+
+ end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
+ m.d.sync += [
+ # Assignments
+ self.update_vaddr_o.eq(vaddr),
+
+ self.walking_instr_o.eq(is_instr_ptw),
+ # directly output the correct physical address
+ self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
+ self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
+ # we are never going to kill this request
+ self.req_port_o.kill_req.eq(0), # XXX assign comb?
+ # we are never going to write with the HPTW
+ self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
+ # -----------
+ # TLB Update
+ # -----------
+ self.itlb_update_o.vpn.eq(vaddr[12:39]),
+ self.dtlb_update_o.vpn.eq(vaddr[12:39]),
+ # update the correct page table level
+ self.itlb_update_o.is_2M.eq(ptw_lvl2),
+ self.itlb_update_o.is_1G.eq(ptw_lvl1),
+ self.dtlb_update_o.is_2M.eq(ptw_lvl2),
+ self.dtlb_update_o.is_1G.eq(ptw_lvl1),
+ # output the correct ASID
+ self.itlb_update_o.asid.eq(tlb_update_asid),
+ self.dtlb_update_o.asid.eq(tlb_update_asid),
+ # set the global mapping bit
+ self.itlb_update_o.content.eq(pte),
+ self.itlb_update_o.content.g.eq(global_mapping),
+ self.dtlb_update_o.content.eq(pte),
+ self.dtlb_update_o.content.g.eq(global_mapping),
+
+ self.req_port_o.tag_valid.eq(tag_valid),
+ ]
+
+ #-------------------
+ # Page table walker
+ #-------------------
+ # A virtual address va is translated into a physical address pa as
+ # follows:
+ # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
+ # PAGESIZE=2^12 and LEVELS=3.)
+ # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
+ # (For Sv32, PTESIZE=4.)
+ # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
+ # access exception.
+ # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
+ # step 5. Otherwise, this PTE is a pointer to the next level of
+ # the page table.
+ # Let i=i-1. If i < 0, stop and raise an access exception.
+ # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
+ # 5. A leaf PTE has been found. Determine if the requested memory
+ # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
+ # stop and raise an access exception. Otherwise, the translation is
+ # successful. Set pte.a to 1, and, if the memory access is a
+ # store, set pte.d to 1.
+ # The translated physical address is given as follows:
+ # - pa.pgoff = va.pgoff.
+ # - If i > 0, then this is a superpage translation and
+ # pa.ppn[i-1:0] = va.vpn[i-1:0].
+ # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+ # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
+ # superpage stop and raise a page-fault exception.
+
+ m.d.sync += tag_valid.eq(0)
+
+ # default assignments
+ m.d.comb += [
+ # PTW memory interface
+ self.req_port_o.data_req.eq(0),
+ self.req_port_o.data_be.eq(Const(0xFF, 8)),
+ self.req_port_o.data_size.eq(Const(0b11, 2)),
+ self.req_port_o.data_we.eq(0),
+ self.ptw_error_o.eq(0),
+ self.itlb_update_o.valid.eq(0),
+ self.dtlb_update_o.valid.eq(0),
+
+ self.itlb_miss_o.eq(0),
+ self.dtlb_miss_o.eq(0),
+ ]
+
+ # ------------
+ # State Machine
+ # ------------
+
+ with m.FSM() as fsm:
+
+ with m.State("IDLE"):
+ self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid)
+
+ with m.State("WAIT_GRANT"):
+ self.grant(m, tag_valid, data_rvalid)
+
+ with m.State("PTE_LOOKUP"):
+ # we wait for the valid signal
+ with m.If(data_rvalid):
+ self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr)
+
+ # Propagate error to MMU/LSU
+ with m.State("PROPAGATE_ERROR"):
+ m.next = "IDLE"
+ m.d.comb += self.ptw_error_o.eq(1)
+
+ # wait for the rvalid before going back to IDLE
+ with m.State("WAIT_RVALID"):
+ with m.If(data_rvalid):
+ m.next = "IDLE"
+
+ m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
+ data_rvalid.eq(self.req_port_i.data_rvalid)
+ ]
+
+ return m
+
+ def set_grant_state(self, m):
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (self.req_port_i.data_gnt):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "WAIT_GRANT"
+
+ def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid):
+ # by default we start with the top-most page table
+ m.d.sync += [is_instr_ptw.eq(0),
+ ptw_lvl.eq(LVL1),
+ global_mapping.eq(0),
+ self.ptw_active_o.eq(0), # deactive (IDLE)
+ ]
+ # work out itlb/dtlb miss
+ m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
+ self.itlb_access_i & \
+ ~self.itlb_hit_i & \
+ ~self.dtlb_access_i)
+ m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
+ self.dtlb_access_i & \
+ ~self.dtlb_hit_i)
+ # we got an ITLB miss?
+ with m.If(self.itlb_miss_o):
+ pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ is_instr_ptw.eq(1),
+ vaddr.eq(self.itlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+
+ # we got a DTLB miss?
+ with m.Elif(self.dtlb_miss_o):
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ vaddr.eq(self.dtlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+
+ def grant(self, m, tag_valid, data_rvalid):
+ # we've got a data WAIT_GRANT so tell the
+ # cache that the tag is valid
+
+ # send a request out
+ m.d.comb += self.req_port_o.data_req.eq(1)
+ # wait for the WAIT_GRANT
+ with m.If(self.req_port_i.data_gnt):
+ # send the tag valid signal one cycle later
+ m.d.sync += tag_valid.eq(1)
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (~data_rvalid):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "PTE_LOOKUP"
+
+ def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr):
+ # temporaries
+ pte_rx = Signal(reset_less=True)
+ pte_exe = Signal(reset_less=True)
+ pte_inv = Signal(reset_less=True)
+ pte_a = Signal(reset_less=True)
+ st_wd = Signal(reset_less=True)
+ m.d.comb += [pte_rx.eq(pte.r | pte.x),
+ pte_exe.eq(~pte.x | ~pte.a),
+ pte_inv.eq(~pte.v | (~pte.r & pte.w)),
+ pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
+ st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
+
+ l1err = Signal(reset_less=True)
+ l2err = Signal(reset_less=True)
+ m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)),
+ l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ]
+
+ # check if the global mapping bit is set
+ with m.If (pte.g):
+ m.d.sync += global_mapping.eq(1)
+
+ m.next = "IDLE"
+
+ # -------------
+ # Invalid PTE
+ # -------------
+ # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
+ # stop and raise a page-fault exception.
+ with m.If (pte_inv):
+ m.next = "PROPAGATE_ERROR"
+
+ # -----------
+ # Valid PTE
+ # -----------
+
+ # it is a valid PTE
+ # if pte.r = 1 or pte.x = 1 it is a valid PTE
+ with m.Elif (pte_rx):
+ # Valid translation found (either 1G, 2M or 4K)
+ with m.If(is_instr_ptw):
+ # ------------
+ # Update ITLB
+ # ------------
+ # If page not executable, we can directly raise error.
+ # This doesn't put a useless entry into the TLB.
+ # The same idea applies to the access flag since we let
+ # the access flag be managed by SW.
+ with m.If (pte_exe):
+ m.next = "IDLE"
+ with m.Else():
+ m.d.comb += self.itlb_update_o.valid.eq(1)
+
+ with m.Else():
+ # ------------
+ # Update DTLB
+ # ------------
+ # Check if the access flag has been set, otherwise
+ # throw page-fault and let software handle those bits.
+ # If page not readable (there are no write-only pages)
+ # directly raise an error. This doesn't put a useless
+ # entry into the TLB.
+ with m.If(pte_a):
+ m.d.comb += self.dtlb_update_o.valid.eq(1)
+ with m.Else():
+ m.next = "PROPAGATE_ERROR"
+ # Request is a store: perform additional checks
+ # If the request was a store and the page not
+ # write-able, raise an error
+ # the same applies if the dirty flag is not set
+ with m.If (st_wd):
+ m.d.comb += self.dtlb_update_o.valid.eq(0)
+ m.next = "PROPAGATE_ERROR"
+
+ # check if the ppn is correctly aligned: Case (6)
+ with m.If(l1err | l2err):
+ m.next = "PROPAGATE_ERROR"
+ m.d.comb += [self.dtlb_update_o.valid.eq(0),
+ self.itlb_update_o.valid.eq(0)]
+
+ # this is a pointer to the next TLB level
+ with m.Else():
+ # pointer to next level of page table
+ with m.If (ptw_lvl1):
+ # we are in the second level now
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL2)
+ ]
+ with m.If(ptw_lvl2):
+ # here we received a pointer to the third level
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL3)
+ ]
+ self.set_grant_state(m)
+
+ with m.If (ptw_lvl3):
+ # Should already be the last level
+ # page table => Error
+ m.d.sync += ptw_lvl.eq(LVL3)
+ m.next = "PROPAGATE_ERROR"
+
+
+if __name__ == '__main__':
+ ptw = PTW()
+ vl = rtlil.convert(ptw, ports=ptw.ports())
+ with open("test_ptw.il", "w") as f:
+ f.write(vl)
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 21.4.2017
+# Description: Translation Lookaside Buffer, SV39
+# fully set-associative
+
+Implementation in c++:
+https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
+
+Text description:
+https://people.cs.clemson.edu/~mark/464/p_lru.txt
+
+Online simulator:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
+"""
+from math import log2
+from nmigen import Signal, Module, Cat, Const, Array
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import Encoder
+
+from ptw import TLBUpdate, PTE, ASID_WIDTH
+from plru import PLRU
+from tlb_content import TLBContent
+
+TLB_ENTRIES = 8
+
+class TLB:
+ def __init__(self, tlb_entries=8, asid_width=8):
+ self.tlb_entries = tlb_entries
+ self.asid_width = asid_width
+
+ self.flush_i = Signal() # Flush signal
+ # Lookup signals
+ self.lu_access_i = Signal()
+ self.lu_asid_i = Signal(self.asid_width)
+ self.lu_vaddr_i = Signal(64)
+ self.lu_content_o = PTE()
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_hit_o = Signal()
+ # Update TLB
+ self.pte_width = len(self.lu_content_o.flatten())
+ self.update_i = TLBUpdate(asid_width)
+
+ def elaborate(self, platform):
+ m = Module()
+
+ vpn2 = Signal(9)
+ vpn1 = Signal(9)
+ vpn0 = Signal(9)
+
+ #-------------
+ # Translation
+ #-------------
+
+ # SV39 defines three levels of page tables
+ m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
+ vpn1.eq(self.lu_vaddr_i[21:30]),
+ vpn2.eq(self.lu_vaddr_i[30:39]),
+ ]
+
+ tc = []
+ for i in range(self.tlb_entries):
+ tlc = TLBContent(self.pte_width, self.asid_width)
+ setattr(m.submodules, "tc%d" % i, tlc)
+ tc.append(tlc)
+ # connect inputs
+ tlc.update_i = self.update_i # saves a lot of graphviz links
+ m.d.comb += [tlc.vpn0.eq(vpn0),
+ tlc.vpn1.eq(vpn1),
+ tlc.vpn2.eq(vpn2),
+ tlc.flush_i.eq(self.flush_i),
+ #tlc.update_i.eq(self.update_i),
+ tlc.lu_asid_i.eq(self.lu_asid_i)]
+ tc = Array(tc)
+
+ #--------------
+ # Select hit
+ #--------------
+
+ # use Encoder to select hit index
+ # XXX TODO: assert that there's only one valid entry (one lu_hit)
+ hitsel = Encoder(self.tlb_entries)
+ m.submodules.hitsel = hitsel
+
+ hits = []
+ for i in range(self.tlb_entries):
+ hits.append(tc[i].lu_hit_o)
+ m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
+ idx = hitsel.o
+
+ active = Signal(reset_less=True)
+ m.d.comb += active.eq(~hitsel.n)
+ with m.If(active):
+ # active hit, send selected as output
+ m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
+ self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
+ self.lu_hit_o.eq(1),
+ self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
+ ]
+
+ #--------------
+ # PLRU.
+ #--------------
+
+ p = PLRU(self.tlb_entries)
+ plru_tree = Signal(p.TLBSZ)
+ m.submodules.plru = p
+
+ # connect PLRU inputs/outputs
+ # XXX TODO: assert that there's only one valid entry (one replace_en)
+ en = []
+ for i in range(self.tlb_entries):
+ en.append(tc[i].replace_en_i)
+ m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
+ p.lu_hit.eq(hitsel.i),
+ p.lu_access_i.eq(self.lu_access_i),
+ p.plru_tree.eq(plru_tree)]
+ m.d.sync += plru_tree.eq(p.plru_tree_o)
+
+ #--------------
+ # Sanity checks
+ #--------------
+
+ assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
+ "TLB size must be a multiple of 2 and greater than 1"
+ assert (self.asid_width >= 1), \
+ "ASID width must be at least 1"
+
+ return m
+
+ """
+ # Just for checking
+ function int countSetBits(logic[self.tlb_entries-1:0] vector);
+ automatic int count = 0;
+ foreach (vector[idx]) begin
+ count += vector[idx];
+ end
+ return count;
+ endfunction
+
+ assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+ else $error("More then one hit in TLB!"); $stop(); end
+ assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+ else $error("More then one TLB entry selected for next replace!");
+ """
+
+ def ports(self):
+ return [self.flush_i, self.lu_access_i,
+ self.lu_asid_i, self.lu_vaddr_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+ ] + self.lu_content_o.ports() + self.update_i.ports()
+
+if __name__ == '__main__':
+ tlb = TLB()
+ vl = rtlil.convert(tlb, ports=tlb.ports())
+ with open("test_tlb.il", "w") as f:
+ f.write(vl)
+
--- /dev/null
+from nmigen import Signal, Module, Cat, Const
+
+from ptw import TLBUpdate, PTE
+
+class TLBEntry:
+ def __init__(self, asid_width):
+ self.asid = Signal(asid_width)
+ # SV39 defines three levels of page tables
+ self.vpn0 = Signal(9)
+ self.vpn1 = Signal(9)
+ self.vpn2 = Signal(9)
+ self.is_2M = Signal()
+ self.is_1G = Signal()
+ self.valid = Signal()
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ return self.flatten().eq(x.flatten())
+
+ def ports(self):
+ return [self.asid, self.vpn0, self.vpn1, self.vpn2,
+ self.is_2M, self.is_1G, self.valid]
+
+class TLBContent:
+ def __init__(self, pte_width, asid_width):
+ self.asid_width = asid_width
+ self.pte_width = pte_width
+ self.flush_i = Signal() # Flush signal
+ # Update TLB
+ self.update_i = TLBUpdate(asid_width)
+ self.vpn2 = Signal(9)
+ self.vpn1 = Signal(9)
+ self.vpn0 = Signal(9)
+ self.replace_en_i = Signal() # replace the following entry,
+ # set by replacement strategy
+ # Lookup signals
+ self.lu_asid_i = Signal(asid_width)
+ self.lu_content_o = Signal(pte_width)
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_hit_o = Signal()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ tags = TLBEntry(self.asid_width)
+ content = Signal(self.pte_width)
+
+ m.d.comb += [self.lu_hit_o.eq(0),
+ self.lu_is_2M_o.eq(0),
+ self.lu_is_1G_o.eq(0)]
+
+ # temporaries for 1st level match
+ asid_ok = Signal(reset_less=True)
+ vpn2_ok = Signal(reset_less=True)
+ tags_ok = Signal(reset_less=True)
+ vpn2_hit = Signal(reset_less=True)
+ m.d.comb += [tags_ok.eq(tags.valid),
+ asid_ok.eq(tags.asid == self.lu_asid_i),
+ vpn2_ok.eq(tags.vpn2 == self.vpn2),
+ vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
+ # temporaries for 2nd level match
+ vpn1_ok = Signal(reset_less=True)
+ tags_2M = Signal(reset_less=True)
+ vpn0_ok = Signal(reset_less=True)
+ vpn0_or_2M = Signal(reset_less=True)
+ m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
+ tags_2M.eq(tags.is_2M),
+ vpn0_ok.eq(self.vpn0 == tags.vpn0),
+ vpn0_or_2M.eq(tags_2M | vpn0_ok)]
+ # first level match, this may be a giga page,
+ # check the ASID flags as well
+ with m.If(vpn2_hit):
+ # second level
+ with m.If (tags.is_1G):
+ m.d.comb += [ self.lu_content_o.eq(content),
+ self.lu_is_1G_o.eq(1),
+ self.lu_hit_o.eq(1),
+ ]
+ # not a giga page hit so check further
+ with m.Elif(vpn1_ok):
+ # this could be a 2 mega page hit or a 4 kB hit
+ # output accordingly
+ with m.If(vpn0_or_2M):
+ m.d.comb += [ self.lu_content_o.eq(content),
+ self.lu_is_2M_o.eq(tags.is_2M),
+ self.lu_hit_o.eq(1),
+ ]
+ # ------------------
+ # Update or Flush
+ # ------------------
+
+ # temporaries
+ replace_valid = Signal(reset_less=True)
+ m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
+
+ # flush
+ with m.If (self.flush_i):
+ # invalidate (flush) conditions: all if zero or just this ASID
+ with m.If (self.lu_asid_i == Const(0, self.asid_width) |
+ (self.lu_asid_i == tags.asid)):
+ m.d.sync += tags.valid.eq(0)
+
+ # normal replacement
+ with m.Elif(replace_valid):
+ m.d.sync += [ # update tag array
+ tags.asid.eq(self.update_i.asid),
+ tags.vpn2.eq(self.update_i.vpn[18:27]),
+ tags.vpn1.eq(self.update_i.vpn[9:18]),
+ tags.vpn0.eq(self.update_i.vpn[0:9]),
+ tags.is_1G.eq(self.update_i.is_1G),
+ tags.is_2M.eq(self.update_i.is_2M),
+ tags.valid.eq(1),
+ # and content as well
+ content.eq(self.update_i.content.flatten())
+ ]
+ return m
+
+ def ports(self):
+ return [self.flush_i,
+ self.lu_asid_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+ ] + self.update_i.content.ports() + self.update_i.ports()
--- /dev/null
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from plru import PLRU
+
+from nmigen.compat.sim import run_simulation
+
+def testbench(dut):
+ yield
+
+if __name__ == "__main__":
+ dut = PLRU(4)
+ run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
+ print("PLRU Unit Test Success")
\ No newline at end of file
--- /dev/null
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from nmigen.compat.sim import run_simulation
+
+from ptw import PTW, PTE
+
+
+def testbench(dut):
+
+ addr = 0x8000000
+
+ #pte = PTE()
+ #yield pte.v.eq(1)
+ #yield pte.r.eq(1)
+
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
+
+ # data lookup
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000000)
+
+ yield
+ yield
+ yield
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x200000)
+
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+
+ # data lookup, PTW levels 1-2-3
+ addr = 0x4000000
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield dut.mxr_i.eq(0x1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
+
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(addr)
+
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+ yield
+
+
+ # instruction lookup
+ yield dut.en_ld_st_translation_i.eq(0)
+ yield dut.enable_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800000)
+
+ yield
+ yield
+ yield
+
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x200000)
+
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+
+ yield
+
+
+
+if __name__ == "__main__":
+ dut = PTW()
+ run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
+ print("PTW Unit Test Success")
--- /dev/null
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from nmigen.compat.sim import run_simulation
+
+from tlb import TLB
+
+def set_vaddr(addr):
+ yield dut.lu_vaddr_i.eq(addr)
+ yield dut.update_i.vpn.eq(addr>>12)
+
+
+def testbench(dut):
+ yield dut.lu_access_i.eq(1)
+ yield dut.lu_asid_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_1G.eq(0)
+ yield dut.update_i.is_2M.eq(0)
+ yield dut.update_i.asid.eq(1)
+ yield dut.update_i.content.ppn.eq(0)
+ yield dut.update_i.content.rsw.eq(0)
+ yield dut.update_i.content.r.eq(1)
+
+ yield
+
+ addr = 0x80000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x90001
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x28000000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x1000040000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x1000040001
+ yield from set_vaddr(addr)
+ yield
+
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040000
+ yield from set_vaddr(addr)
+ yield
+
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040001
+ yield from set_vaddr(addr)
+ yield
+
+ yield
+
+
+if __name__ == "__main__":
+ dut = TLB()
+ run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.lib.coding import Encoder, PriorityEncoder
-
-class AddressEncoder():
- """Address Encoder
-
- The purpose of this module is to take in a vector and
- encode the bits that are one hot into an address. This module
- combines both nmigen's Encoder and PriorityEncoder and will state
- whether the input line has a single bit hot, multiple bits hot,
- or no bits hot. The output line will always have the lowest value
- address output.
-
- Usage:
- The output is valid when either single or multiple match is high.
- Otherwise output is 0.
- """
- def __init__(self, width):
- """ Arguments:
- * width: The desired length of the input vector
- """
- # Internal
- self.encoder = Encoder(width)
- self.p_encoder = PriorityEncoder(width)
-
- # Input
- self.i = Signal(width)
-
- # Output
- self.single_match = Signal(1)
- self.multiple_match = Signal(1)
- self.o = Signal(max=width)
-
- def elaborate(self, platform=None):
- m = Module()
-
- # Add internal submodules
- m.submodules.encoder = self.encoder
- m.submodules.p_encoder = self.p_encoder
-
- m.d.comb += [
- self.encoder.i.eq(self.i),
- self.p_encoder.i.eq(self.i)
- ]
-
- # Steps:
- # 1. check if the input vector is non-zero
- # 2. if non-zero, check if single match or multiple match
- # 3. set output line to be lowest value address output
-
- # If the priority encoder recieves an input of 0
- # If n is 1 then the output is not valid
- with m.If(self.p_encoder.n):
- m.d.comb += [
- self.single_match.eq(0),
- self.multiple_match.eq(0),
- self.o.eq(0)
- ]
- # If the priority encoder recieves an input > 0
- with m.Else():
- # Multiple Match if encoder n is invalid
- with m.If(self.encoder.n):
- m.d.comb += [
- self.single_match.eq(0),
- self.multiple_match.eq(1)
- ]
- # Single Match if encoder n is valid
- with m.Else():
- m.d.comb += [
- self.single_match.eq(1),
- self.multiple_match.eq(0)
- ]
- # Always set output based on priority encoder output
- m.d.comb += self.o.eq(self.p_encoder.o)
- return m
+++ /dev/null
-from nmigen import Array, Cat, Module, Signal
-from nmigen.lib.coding import Decoder
-from nmigen.cli import main #, verilog
-
-from CamEntry import CamEntry
-from AddressEncoder import AddressEncoder
-
-class Cam():
- """ Content Addressable Memory (CAM)
-
- The purpose of this module is to quickly look up whether an
- entry exists given a data key.
- This module will search for the given data in all internal entries
- and output whether a single or multiple match was found.
- If an single entry is found the address be returned and single_match
- is set HIGH. If multiple entries are found the lowest address is
- returned and multiple_match is set HIGH. If neither single_match or
- multiple_match are HIGH this implies no match was found. To write
- to the CAM set the address bus to the desired entry and set write_enable
- HIGH. Entry managment should be performed one level above this block
- as lookup is performed within.
-
- Notes:
- The read and write operations take one clock cycle to complete.
- Currently the read_warning line is present for interfacing but
- is not necessary for this design. This module is capable of writing
- in the first cycle, reading on the second, and output the correct
- address on the third.
- """
-
- def __init__(self, data_size, cam_size):
- """ Arguments:
- * data_size: (bits) The bit size of the data
- * cam_size: (number) The number of entries in the CAM
- """
-
- # Internal
- self.cam_size = cam_size
- self.encoder = AddressEncoder(cam_size)
- self.decoder = Decoder(cam_size)
- self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
-
- # Input
- self.enable = Signal(1)
- self.write_enable = Signal(1)
- self.data_in = Signal(data_size) # The data to be written
- self.data_mask = Signal(data_size) # mask for ternary writes
- self.address_in = Signal(max=cam_size) # address of CAM Entry to write
-
- # Output
- self.read_warning = Signal(1) # High when a read interrupts a write
- self.single_match = Signal(1) # High when there is only one match
- self.multiple_match = Signal(1) # High when there at least two matches
- self.match_address = Signal(max=cam_size) # The lowest address matched
-
- def elaborate(self, platform=None):
- m = Module()
- # AddressEncoder for match types and output address
- m.submodules.AddressEncoder = self.encoder
- # Decoder is used to select which entry will be written to
- m.submodules.Decoder = self.decoder
- # CamEntry Array Submodules
- # Note these area added anonymously
- entry_array = self.entry_array
- m.submodules += entry_array
-
- # Decoder logic
- m.d.comb += [
- self.decoder.i.eq(self.address_in),
- self.decoder.n.eq(0)
- ]
-
- encoder_vector = []
- with m.If(self.enable):
- # Set the key value for every CamEntry
- for index in range(self.cam_size):
-
- # Write Operation
- with m.If(self.write_enable):
- with m.If(self.decoder.o[index]):
- m.d.comb += entry_array[index].command.eq(2)
- with m.Else():
- m.d.comb += entry_array[index].command.eq(0)
-
- # Read Operation
- with m.Else():
- m.d.comb += entry_array[index].command.eq(1)
-
- # Send data input to all entries
- m.d.comb += entry_array[index].data_in.eq(self.data_in)
- # Send all entry matches to encoder
- ematch = entry_array[index].match
- encoder_vector.append(ematch)
-
- # Give input to and accept output from encoder module
- m.d.comb += [
- self.encoder.i.eq(Cat(*encoder_vector)),
- self.single_match.eq(self.encoder.single_match),
- self.multiple_match.eq(self.encoder.multiple_match),
- self.match_address.eq(self.encoder.o)
- ]
-
- # If the CAM is not enabled set all outputs to 0
- with m.Else():
- m.d.comb += [
- self.read_warning.eq(0),
- self.single_match.eq(0),
- self.multiple_match.eq(0),
- self.match_address.eq(0)
- ]
-
- return m
-
- def ports(self):
- return [self.enable, self.write_enable,
- self.data_in, self.data_mask,
- self.read_warning, self.single_match,
- self.multiple_match, self.match_address]
-
-
-if __name__ == '__main__':
- cam = Cam(4, 4)
- main(cam, ports=cam.ports())
-
+++ /dev/null
-from nmigen import Module, Signal
-
-class CamEntry:
- """ Content Addressable Memory (CAM) Entry
-
- The purpose of this module is to represent an entry within a CAM.
- This module when given a read command will compare the given data
- and output whether a match was found or not. When given a write
- command it will write the given data into internal registers.
- """
-
- def __init__(self, data_size):
- """ Arguments:
- * data_size: (bit count) The size of the data
- """
- # Input
- self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
- self.data_in = Signal(data_size) # Data input when writing
-
- # Output
- self.match = Signal(1) # Result of the internal/input key comparison
- self.data = Signal(data_size)
-
- def elaborate(self, platform=None):
- m = Module()
- with m.Switch(self.command):
- with m.Case("00"):
- m.d.sync += self.match.eq(0)
- with m.Case("01"):
- with m.If(self.data == self.data_in):
- m.d.sync += self.match.eq(1)
- with m.Else():
- m.d.sync += self.match.eq(0)
- with m.Case("10"):
- m.d.sync += [
- self.data.eq(self.data_in),
- self.match.eq(0)
- ]
- with m.Case():
- m.d.sync += [
- self.match.eq(0),
- self.data.eq(0)
- ]
-
- return m
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen.cli import verilog, rtlil
-
-
-class LFSRPolynomial(set):
- """ implements a polynomial for use in LFSR
- """
- def __init__(self, exponents=()):
- for e in exponents:
- assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
- assert (e >= 0), ValueError("%d must not be negative" % e)
- set.__init__(self, set(exponents).union({0})) # must contain zero
-
- @property
- def max_exponent(self):
- return max(self) # derived from set, so this returns the max exponent
-
- @property
- def exponents(self):
- exponents = list(self) # get elements of set as a list
- exponents.sort(reverse=True)
- return exponents
-
- def __str__(self):
- expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
- retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
- return " + ".join(retval)
-
- def __repr__(self):
- return "LFSRPolynomial(%s)" % self.exponents
-
-
-# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
-LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
-LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
-LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
-LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
-LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
-LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
-LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
-LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
-LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
-LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
-LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
-LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
-LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
-LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
-LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
-LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
-LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
-LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
-LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
-LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
-LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
-LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
-LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
-
-
-class LFSR(LFSRPolynomial, Elaboratable):
- """ implements a Linear Feedback Shift Register
- """
- def __init__(self, polynomial):
- """ Inputs:
- ------
- :polynomial: the polynomial to feedback on. may be a LFSRPolynomial
- instance or an iterable of ints (list/tuple/generator)
- :enable: enable (set LO to disable. NOTE: defaults to HI)
-
- Outputs:
- -------
- :state: the LFSR state. bitwidth is taken from the polynomial
- maximum exponent.
-
- Note: if an LFSRPolynomial is passed in as the input, because
- LFSRPolynomial is derived from set() it's ok:
- LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
- """
- LFSRPolynomial.__init__(self, polynomial)
- self.state = Signal(self.max_exponent, reset=1)
- self.enable = Signal(reset=1)
-
- def elaborate(self, platform):
- m = Module()
- # do absolutely nothing if the polynomial is empty (always has a zero)
- if self.max_exponent <= 1:
- return m
-
- # create XOR-bunch, select bits from state based on exponent
- feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
- for exponent in self:
- if exponent > 0: # don't have to skip, saves CPU cycles though
- feedback ^= self.state[exponent - 1]
-
- # if enabled, shift-and-feedback
- with m.If(self.enable):
- # shift up lower bits by Cat'ing in a new bit zero (feedback)
- newstate = Cat(feedback, self.state[:-1])
- m.d.sync += self.state.eq(newstate)
-
- return m
-
-
-# example: Poly24
-if __name__ == '__main__':
- p24 = rtlil.convert(LFSR(LFSR_POLY_24))
- with open("lfsr2_p24.il", "w") as f:
- f.write(p24)
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Module
-from typing import Iterable, Optional, Iterator, Any, Union
-from typing_extensions import final
-
-
-@final
-class LFSRPolynomial(set):
- def __init__(self, exponents: Iterable[int] = ()):
- def elements() -> Iterable[int]: ...
- @property
- def exponents(self) -> list[int]: ...
- def __str__(self) -> str: ...
- def __repr__(self) -> str: ...
-
-
-@final
-class LFSR:
- def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
- @property
- def width(self) -> int: ...
- def elaborate(self, platform: Any) -> Module: ...
+++ /dev/null
-verilog:
- python3 Cam.py generate -t v > Cam.v
+++ /dev/null
-from nmigen import Cat, Memory, Module, Signal, Elaboratable
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-
-class MemorySet(Elaboratable):
- def __init__(self, data_size, tag_size, set_count, active):
- self.active = active
- input_size = tag_size + data_size # Size of the input data
- memory_width = input_size + 1 # The width of the cache memory
- self.active = active
- self.data_size = data_size
- self.tag_size = tag_size
-
- # XXX TODO, use rd-enable and wr-enable?
- self.mem = Memory(memory_width, set_count)
- self.r = self.mem.read_port()
- self.w = self.mem.write_port()
-
- # inputs (address)
- self.cset = Signal(max=set_count) # The set to be checked
- self.tag = Signal(tag_size) # The tag to find
- self.data_i = Signal(data_size) # Incoming data
-
- # outputs
- self.valid = Signal()
- self.data_o = Signal(data_size) # Outgoing data (excludes tag)
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.mem = self.mem
- m.submodules.r = self.r
- m.submodules.w = self.w
-
- # temporaries
- active_bit = Signal()
- tag_valid = Signal()
- data_start = self.active + 1
- data_end = data_start + self.data_size
- tag_start = data_end
- tag_end = tag_start + self.tag_size
-
- # connect the read port address to the set/entry
- read_port = self.r
- m.d.comb += read_port.addr.eq(self.cset)
- # Pull out active bit from data
- data = read_port.data
- m.d.comb += active_bit.eq(data[self.active])
- # Validate given tag vs stored tag
- tag = data[tag_start:tag_end]
- m.d.comb += tag_valid.eq(self.tag == tag)
- # An entry is only valid if the tags match AND
- # is marked as a valid entry
- m.d.comb += self.valid.eq(tag_valid & active_bit)
-
- # output data: TODO, check rd-enable?
- m.d.comb += self.data_o.eq(data[data_start:data_end])
-
- # connect the write port addr to the set/entry (only if write enabled)
- # (which is only done on a match, see SAC.write_entry below)
- write_port = self.w
- with m.If(write_port.en):
- m.d.comb += write_port.addr.eq(self.cset)
- m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
-
- return m
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.cli import main
-
-from PteEntry import PteEntry
-
-class PermissionValidator():
- """ The purpose of this Module is to check the Permissions of a given PTE
- against the requested access permissions.
-
- This module will either validate (by setting the valid bit HIGH)
- the request or find a permission fault and invalidate (by setting
- the valid bit LOW) the request
- """
-
- def __init__(self, asid_size, pte_size):
- """ Arguments:
- * asid_size: (bit count) The size of the asid to be processed
- * pte_size: (bit count) The size of the pte to be processed
-
- Return:
- * valid HIGH when permissions are correct
- """
- # Internal
- self.pte_entry = PteEntry(asid_size, pte_size)
-
- # Input
- self.data = Signal(asid_size + pte_size);
- self.xwr = Signal(3) # Execute, Write, Read
- self.super_mode = Signal(1) # Supervisor Mode
- self.super_access = Signal(1) # Supervisor Access
- self.asid = Signal(15) # Address Space IDentifier (ASID)
-
- # Output
- self.valid = Signal(1) # Denotes if the permissions are correct
-
- def elaborate(self, platform=None):
- m = Module()
-
- m.submodules.pte_entry = self.pte_entry
-
- m.d.comb += self.pte_entry.i.eq(self.data)
-
- # Check if the entry is valid
- with m.If(self.pte_entry.v):
- # ASID match or Global Permission
- # Note that the MSB bound is exclusive
- with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
- # Check Execute, Write, Read (XWR) Permissions
- with m.If(self.pte_entry.xwr == self.xwr):
- # Supervisor Logic
- with m.If(self.super_mode):
- # Valid if entry is not in user mode or supervisor
- # has Supervisor User Memory (SUM) access via the
- # SUM bit in the sstatus register
- m.d.comb += self.valid.eq((~self.pte_entry.u) \
- | self.super_access)
- # User logic
- with m.Else():
- # Valid if the entry is in user mode only
- m.d.comb += self.valid.eq(self.pte_entry.u)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- return m
\ No newline at end of file
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.cli import main
-
-class PteEntry():
- """ The purpose of this Module is to centralize the parsing of Page
- Table Entries (PTE) into one module to prevent common mistakes
- and duplication of code. The control bits are parsed out for
- ease of use.
-
- This module parses according to the standard PTE given by the
- Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
- The Address Space IDentifier (ASID) is appended to the MSB of the input
- and is parsed out as such.
-
- An valid input Signal would be:
- ASID PTE
- Bits:[78-64][63-0]
-
- The output PTE value will include the control bits.
- """
- def __init__(self, asid_size, pte_size):
- """ Arguments:
- * asid_size: (bit count) The size of the asid to be processed
- * pte_size: (bit count) The size of the pte to be processed
-
- Return:
- * d The Dirty bit from the PTE portion of i
- * a The Accessed bit from the PTE portion of i
- * g The Global bit from the PTE portion of i
- * u The User Mode bit from the PTE portion of i
- * xwr The Execute/Write/Read bit from the PTE portion of i
- * v The Valid bit from the PTE portion of i
- * asid The asid portion of i
- * pte The pte portion of i
- """
- # Internal
- self.asid_start = pte_size
- self.asid_end = pte_size + asid_size
-
- # Input
- self.i = Signal(asid_size + pte_size)
-
- # Output
- self.d = Signal(1) # Dirty bit (From pte)
- self.a = Signal(1) # Accessed bit (From pte)
- self.g = Signal(1) # Global Access (From pte)
- self.u = Signal(1) # User Mode (From pte)
- self.xwr = Signal(3) # Execute Read Write (From pte)
- self.v = Signal(1) # Valid (From pte)
- self.asid = Signal(asid_size) # Associated Address Space IDentifier
- self.pte = Signal(pte_size) # Full Page Table Entry
-
- def elaborate(self, platform=None):
- m = Module()
- # Pull out all control bites from PTE
- m.d.comb += [
- self.d.eq(self.i[7]),
- self.a.eq(self.i[6]),
- self.g.eq(self.i[5]),
- self.u.eq(self.i[4]),
- self.xwr.eq(self.i[1:4]),
- self.v.eq(self.i[0])
- ]
- m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
- m.d.comb += self.pte.eq(self.i[0:self.asid_start])
- return m
\ No newline at end of file
+++ /dev/null
-"""
-
-Online simulator of 4-way set-associative cache:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
-
-Python simulator of a N-way set-associative cache:
-https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
-"""
-import sys
-sys.path.append("ariane/src/")
-
-from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
-from nmigen.compat.genlib import fsm
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-from AddressEncoder import AddressEncoder
-from MemorySet import MemorySet
-
-# TODO: use a LFSR that advances continuously and picking the bottom
-# few bits from it to select which cache line to replace, instead of PLRU
-# http://bugs.libre-riscv.org/show_bug.cgi?id=71
-from plru import PLRU
-from LFSR import LFSR, LFSR_POLY_24
-
-SA_NA = "00" # no action (none)
-SA_RD = "01" # read
-SA_WR = "10" # write
-
-
-class SetAssociativeCache(Elaboratable):
- """ Set Associative Cache Memory
-
- The purpose of this module is to generate a memory cache given the
- constraints passed in. This will create a n-way set associative cache.
- It is expected for the SV TLB that the VMA will provide the set number
- while the ASID provides the tag (still to be decided).
-
- """
- def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
- """ Arguments
- * tag_size (bits): The bit count of the tag
- * data_size (bits): The bit count of the data to be stored
- * set_count (number): The number of sets/entries in the cache
- * way_count (number): The number of slots a data can be stored
- in one set
- * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
- set/entry to write to. otherwise, use a PLRU
- """
- # Internals
- self.lfsr_mode = lfsr
- self.way_count = way_count # The number of slots in one set
- self.tag_size = tag_size # The bit count of the tag
- self.data_size = data_size # The bit count of the data to be stored
-
- # set up Memory array
- self.mem_array = Array() # memory array
- for i in range(way_count):
- ms = MemorySet(data_size, tag_size, set_count, active=0)
- self.mem_array.append(ms)
-
- # Finds valid entries
- self.encoder = AddressEncoder(way_count)
-
- # setup PLRU or LFSR
- if lfsr:
- # LFSR mode
- self.lfsr = LFSR(LFSR_POLY_24)
- else:
- # PLRU mode
- self.plru = PLRU(way_count) # One block to handle plru calculations
- self.plru_array = Array() # PLRU data on each set
- for i in range(set_count):
- name="plru%d" % i
- self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
-
- # Input
- self.enable = Signal(1) # Whether the cache is enabled
- self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX)
- self.cset = Signal(max=set_count) # The set to be checked
- self.tag = Signal(tag_size) # The tag to find
- self.data_i = Signal(data_size) # The input data
-
- # Output
- self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
- self.hit = Signal(1) # Tag matched one way in the given set
- self.multiple_hit = Signal(1) # Tag matched many ways in the given set
- self.data_o = Signal(data_size) # The data linked to the matched tag
-
- def check_tags(self, m):
- """ Validate the tags in the selected set. If one and only one
- tag matches set its state to zero and increment all others
- by one. We only advance to next state if a single hit is found.
- """
- # Vector to store way valid results
- # A zero denotes a way is invalid
- valid_vector = []
- # Loop through memory to prep read/write ports and set valid_vector
- for i in range(self.way_count):
- valid_vector.append(self.mem_array[i].valid)
-
- # Pass encoder the valid vector
- m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
-
- # Only one entry should be marked
- # This is due to already verifying the tags
- # matched and the valid bit is high
- with m.If(self.hit):
- m.next = "FINISHED_READ"
- # Pull out data from the read port
- data = self.mem_array[self.encoder.o].data_o
- m.d.comb += self.data_o.eq(data)
- if not self.lfsr_mode:
- self.access_plru(m)
-
- # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k
- with m.Elif(self.multiple_hit):
- # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
- m.d.comb += self.data_o.eq(0)
-
- # No tag matches means no data
- with m.Else():
- # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
- m.d.comb += self.data_o.eq(0)
-
- def access_plru(self, m):
- """ An entry was accessed and the plru tree must now be updated
- """
- # Pull out the set's entry being edited
- plru_entry = self.plru_array[self.cset]
- m.d.comb += [
- # Set the plru data to the current state
- self.plru.plru_tree.eq(plru_entry),
- # Set that the cache was accessed
- self.plru.lu_access_i.eq(1)
- ]
-
- def read(self, m):
- """ Go through the read process of the cache.
- This takes two cycles to complete. First it checks for a valid tag
- and secondly it updates the LRU values.
- """
- with m.FSM() as fsm_read:
- with m.State("READY"):
- m.d.comb += self.ready.eq(0)
- # check_tags will set the state if the conditions are met
- self.check_tags(m)
- with m.State("FINISHED_READ"):
- m.next = "READY"
- m.d.comb += self.ready.eq(1)
- if not self.lfsr_mode:
- plru_tree_o = self.plru.plru_tree_o
- m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
-
- def write_entry(self, m):
- if not self.lfsr_mode:
- m.d.comb += [# set cset (mem address) into PLRU
- self.plru.plru_tree.eq(self.plru_array[self.cset]),
- # and connect plru to encoder for write
- self.encoder.i.eq(self.plru.replace_en_o)
- ]
- write_port = self.mem_array[self.encoder.o].w
- else:
- # use the LFSR to generate a random(ish) one of the mem array
- lfsr_output = Signal(max=self.way_count)
- lfsr_random = Signal(max=self.way_count)
- m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
- # address too big, limit to range of array
- m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count,
- lfsr_output - self.way_count,
- lfsr_output))
- write_port = self.mem_array[lfsr_random].w
-
- # then if there is a match from the encoder, enable the selected write
- with m.If(self.encoder.single_match):
- m.d.comb += write_port.en.eq(1)
-
- def write(self, m):
- """ Go through the write process of the cache.
- This takes two cycles to complete. First it writes the entry,
- and secondly it updates the PLRU (in plru mode)
- """
- with m.FSM() as fsm_write:
- with m.State("READY"):
- m.d.comb += self.ready.eq(0)
- self.write_entry(m)
- m.next ="FINISHED_WRITE"
- with m.State("FINISHED_WRITE"):
- m.d.comb += self.ready.eq(1)
- if not self.lfsr_mode:
- plru_entry = self.plru_array[self.cset]
- m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
- m.next = "READY"
-
-
- def elaborate(self, platform=None):
- m = Module()
-
- # ----
- # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
- # ----
-
- m.submodules.AddressEncoder = self.encoder
- if self.lfsr_mode:
- m.submodules.LFSR = self.lfsr
- else:
- m.submodules.PLRU = self.plru
-
- for i, mem in enumerate(self.mem_array):
- setattr(m.submodules, "mem%d" % i, mem)
-
- # ----
- # select mode: PLRU connect to encoder, LFSR do... something
- # ----
-
- if not self.lfsr_mode:
- # Set what entry was hit
- m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
- else:
- # enable LFSR
- m.d.comb += self.lfsr.enable.eq(self.enable)
-
- # ----
- # connect hit/multiple hit to encoder output
- # ----
-
- m.d.comb += [
- self.hit.eq(self.encoder.single_match),
- self.multiple_hit.eq(self.encoder.multiple_match),
- ]
-
- # ----
- # connect incoming data/tag/cset(addr) to mem_array
- # ----
-
- for mem in self.mem_array:
- write_port = mem.w
- m.d.comb += [mem.cset.eq(self.cset),
- mem.tag.eq(self.tag),
- mem.data_i.eq(self.data_i),
- write_port.en.eq(0), # default: disable write
- ]
- # ----
- # Commands: READ/WRITE/TODO
- # ----
-
- with m.If(self.enable):
- with m.Switch(self.command):
- # Search all sets at a particular tag
- with m.Case(SA_RD):
- self.read(m)
- with m.Case(SA_WR):
- self.write(m)
- # Maybe catch multiple tags write here?
- # TODO
- # TODO: invalidate/flush, flush-all?
-
- return m
-
- def ports(self):
- return [self.enable, self.command, self.cset, self.tag, self.data_i,
- self.ready, self.hit, self.multiple_hit, self.data_o]
-
-
-if __name__ == '__main__':
- sac = SetAssociativeCache(4, 8, 4, 6)
- vl = rtlil.convert(sac, ports=sac.ports())
- with open("SetAssociativeCache.il", "w") as f:
- f.write(vl)
-
- sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
- vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
- with open("SetAssociativeCacheLFSR.il", "w") as f:
- f.write(vl)
+++ /dev/null
-""" TLB Module
-
- The expected form of the data is:
- * Item (Bits)
- * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
-"""
-
-from nmigen import Memory, Module, Signal, Cat
-from nmigen.cli import main
-
-from PermissionValidator import PermissionValidator
-from Cam import Cam
-
-class TLB():
- def __init__(self, asid_size, vma_size, pte_size, L1_size):
- """ Arguments
- * asid_size: Address Space IDentifier (ASID) typically 15 bits
- * vma_size: Virtual Memory Address (VMA) typically 36 bits
- * pte_size: Page Table Entry (PTE) typically 64 bits
-
- Notes:
- These arguments should represent the largest possible size
- defined by the MODE settings. See
- Volume II: RISC-V Privileged Architectures V1.10 Page 57
- """
-
- # Internal
- self.state = 0
- # L1 Cache Modules
- L1_size = 8 # XXX overridden incoming argument?
- self.cam_L1 = Cam(vma_size, L1_size)
- self.mem_L1 = Memory(asid_size + pte_size, L1_size)
-
- # Permission Validator
- self.perm_validator = PermissionValidator(asid_size, pte_size)
-
- # Inputs
- self.supermode = Signal(1) # Supervisor Mode
- self.super_access = Signal(1) # Supervisor Access
- self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
- self.xwr = Signal(3) # Execute, Write, Read
- self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
- self.address_L1 = Signal(max=L1_size)
- self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
- self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
- self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
-
- # Outputs
- self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
- self.perm_valid = Signal(1) # Denotes if the permissions are correct
- self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
-
- def search(self, m, read_L1, write_L1):
- """ searches the TLB
- """
- m.d.comb += [
- write_L1.en.eq(0),
- self.cam_L1.write_enable.eq(0),
- self.cam_L1.data_in.eq(self.vma)
- ]
- # Match found in L1 CAM
- match_found = Signal(reset_less=True)
- m.d.comb += match_found.eq(self.cam_L1.single_match
- | self.cam_L1.multiple_match)
- with m.If(match_found):
- # Memory shortcut variables
- mem_address = self.cam_L1.match_address
- # Memory Logic
- m.d.comb += read_L1.addr.eq(mem_address)
- # Permission Validator Logic
- m.d.comb += [
- self.hit.eq(1),
- # Set permission validator data to the correct
- # register file data according to CAM match
- # address
- self.perm_validator.data.eq(read_L1.data),
- # Execute, Read, Write
- self.perm_validator.xwr.eq(self.xwr),
- # Supervisor Mode
- self.perm_validator.super_mode.eq(self.supermode),
- # Supverisor Access
- self.perm_validator.super_access.eq(self.super_access),
- # Address Space IDentifier (ASID)
- self.perm_validator.asid.eq(self.asid),
- # Output result of permission validation
- self.perm_valid.eq(self.perm_validator.valid)
- ]
- # Only output PTE if permissions are valid
- with m.If(self.perm_validator.valid):
- # XXX TODO - dummy for now
- reg_data = Signal.like(self.pte_out)
- m.d.comb += [
- self.pte_out.eq(reg_data)
- ]
- with m.Else():
- m.d.comb += [
- self.pte_out.eq(0)
- ]
- # Miss Logic
- with m.Else():
- m.d.comb += [
- self.hit.eq(0),
- self.perm_valid.eq(0),
- self.pte_out.eq(0)
- ]
-
- def write_l1(self, m, read_L1, write_L1):
- """ writes to the L1 cache
- """
- # Memory_L1 Logic
- m.d.comb += [
- write_L1.en.eq(1),
- write_L1.addr.eq(self.address_L1),
- # The Cat places arguments from LSB -> MSB
- write_L1.data.eq(Cat(self.pte_in, self.asid))
- ]
- # CAM_L1 Logic
- m.d.comb += [
- self.cam_L1.write_enable.eq(1),
- self.cam_L1.data_in.eq(self.vma),
- ]
-
- def elaborate(self, platform):
- m = Module()
- # Add submodules
- # Submodules for L1 Cache
- m.d.submodules.cam_L1 = self.cam_L1
- m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port()
- m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port()
- # Permission Validator Submodule
- m.d.submodules.perm_valididator = self.perm_validator
-
- # When MODE specifies translation
- # TODO add in different bit length handling ie prefix 0s
- tlb_enable = Signal(reset_less=True)
- m.d.comb += tlb_enable.eq(self.mode != 0)
-
- with m.If(tlb_enable):
- m.d.comb += [
- self.cam_L1.enable.eq(1)
- ]
- with m.Switch(self.command):
- # Search
- with m.Case("01"):
- self.search(m, read_L1, write_L1)
-
- # Write L1
- # Expected that the miss will be handled in software
- with m.Case("10"):
- self.write_l1(m, read_L1, write_L1)
-
- # TODO
- #with m.Case("11"):
-
- # When disabled
- with m.Else():
- m.d.comb += [
- self.cam_L1.enable.eq(0),
- # XXX TODO - self.reg_file.enable.eq(0),
- self.hit.eq(0),
- self.perm_valid.eq(0), # XXX TODO, check this
- self.pte_out.eq(0)
- ]
- return m
-
-
-if __name__ == '__main__':
- tlb = TLB(15, 36, 64, 4)
- main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command,
- tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
- tlb.vma, tlb.pte_in,
- tlb.hit, tlb.perm_valid, tlb.pte_out,
- ] + tlb.cam_L1.ports())
+++ /dev/null
-#include <cstdint>
-#include <iostream>
-#include <cmath>
-
-
-#define NWAY 4
-#define NLINE 256
-#define HIT 0
-#define MISS 1
-#define MS 1000
-/*
-Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
-Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
-four-way set associative - three bits
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
- are all 4 lines valid?
- / \
- yes no, use an invalid line
- |
- |
- |
- bit_0 == 0? state | replace ref to | next state
- / \ ------+-------- -------+-----------
- y n 00x | line_0 line_0 | 11_
- / \ 01x | line_1 line_1 | 10_
- bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
- / \ / \ 1x1 | line_3 line_3 | 0_0
- y n y n
- / \ / \ ('x' means ('_' means unchanged)
- line_0 line_1 line_2 line_3 don't care)
- 8-way set associative - 7 = 1+2+4 bits
-16-way set associative - 15 = 1+2+4+8 bits
-32-way set associative - 31 = 1+2+4+8+16 bits
-64-way set associative - 63 = 1+2+4+8+16+32 bits
-*/
-using namespace std;
-struct AddressField {
- uint64_t wd_idx : 2;//Unused
- uint64_t offset : 4;//Unused
- uint64_t index : 8;//NLINE = 256 = 2^8
- uint64_t tag : 50;
-};
-
-union Address {
- uint32_t* p;
- AddressField fields;
-};
-
-struct Cell {
- bool v;
- uint64_t tag;
-
- Cell() : v(false), tag(0) {}
-
- bool isHit(uint64_t tag) {
- return v && (tag == this->tag);
- }
-
- void fetch(uint32_t* address) {
- Address addr;
- addr.p = address;
- addr.fields.offset = 0;
- addr.fields.wd_idx = 0;
- tag = addr.fields.tag;
- v = true;
- }
-};
-
-ostream& operator<<(ostream & out, const Cell& cell) {
- out << " v:" << cell.v << " tag:" << hex << cell.tag;
- return out;
-}
-
-struct Block {
- Cell cell[NWAY];
- uint32_t state;
- uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
- uint64_t *value;
- uint64_t *next_value;
-
- Block() : state(0) {
- switch (NWAY) {
- case 4:
- mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
- value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
- next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
- break;
- case 8:
- mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
- 0b1010001};
- value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
- 0b1010001};
- next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
- 0b0000001, 0b0000000};
- break;
- //TODO - more NWAY goes here.
- default:
- std::cout << "Error definition NWAY = " << NWAY << std::endl;
- }
- }
-
- uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
- for (int i = 0; i < NWAY; ++i) {
- if (cell[i].isHit(tag)) {
- *pway = i;
- return pway;
- }
- }
- return NULL;
- }
-
- void setLRU(uint32_t *address) {
- int way = 0;
- uint32_t st = state;
- for (int i = 0; i < NWAY; ++i) {
- if ((state & mask[i]) == value[i]) {
- state ^= mask[i];
- way = i;
- break;
- }
- }
- cell[way].fetch(address);
- cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
- }
-
- uint32_t *get(uint32_t *address, uint32_t *pway) {
- Address addr;
- addr.p = address;
- uint32_t *d = getByTag(addr.fields.tag, pway);
- if (d != NULL) {
- return &d[addr.fields.offset];
- }
- return d;
- }
-
- int set(uint32_t *address) {
- uint32_t way = 0;
- uint32_t *p = get(address, &way);
- if (p != NULL) {
- printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
- state &= ~mask[way];
- printf("%X --> ", state);
- state |= next_value[way];
- printf("%X\n", state);
- // *p = *address; //skip since address is fake.
- return HIT;
- } else {
- setLRU(address);
- return MISS;
- }
- }
-};
-
-ostream& operator<<(ostream & out, const Block& block) {
- out << "state:" << block.state << " ";
- for (int i = 0; i<NWAY; i++) {
- out << block.cell[i];
- }
- return out;
-}
-
-struct Cache {
- Block block[NLINE];
- uint32_t count[2];
- Cache() { count[HIT] = 0; count[MISS] = 0; }
-
- void access(uint32_t* address) {
- Address addr;
- addr.p = address;
- Block& b = block[addr.fields.index];
- ++count[b.set(address)];
- }
-
-};
-ostream& operator<<(ostream & out, const Cache& cache) {
- out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
- for (int i = 0; i < NLINE; i++) {
- out << cache.block[i] << endl;
- }
- return out;
-}
-
-Cache cache;
-void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
-{
- int x, i, j;
- for (i = 0; i < MS; i++) {
- for (j = 0; j < MS; j++) {
- cache.access(res + i*MS +j);
- for (x = 0; x < MS; x++) {
- cache.access(m1 + i*MS + x);
- cache.access(m2 + x*MS + j);
- cache.access(res + i*MS +j);
- // res[i][j] += m1[i][x] * m2[x][j];
- cache.access(res + i*MS +j);
- }
- }
- }
-}
-
-int main()
-{
- uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
- uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
- uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
- multiply(m1, m2, res);
- cout << cache << endl;
- return 0;
-}
+++ /dev/null
-pseudo-LRU
-
-two-way set associative - one bit
-
- indicates which line of the two has been reference more recently
-
-
-four-way set associative - three bits
-
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
-
- are all 4 lines valid?
- / \
- yes no, use an invalid line
- |
- |
- |
- bit_0 == 0? state | replace ref to | next state
- / \ ------+-------- -------+-----------
- y n 00x | line_0 line_0 | 11_
- / \ 01x | line_1 line_1 | 10_
- bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
- / \ / \ 1x1 | line_3 line_3 | 0_0
- y n y n
- / \ / \ ('x' means ('_' means unchanged)
- line_0 line_1 line_2 line_3 don't care)
-
- (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
- Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
-
-
-note that there is a 6-bit encoding for true LRU for four-way set associative
-
- bit 0: bank[1] more recently used than bank[0]
- bit 1: bank[2] more recently used than bank[0]
- bit 2: bank[2] more recently used than bank[1]
- bit 3: bank[3] more recently used than bank[0]
- bit 4: bank[3] more recently used than bank[1]
- bit 5: bank[3] more recently used than bank[2]
-
- this results in 24 valid bit patterns within the 64 possible bit patterns
- (4! possible valid traces for bank references)
-
- e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
-
- you can implement a state machine with a 256x6 ROM (6-bit state encoding
- appended with a 2-bit bank reference input will yield a new 6-bit state),
- and you can implement an LRU bank indicator with a 64x2 ROM
-
+++ /dev/null
-from nmigen import Const
-
-INSTR_ADDR_MISALIGNED = Const(0, 64)
-INSTR_ACCESS_FAULT = Const(1, 64)
-ILLEGAL_INSTR = Const(2, 64)
-BREAKPOINT = Const(3, 64)
-LD_ADDR_MISALIGNED = Const(4, 64)
-LD_ACCESS_FAULT = Const(5, 64)
-ST_ADDR_MISALIGNED = Const(6, 64)
-ST_ACCESS_FAULT = Const(7, 64)
-ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
-ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
-ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
-INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
-LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
-STORE_PAGE_FAULT = Const(15, 64) # Store page fault
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: Florian Zaruba, ETH Zurich
-# Date: 19/04/2017
-# Description: Memory Management Unit for Ariane, contains TLB and
-# address translation unit. SV39 as defined in RISC-V
-# privilege specification 1.11-WIP
-
-import ariane_pkg::*;
-"""
-
-from nmigen import Const, Signal, Cat, Module, Mux
-from nmigen.cli import verilog, rtlil
-
-from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
-from tlb import TLB
-from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
- LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
-
-PRIV_LVL_M = Const(0b11, 2)
-PRIV_LVL_S = Const(0b01, 2)
-PRIV_LVL_U = Const(0b00, 2)
-
-
-class RVException:
- def __init__(self):
- self.cause = Signal(64) # cause of exception
- self.tval = Signal(64) # more info of causing exception
- # (e.g.: instruction causing it),
- # address of LD/ST fault
- self.valid = Signal()
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def __iter__(self):
- yield self.cause
- yield self.tval
- yield self.valid
-
- def ports(self):
- return list(self)
-
-
-class ICacheReqI:
- def __init__(self):
- self.fetch_valid = Signal() # address translation valid
- self.fetch_paddr = Signal(64) # physical address in
- self.fetch_exception = RVException() # exception occurred during fetch
-
- def __iter__(self):
- yield self.fetch_valid
- yield self.fetch_paddr
- yield from self.fetch_exception
-
- def ports(self):
- return list(self)
-
-
-class ICacheReqO:
- def __init__(self):
- self.fetch_req = Signal() # address translation request
- self.fetch_vaddr = Signal(64) # virtual address out
-
- def __iter__(self):
- yield self.fetch_req
- yield self.fetch_vaddr
-
- def ports(self):
- return list(self)
-
-
-class MMU:
- def __init__(self, instr_tlb_entries = 4,
- data_tlb_entries = 4,
- asid_width = 1):
- self.instr_tlb_entries = instr_tlb_entries
- self.data_tlb_entries = data_tlb_entries
- self.asid_width = asid_width
-
- self.flush_i = Signal()
- self.enable_translation_i = Signal()
- self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
- # IF interface
- self.icache_areq_i = ICacheReqO()
- self.icache_areq_o = ICacheReqI()
- # LSU interface
- # this is a more minimalistic interface because the actual addressing
- # logic is handled in the LSU as we distinguish load and stores,
- # what we do here is simple address translation
- self.misaligned_ex_i = RVException()
- self.lsu_req_i = Signal() # request address translation
- self.lsu_vaddr_i = Signal(64) # virtual address in
- self.lsu_is_store_i = Signal() # the translation is requested by a store
- # if we need to walk the page table we can't grant in the same cycle
-
- # Cycle 0
- self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
- # if translation hits in the DTLB
- # Cycle 1
- self.lsu_valid_o = Signal() # translation is valid
- self.lsu_paddr_o = Signal(64) # translated address
- self.lsu_exception_o = RVException() # addr translate threw exception
-
- # General control signals
- self.priv_lvl_i = Signal(2)
- self.ld_st_priv_lvl_i = Signal(2)
- self.sum_i = Signal()
- self.mxr_i = Signal()
- # input logic flag_mprv_i,
- self.satp_ppn_i = Signal(44)
- self.asid_i = Signal(self.asid_width)
- self.flush_tlb_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
-
- def elaborate(self, platform):
- m = Module()
-
- iaccess_err = Signal() # insufficient priv to access instr page
- daccess_err = Signal() # insufficient priv to access data page
- ptw_active = Signal() # PTW is currently walking a page table
- walking_instr = Signal() # PTW is walking because of an ITLB miss
- ptw_error = Signal() # PTW threw an exception
-
- update_vaddr = Signal(39)
- uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
- update_ptw_itlb = TLBUpdate(self.asid_width)
- update_ptw_dtlb = TLBUpdate(self.asid_width)
-
- itlb_lu_access = Signal()
- itlb_content = PTE()
- itlb_is_2M = Signal()
- itlb_is_1G = Signal()
- itlb_lu_hit = Signal()
-
- dtlb_lu_access = Signal()
- dtlb_content = PTE()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
- dtlb_lu_hit = Signal()
-
- # Assignments
- m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
- dtlb_lu_access.eq(self.lsu_req_i)
- ]
-
- # ITLB
- m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
- self.asid_width)
- m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
- i_tlb.update_i.eq(update_ptw_itlb),
- i_tlb.lu_access_i.eq(itlb_lu_access),
- i_tlb.lu_asid_i.eq(self.asid_i),
- i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- itlb_content.eq(i_tlb.lu_content_o),
- itlb_is_2M.eq(i_tlb.lu_is_2M_o),
- itlb_is_1G.eq(i_tlb.lu_is_1G_o),
- itlb_lu_hit.eq(i_tlb.lu_hit_o),
- ]
-
- # DTLB
- m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
- self.asid_width)
- m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
- d_tlb.update_i.eq(update_ptw_dtlb),
- d_tlb.lu_access_i.eq(dtlb_lu_access),
- d_tlb.lu_asid_i.eq(self.asid_i),
- d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
- dtlb_content.eq(d_tlb.lu_content_o),
- dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
- dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
- dtlb_lu_hit.eq(d_tlb.lu_hit_o),
- ]
-
- # PTW
- m.submodules.ptw = ptw = PTW(self.asid_width)
- m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
- walking_instr.eq(ptw.walking_instr_o),
- ptw_error.eq(ptw.ptw_error_o),
- ptw.enable_translation_i.eq(self.enable_translation_i),
-
- update_vaddr.eq(ptw.update_vaddr_o),
- update_ptw_itlb.eq(ptw.itlb_update_o),
- update_ptw_dtlb.eq(ptw.dtlb_update_o),
-
- ptw.itlb_access_i.eq(itlb_lu_access),
- ptw.itlb_hit_i.eq(itlb_lu_hit),
- ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
-
- ptw.dtlb_access_i.eq(dtlb_lu_access),
- ptw.dtlb_hit_i.eq(dtlb_lu_hit),
- ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
-
- ptw.req_port_i.eq(self.req_port_i),
- self.req_port_o.eq(ptw.req_port_o),
- ]
-
- # ila_1 i_ila_1 (
- # .clk(clk_i), # input wire clk
- # .probe0({req_port_o.address_tag, req_port_o.address_index}),
- # .probe1(req_port_o.data_req), # input wire [63:0] probe1
- # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
- # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
- # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
- # .probe5(ptw_error), # input wire [1:0] probe5
- # .probe6(update_vaddr), # input wire [0:0] probe6
- # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
- # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
- # .probe9(dtlb_lu_access), # input wire [0:0] probe9
- # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
- # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
- # .probe12(itlb_lu_access), # input wire [0:0] probe12
- # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
- # .probe14(itlb_lu_hit) # input wire [0:0] probe13
- # );
-
- #-----------------------
- # Instruction Interface
- #-----------------------
- # The instruction interface is a simple request response interface
-
- # MMU disabled: just pass through
- m.d.comb += [self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req),
- # play through in case we disabled address translation
- self.icache_areq_o.fetch_paddr.eq(
- self.icache_areq_i.fetch_vaddr)
- ]
- # two potential exception sources:
- # 1. HPTW threw an exception -> signal with a page fault exception
- # 2. We got an access error because of insufficient permissions ->
- # throw an access exception
- m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
- # Check whether we are allowed to access this memory region
- # from a fetch perspective
-
- # XXX TODO: use PermissionValidator instead [we like modules]
- m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
- (((self.priv_lvl_i == PRIV_LVL_U) & \
- ~itlb_content.u) | \
- ((self.priv_lvl_i == PRIV_LVL_S) & \
- itlb_content.u)))
-
- # MMU enabled: address from TLB, request delayed until hit.
- # Error when TLB hit and no access right or TLB hit and
- # translated address not valid (e.g. AXI decode error),
- # or when PTW performs walk due to ITLB miss and raises
- # an error.
- with m.If (self.enable_translation_i):
- # we work with SV39, so if VM is enabled, check that
- # all bits [63:38] are equal
- with m.If (self.icache_areq_i.fetch_req & \
- ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \
- (self.icache_areq_i.fetch_vaddr[38:64]) == 0)):
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
-
- m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
-
- # 4K page
- paddr = Signal.like(self.icache_areq_o.fetch_paddr)
- paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
- itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(itlb_is_2M):
- m.d.comb += paddr[12:21].eq(
- self.icache_areq_i.fetch_vaddr[12:21])
- # Giga page
- with m.If(itlb_is_1G):
- m.d.comb += paddr[12:30].eq(
- self.icache_areq_i.fetch_vaddr[12:30])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
-
- # ---------
- # ITLB Hit
- # --------
- # if we hit the ITLB output the request signal immediately
- with m.If(itlb_lu_hit):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req)
- # we got an access error
- with m.If (iaccess_err):
- # throw a page fault
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- # ---------
- # ITLB Miss
- # ---------
- # watch out for exceptions happening during walking the page table
- with m.Elif(ptw_active & walking_instr):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
- fe.tval.eq(uaddr64),
- fe.valid.eq(1)
- ]
-
- #-----------------------
- # Data Interface
- #-----------------------
-
- lsu_vaddr = Signal(64)
- dtlb_pte = PTE()
- misaligned_ex = RVException()
- lsu_req = Signal()
- lsu_is_store = Signal()
- dtlb_hit = Signal()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
-
- # check if we need to do translation or if we are always
- # ready (e.g.: we are not translating anything)
- m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
- dtlb_lu_hit, 1))
-
- # The data interface is simpler and only consists of a
- # request/response interface
- m.d.comb += [
- # save request and DTLB response
- lsu_vaddr.eq(self.lsu_vaddr_i),
- lsu_req.eq(self.lsu_req_i),
- misaligned_ex.eq(self.misaligned_ex_i),
- dtlb_pte.eq(dtlb_content),
- dtlb_hit.eq(dtlb_lu_hit),
- lsu_is_store.eq(self.lsu_is_store_i),
- dtlb_is_2M.eq(dtlb_is_2M),
- dtlb_is_1G.eq(dtlb_is_1G),
- ]
- m.d.sync += [
- self.lsu_paddr_o.eq(lsu_vaddr),
- self.lsu_valid_o.eq(lsu_req),
- self.lsu_exception_o.eq(misaligned_ex),
- ]
-
- sverr = Signal()
- usrerr = Signal()
-
- m.d.comb += [
- # mute misaligned exceptions if there is no request
- # otherwise they will throw accidental exceptions
- misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
-
- # SUM is not set and we are trying to access a user
- # page in supervisor mode
- sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
- dtlb_pte.u),
- # this is not a user page but we are in user mode and
- # trying to access it
- usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
-
- # Check if the User flag is set, then we may only
- # access it in supervisor mode if SUM is enabled
- daccess_err.eq(sverr | usrerr),
- ]
-
- # translation is enabled and no misaligned exception occurred
- with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
- m.d.comb += lsu_req.eq(0)
- # 4K page
- paddr = Signal.like(lsu_vaddr)
- paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(dtlb_is_2M):
- m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
- # Giga page
- with m.If(dtlb_is_1G):
- m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
- m.d.sync += self.lsu_paddr_o.eq(paddr)
-
- # ---------
- # DTLB Hit
- # --------
- with m.If(dtlb_hit & lsu_req):
- m.d.comb += lsu_req.eq(1)
- # this is a store
- with m.If (lsu_is_store):
- # check if the page is write-able and
- # we are not violating privileges
- # also check if the dirty flag is set
- with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
-
- # this is a load, check for sufficient access
- # privileges - throw a page fault if necessary
- with m.Elif(daccess_err):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # ---------
- # DTLB Miss
- # ---------
- # watch out for exceptions
- with m.Elif (ptw_active & ~walking_instr):
- # page table walker threw an exception
- with m.If (ptw_error):
- # an error makes the translation valid
- m.d.comb += lsu_req.eq(1)
- # the page table walker can only throw page faults
- with m.If (lsu_is_store):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- with m.Else():
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
-
- return m
-
- def ports(self):
- return [self.flush_i, self.enable_translation_i,
- self.en_ld_st_translation_i,
- self.lsu_req_i,
- self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
- self.lsu_valid_o, self.lsu_paddr_o,
- self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
- self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
- self.itlb_miss_o, self.dtlb_miss_o] + \
- self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
- self.req_port_i.ports() + self.req_port_o.ports() + \
- self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
-
-if __name__ == '__main__':
- mmu = MMU()
- vl = rtlil.convert(mmu, ports=mmu.ports())
- with open("test_mmu.il", "w") as f:
- f.write(vl)
-
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const
-from nmigen.hdl.ir import Elaboratable
-from math import log2
-
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-
-class PLRU(Elaboratable):
- """ PLRU - Pseudo Least Recently Used Replacement
-
- PLRU-tree indexing:
- lvl0 0
- / \
- / \
- lvl1 1 2
- / \ / \
- lvl2 3 4 5 6
- / \ /\/\ /\
- ... ... ... ...
- """
- def __init__(self, entries):
- self.entries = entries
- self.lu_hit = Signal(entries)
- self.replace_en_o = Signal(entries)
- self.lu_access_i = Signal()
- # Tree (bit per entry)
- self.TLBSZ = 2*(self.entries-1)
- self.plru_tree = Signal(self.TLBSZ)
- self.plru_tree_o = Signal(self.TLBSZ)
-
- def elaborate(self, platform=None):
- m = Module()
-
- # Just predefine which nodes will be set/cleared
- # E.g. for a TLB with 8 entries, the for-loop is semantically
- # equivalent to the following pseudo-code:
- # unique case (1'b1)
- # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
- # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
- # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
- # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
- # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
- # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
- # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
- # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
- # default: begin /* No hit */ end
- # endcase
- LOG_TLB = int(log2(self.entries))
- print(LOG_TLB)
- for i in range(self.entries):
- # we got a hit so update the pointer as it was least recently used
- hit = Signal(reset_less=True)
- m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
- with m.If(hit):
- # Set the nodes to the values we would expect
- for lvl in range(LOG_TLB):
- idx_base = (1<<lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl;
- new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
- plru_idx = idx_base + (i >> shift)
- print ("plru", i, lvl, hex(idx_base),
- plru_idx, shift, new_idx)
- m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
-
- # Decode tree to write enable signals
- # Next for-loop basically creates the following logic for e.g.
- # an 8 entry TLB (note: pseudo-code obviously):
- # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
- # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
- # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
- # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
- # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
- # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
- # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
- # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
- # For each entry traverse the tree. If every tree-node matches
- # the corresponding bit of the entry's index, this is
- # the next entry to replace.
- replace = []
- for i in range(self.entries):
- en = []
- for lvl in range(LOG_TLB):
- idx_base = (1<<lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl;
- new_idx = (i >> (shift-1)) & 1;
- plru_idx = idx_base + (i>>shift)
- plru = Signal(reset_less=True,
- name="plru-%d-%d-%d" % (i, lvl, plru_idx))
- m.d.comb += plru.eq(self.plru_tree[plru_idx])
- # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
- if new_idx:
- en.append(~plru) # yes inverted (using bool())
- else:
- en.append(plru) # yes inverted (using bool())
- print ("plru", i, en)
- # boolean logic manipulation:
- # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
- replace.append(~Cat(*en).bool())
- m.d.comb += self.replace_en_o.eq(Cat(*replace))
-
- return m
-
- def ports(self):
- return [self.entries, self.lu_hit, self.replace_en_o,
- self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 24.4.2017
-# Description: Hardware-PTW
-
-/* verilator lint_off WIDTH */
-import ariane_pkg::*;
-
-see linux kernel source:
-
-* "arch/riscv/include/asm/page.h"
-* "arch/riscv/include/asm/mmu_context.h"
-* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
-
-"""
-
-from nmigen import Const, Signal, Cat, Module
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.cli import verilog, rtlil
-from math import log2
-
-
-DCACHE_SET_ASSOC = 8
-CONFIG_L1D_SIZE = 32*1024
-DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
-DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
-
-ASID_WIDTH = 8
-
-
-class DCacheReqI:
- def __init__(self):
- self.address_index = Signal(DCACHE_INDEX_WIDTH)
- self.address_tag = Signal(DCACHE_TAG_WIDTH)
- self.data_wdata = Signal(64)
- self.data_req = Signal()
- self.data_we = Signal()
- self.data_be = Signal(8)
- self.data_size = Signal(2)
- self.kill_req = Signal()
- self.tag_valid = Signal()
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def ports(self):
- return [self.address_index, self.address_tag,
- self.data_wdata, self.data_req,
- self.data_we, self.data_be, self.data_size,
- self.kill_req, self.tag_valid,
- ]
-
-class DCacheReqO:
- def __init__(self):
- self.data_gnt = Signal()
- self.data_rvalid = Signal()
- self.data_rdata = Signal(64) # actually in PTE object format
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def ports(self):
- return [self.data_gnt, self.data_rvalid, self.data_rdata]
-
-
-class PTE: #(RecordObject):
- def __init__(self):
- self.v = Signal()
- self.r = Signal()
- self.w = Signal()
- self.x = Signal()
- self.u = Signal()
- self.g = Signal()
- self.a = Signal()
- self.d = Signal()
- self.rsw = Signal(2)
- self.ppn = Signal(44)
- self.reserved = Signal(10)
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- if isinstance(x, ArrayProxy):
- res = []
- for o in self.ports():
- i = getattr(x, o.name)
- res.append(i)
- x = Cat(*res)
- else:
- x = x.flatten()
- return self.flatten().eq(x)
-
- def __iter__(self):
- """ order is critical so that flatten creates LSB to MSB
- """
- yield self.v
- yield self.r
- yield self.w
- yield self.x
- yield self.u
- yield self.g
- yield self.a
- yield self.d
- yield self.rsw
- yield self.ppn
- yield self.reserved
-
- def ports(self):
- return list(self)
-
-
-class TLBUpdate:
- def __init__(self, asid_width):
- self.valid = Signal() # valid flag
- self.is_2M = Signal()
- self.is_1G = Signal()
- self.vpn = Signal(27)
- self.asid = Signal(asid_width)
- self.content = PTE()
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- return self.flatten().eq(x.flatten())
-
- def ports(self):
- return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
- self.content.ports()
-
-
-# SV39 defines three levels of page tables
-LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
-LVL2 = Const(1, 2)
-LVL3 = Const(2, 2)
-
-
-class PTW:
- def __init__(self, asid_width=8):
- self.asid_width = asid_width
-
- self.flush_i = Signal() # flush everything, we need to do this because
- # actually everything we do is speculative at this stage
- # e.g.: there could be a CSR instruction that changes everything
- self.ptw_active_o = Signal(reset=1) # active if not IDLE
- self.walking_instr_o = Signal() # set when walking for TLB
- self.ptw_error_o = Signal() # set when an error occurred
- self.enable_translation_i = Signal() # CSRs indicate to enable SV39
- self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
-
- self.lsu_is_store_i = Signal() # translation triggered by store
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
-
- # to TLBs, update logic
- self.itlb_update_o = TLBUpdate(asid_width)
- self.dtlb_update_o = TLBUpdate(asid_width)
-
- self.update_vaddr_o = Signal(39)
-
- self.asid_i = Signal(self.asid_width)
- # from TLBs
- # did we miss?
- self.itlb_access_i = Signal()
- self.itlb_hit_i = Signal()
- self.itlb_vaddr_i = Signal(64)
-
- self.dtlb_access_i = Signal()
- self.dtlb_hit_i = Signal()
- self.dtlb_vaddr_i = Signal(64)
- # from CSR file
- self.satp_ppn_i = Signal(44) # ppn from satp
- self.mxr_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
-
- def ports(self):
- return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
- ]
- return [
- self.enable_translation_i, self.en_ld_st_translation_i,
- self.lsu_is_store_i, self.req_port_i, self.req_port_o,
- self.update_vaddr_o,
- self.asid_i,
- self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
- self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
- self.satp_ppn_i, self.mxr_i,
- self.itlb_miss_o, self.dtlb_miss_o
- ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
-
- def elaborate(self, platform):
- m = Module()
-
- # input registers
- data_rvalid = Signal()
- data_rdata = Signal(64)
-
- # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
- # is spec'd in 64-bit binary-format: better to spec as Record?
- pte = PTE()
- m.d.comb += pte.flatten().eq(data_rdata)
-
- # SV39 defines three levels of page tables
- ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
- ptw_lvl1 = Signal()
- ptw_lvl2 = Signal()
- ptw_lvl3 = Signal()
- m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
- ptw_lvl2.eq(ptw_lvl == LVL2),
- ptw_lvl3.eq(ptw_lvl == LVL3)]
-
- # is this an instruction page table walk?
- is_instr_ptw = Signal()
- global_mapping = Signal()
- # latched tag signal
- tag_valid = Signal()
- # register the ASID
- tlb_update_asid = Signal(self.asid_width)
- # register VPN we need to walk, SV39 defines a 39 bit virtual addr
- vaddr = Signal(64)
- # 4 byte aligned physical pointer
- ptw_pptr = Signal(56)
-
- end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
- m.d.sync += [
- # Assignments
- self.update_vaddr_o.eq(vaddr),
-
- self.walking_instr_o.eq(is_instr_ptw),
- # directly output the correct physical address
- self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
- self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
- # we are never going to kill this request
- self.req_port_o.kill_req.eq(0), # XXX assign comb?
- # we are never going to write with the HPTW
- self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
- # -----------
- # TLB Update
- # -----------
- self.itlb_update_o.vpn.eq(vaddr[12:39]),
- self.dtlb_update_o.vpn.eq(vaddr[12:39]),
- # update the correct page table level
- self.itlb_update_o.is_2M.eq(ptw_lvl2),
- self.itlb_update_o.is_1G.eq(ptw_lvl1),
- self.dtlb_update_o.is_2M.eq(ptw_lvl2),
- self.dtlb_update_o.is_1G.eq(ptw_lvl1),
- # output the correct ASID
- self.itlb_update_o.asid.eq(tlb_update_asid),
- self.dtlb_update_o.asid.eq(tlb_update_asid),
- # set the global mapping bit
- self.itlb_update_o.content.eq(pte),
- self.itlb_update_o.content.g.eq(global_mapping),
- self.dtlb_update_o.content.eq(pte),
- self.dtlb_update_o.content.g.eq(global_mapping),
-
- self.req_port_o.tag_valid.eq(tag_valid),
- ]
-
- #-------------------
- # Page table walker
- #-------------------
- # A virtual address va is translated into a physical address pa as
- # follows:
- # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
- # PAGESIZE=2^12 and LEVELS=3.)
- # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
- # (For Sv32, PTESIZE=4.)
- # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
- # access exception.
- # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
- # step 5. Otherwise, this PTE is a pointer to the next level of
- # the page table.
- # Let i=i-1. If i < 0, stop and raise an access exception.
- # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
- # 5. A leaf PTE has been found. Determine if the requested memory
- # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
- # stop and raise an access exception. Otherwise, the translation is
- # successful. Set pte.a to 1, and, if the memory access is a
- # store, set pte.d to 1.
- # The translated physical address is given as follows:
- # - pa.pgoff = va.pgoff.
- # - If i > 0, then this is a superpage translation and
- # pa.ppn[i-1:0] = va.vpn[i-1:0].
- # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
- # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
- # superpage stop and raise a page-fault exception.
-
- m.d.sync += tag_valid.eq(0)
-
- # default assignments
- m.d.comb += [
- # PTW memory interface
- self.req_port_o.data_req.eq(0),
- self.req_port_o.data_be.eq(Const(0xFF, 8)),
- self.req_port_o.data_size.eq(Const(0b11, 2)),
- self.req_port_o.data_we.eq(0),
- self.ptw_error_o.eq(0),
- self.itlb_update_o.valid.eq(0),
- self.dtlb_update_o.valid.eq(0),
-
- self.itlb_miss_o.eq(0),
- self.dtlb_miss_o.eq(0),
- ]
-
- # ------------
- # State Machine
- # ------------
-
- with m.FSM() as fsm:
-
- with m.State("IDLE"):
- self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid)
-
- with m.State("WAIT_GRANT"):
- self.grant(m, tag_valid, data_rvalid)
-
- with m.State("PTE_LOOKUP"):
- # we wait for the valid signal
- with m.If(data_rvalid):
- self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr)
-
- # Propagate error to MMU/LSU
- with m.State("PROPAGATE_ERROR"):
- m.next = "IDLE"
- m.d.comb += self.ptw_error_o.eq(1)
-
- # wait for the rvalid before going back to IDLE
- with m.State("WAIT_RVALID"):
- with m.If(data_rvalid):
- m.next = "IDLE"
-
- m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
- data_rvalid.eq(self.req_port_i.data_rvalid)
- ]
-
- return m
-
- def set_grant_state(self, m):
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (self.req_port_i.data_gnt):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "WAIT_GRANT"
-
- def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid):
- # by default we start with the top-most page table
- m.d.sync += [is_instr_ptw.eq(0),
- ptw_lvl.eq(LVL1),
- global_mapping.eq(0),
- self.ptw_active_o.eq(0), # deactive (IDLE)
- ]
- # work out itlb/dtlb miss
- m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
- self.itlb_access_i & \
- ~self.itlb_hit_i & \
- ~self.dtlb_access_i)
- m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
- self.dtlb_access_i & \
- ~self.dtlb_hit_i)
- # we got an ITLB miss?
- with m.If(self.itlb_miss_o):
- pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
- self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- is_instr_ptw.eq(1),
- vaddr.eq(self.itlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
-
- # we got a DTLB miss?
- with m.Elif(self.dtlb_miss_o):
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
- self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- vaddr.eq(self.dtlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
-
- def grant(self, m, tag_valid, data_rvalid):
- # we've got a data WAIT_GRANT so tell the
- # cache that the tag is valid
-
- # send a request out
- m.d.comb += self.req_port_o.data_req.eq(1)
- # wait for the WAIT_GRANT
- with m.If(self.req_port_i.data_gnt):
- # send the tag valid signal one cycle later
- m.d.sync += tag_valid.eq(1)
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (~data_rvalid):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "PTE_LOOKUP"
-
- def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr):
- # temporaries
- pte_rx = Signal(reset_less=True)
- pte_exe = Signal(reset_less=True)
- pte_inv = Signal(reset_less=True)
- pte_a = Signal(reset_less=True)
- st_wd = Signal(reset_less=True)
- m.d.comb += [pte_rx.eq(pte.r | pte.x),
- pte_exe.eq(~pte.x | ~pte.a),
- pte_inv.eq(~pte.v | (~pte.r & pte.w)),
- pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
- st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
-
- l1err = Signal(reset_less=True)
- l2err = Signal(reset_less=True)
- m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)),
- l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ]
-
- # check if the global mapping bit is set
- with m.If (pte.g):
- m.d.sync += global_mapping.eq(1)
-
- m.next = "IDLE"
-
- # -------------
- # Invalid PTE
- # -------------
- # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
- # stop and raise a page-fault exception.
- with m.If (pte_inv):
- m.next = "PROPAGATE_ERROR"
-
- # -----------
- # Valid PTE
- # -----------
-
- # it is a valid PTE
- # if pte.r = 1 or pte.x = 1 it is a valid PTE
- with m.Elif (pte_rx):
- # Valid translation found (either 1G, 2M or 4K)
- with m.If(is_instr_ptw):
- # ------------
- # Update ITLB
- # ------------
- # If page not executable, we can directly raise error.
- # This doesn't put a useless entry into the TLB.
- # The same idea applies to the access flag since we let
- # the access flag be managed by SW.
- with m.If (pte_exe):
- m.next = "IDLE"
- with m.Else():
- m.d.comb += self.itlb_update_o.valid.eq(1)
-
- with m.Else():
- # ------------
- # Update DTLB
- # ------------
- # Check if the access flag has been set, otherwise
- # throw page-fault and let software handle those bits.
- # If page not readable (there are no write-only pages)
- # directly raise an error. This doesn't put a useless
- # entry into the TLB.
- with m.If(pte_a):
- m.d.comb += self.dtlb_update_o.valid.eq(1)
- with m.Else():
- m.next = "PROPAGATE_ERROR"
- # Request is a store: perform additional checks
- # If the request was a store and the page not
- # write-able, raise an error
- # the same applies if the dirty flag is not set
- with m.If (st_wd):
- m.d.comb += self.dtlb_update_o.valid.eq(0)
- m.next = "PROPAGATE_ERROR"
-
- # check if the ppn is correctly aligned: Case (6)
- with m.If(l1err | l2err):
- m.next = "PROPAGATE_ERROR"
- m.d.comb += [self.dtlb_update_o.valid.eq(0),
- self.itlb_update_o.valid.eq(0)]
-
- # this is a pointer to the next TLB level
- with m.Else():
- # pointer to next level of page table
- with m.If (ptw_lvl1):
- # we are in the second level now
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL2)
- ]
- with m.If(ptw_lvl2):
- # here we received a pointer to the third level
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL3)
- ]
- self.set_grant_state(m)
-
- with m.If (ptw_lvl3):
- # Should already be the last level
- # page table => Error
- m.d.sync += ptw_lvl.eq(LVL3)
- m.next = "PROPAGATE_ERROR"
-
-
-if __name__ == '__main__':
- ptw = PTW()
- vl = rtlil.convert(ptw, ports=ptw.ports())
- with open("test_ptw.il", "w") as f:
- f.write(vl)
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 21.4.2017
-# Description: Translation Lookaside Buffer, SV39
-# fully set-associative
-
-Implementation in c++:
-https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
-
-Text description:
-https://people.cs.clemson.edu/~mark/464/p_lru.txt
-
-Online simulator:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
-"""
-from math import log2
-from nmigen import Signal, Module, Cat, Const, Array
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import Encoder
-
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-from plru import PLRU
-from tlb_content import TLBContent
-
-TLB_ENTRIES = 8
-
-class TLB:
- def __init__(self, tlb_entries=8, asid_width=8):
- self.tlb_entries = tlb_entries
- self.asid_width = asid_width
-
- self.flush_i = Signal() # Flush signal
- # Lookup signals
- self.lu_access_i = Signal()
- self.lu_asid_i = Signal(self.asid_width)
- self.lu_vaddr_i = Signal(64)
- self.lu_content_o = PTE()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
- # Update TLB
- self.pte_width = len(self.lu_content_o.flatten())
- self.update_i = TLBUpdate(asid_width)
-
- def elaborate(self, platform):
- m = Module()
-
- vpn2 = Signal(9)
- vpn1 = Signal(9)
- vpn0 = Signal(9)
-
- #-------------
- # Translation
- #-------------
-
- # SV39 defines three levels of page tables
- m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
- vpn1.eq(self.lu_vaddr_i[21:30]),
- vpn2.eq(self.lu_vaddr_i[30:39]),
- ]
-
- tc = []
- for i in range(self.tlb_entries):
- tlc = TLBContent(self.pte_width, self.asid_width)
- setattr(m.submodules, "tc%d" % i, tlc)
- tc.append(tlc)
- # connect inputs
- tlc.update_i = self.update_i # saves a lot of graphviz links
- m.d.comb += [tlc.vpn0.eq(vpn0),
- tlc.vpn1.eq(vpn1),
- tlc.vpn2.eq(vpn2),
- tlc.flush_i.eq(self.flush_i),
- #tlc.update_i.eq(self.update_i),
- tlc.lu_asid_i.eq(self.lu_asid_i)]
- tc = Array(tc)
-
- #--------------
- # Select hit
- #--------------
-
- # use Encoder to select hit index
- # XXX TODO: assert that there's only one valid entry (one lu_hit)
- hitsel = Encoder(self.tlb_entries)
- m.submodules.hitsel = hitsel
-
- hits = []
- for i in range(self.tlb_entries):
- hits.append(tc[i].lu_hit_o)
- m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
- idx = hitsel.o
-
- active = Signal(reset_less=True)
- m.d.comb += active.eq(~hitsel.n)
- with m.If(active):
- # active hit, send selected as output
- m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
- self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
- self.lu_hit_o.eq(1),
- self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
- ]
-
- #--------------
- # PLRU.
- #--------------
-
- p = PLRU(self.tlb_entries)
- plru_tree = Signal(p.TLBSZ)
- m.submodules.plru = p
-
- # connect PLRU inputs/outputs
- # XXX TODO: assert that there's only one valid entry (one replace_en)
- en = []
- for i in range(self.tlb_entries):
- en.append(tc[i].replace_en_i)
- m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
- p.lu_hit.eq(hitsel.i),
- p.lu_access_i.eq(self.lu_access_i),
- p.plru_tree.eq(plru_tree)]
- m.d.sync += plru_tree.eq(p.plru_tree_o)
-
- #--------------
- # Sanity checks
- #--------------
-
- assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
- "TLB size must be a multiple of 2 and greater than 1"
- assert (self.asid_width >= 1), \
- "ASID width must be at least 1"
-
- return m
-
- """
- # Just for checking
- function int countSetBits(logic[self.tlb_entries-1:0] vector);
- automatic int count = 0;
- foreach (vector[idx]) begin
- count += vector[idx];
- end
- return count;
- endfunction
-
- assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
- else $error("More then one hit in TLB!"); $stop(); end
- assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
- else $error("More then one TLB entry selected for next replace!");
- """
-
- def ports(self):
- return [self.flush_i, self.lu_access_i,
- self.lu_asid_i, self.lu_vaddr_i,
- self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
- ] + self.lu_content_o.ports() + self.update_i.ports()
-
-if __name__ == '__main__':
- tlb = TLB()
- vl = rtlil.convert(tlb, ports=tlb.ports())
- with open("test_tlb.il", "w") as f:
- f.write(vl)
-
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const
-
-from ptw import TLBUpdate, PTE
-
-class TLBEntry:
- def __init__(self, asid_width):
- self.asid = Signal(asid_width)
- # SV39 defines three levels of page tables
- self.vpn0 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn2 = Signal(9)
- self.is_2M = Signal()
- self.is_1G = Signal()
- self.valid = Signal()
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- return self.flatten().eq(x.flatten())
-
- def ports(self):
- return [self.asid, self.vpn0, self.vpn1, self.vpn2,
- self.is_2M, self.is_1G, self.valid]
-
-class TLBContent:
- def __init__(self, pte_width, asid_width):
- self.asid_width = asid_width
- self.pte_width = pte_width
- self.flush_i = Signal() # Flush signal
- # Update TLB
- self.update_i = TLBUpdate(asid_width)
- self.vpn2 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn0 = Signal(9)
- self.replace_en_i = Signal() # replace the following entry,
- # set by replacement strategy
- # Lookup signals
- self.lu_asid_i = Signal(asid_width)
- self.lu_content_o = Signal(pte_width)
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
-
- def elaborate(self, platform):
- m = Module()
-
- tags = TLBEntry(self.asid_width)
- content = Signal(self.pte_width)
-
- m.d.comb += [self.lu_hit_o.eq(0),
- self.lu_is_2M_o.eq(0),
- self.lu_is_1G_o.eq(0)]
-
- # temporaries for 1st level match
- asid_ok = Signal(reset_less=True)
- vpn2_ok = Signal(reset_less=True)
- tags_ok = Signal(reset_less=True)
- vpn2_hit = Signal(reset_less=True)
- m.d.comb += [tags_ok.eq(tags.valid),
- asid_ok.eq(tags.asid == self.lu_asid_i),
- vpn2_ok.eq(tags.vpn2 == self.vpn2),
- vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
- # temporaries for 2nd level match
- vpn1_ok = Signal(reset_less=True)
- tags_2M = Signal(reset_less=True)
- vpn0_ok = Signal(reset_less=True)
- vpn0_or_2M = Signal(reset_less=True)
- m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
- tags_2M.eq(tags.is_2M),
- vpn0_ok.eq(self.vpn0 == tags.vpn0),
- vpn0_or_2M.eq(tags_2M | vpn0_ok)]
- # first level match, this may be a giga page,
- # check the ASID flags as well
- with m.If(vpn2_hit):
- # second level
- with m.If (tags.is_1G):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_1G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
- # not a giga page hit so check further
- with m.Elif(vpn1_ok):
- # this could be a 2 mega page hit or a 4 kB hit
- # output accordingly
- with m.If(vpn0_or_2M):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_2M_o.eq(tags.is_2M),
- self.lu_hit_o.eq(1),
- ]
- # ------------------
- # Update or Flush
- # ------------------
-
- # temporaries
- replace_valid = Signal(reset_less=True)
- m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
-
- # flush
- with m.If (self.flush_i):
- # invalidate (flush) conditions: all if zero or just this ASID
- with m.If (self.lu_asid_i == Const(0, self.asid_width) |
- (self.lu_asid_i == tags.asid)):
- m.d.sync += tags.valid.eq(0)
-
- # normal replacement
- with m.Elif(replace_valid):
- m.d.sync += [ # update tag array
- tags.asid.eq(self.update_i.asid),
- tags.vpn2.eq(self.update_i.vpn[18:27]),
- tags.vpn1.eq(self.update_i.vpn[9:18]),
- tags.vpn0.eq(self.update_i.vpn[0:9]),
- tags.is_1G.eq(self.update_i.is_1G),
- tags.is_2M.eq(self.update_i.is_2M),
- tags.valid.eq(1),
- # and content as well
- content.eq(self.update_i.content.flatten())
- ]
- return m
-
- def ports(self):
- return [self.flush_i,
- self.lu_asid_i,
- self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
- ] + self.update_i.content.ports() + self.update_i.ports()
+++ /dev/null
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from plru import PLRU
-
-from nmigen.compat.sim import run_simulation
-
-def testbench(dut):
- yield
-
-if __name__ == "__main__":
- dut = PLRU(4)
- run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
- print("PLRU Unit Test Success")
\ No newline at end of file
+++ /dev/null
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from nmigen.compat.sim import run_simulation
-
-from ptw import PTW, PTE
-
-
-def testbench(dut):
-
- addr = 0x8000000
-
- #pte = PTE()
- #yield pte.v.eq(1)
- #yield pte.r.eq(1)
-
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
-
- # data lookup
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000000)
-
- yield
- yield
- yield
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x200000)
-
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
-
- # data lookup, PTW levels 1-2-3
- addr = 0x4000000
- yield dut.dtlb_vaddr_i.eq(addr)
- yield dut.mxr_i.eq(0x1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
-
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(addr)
-
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
- yield
-
-
- # instruction lookup
- yield dut.en_ld_st_translation_i.eq(0)
- yield dut.enable_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800000)
-
- yield
- yield
- yield
-
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x200000)
-
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
-
- yield
-
-
-
-if __name__ == "__main__":
- dut = PTW()
- run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
- print("PTW Unit Test Success")
+++ /dev/null
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from nmigen.compat.sim import run_simulation
-
-from tlb import TLB
-
-def set_vaddr(addr):
- yield dut.lu_vaddr_i.eq(addr)
- yield dut.update_i.vpn.eq(addr>>12)
-
-
-def testbench(dut):
- yield dut.lu_access_i.eq(1)
- yield dut.lu_asid_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_1G.eq(0)
- yield dut.update_i.is_2M.eq(0)
- yield dut.update_i.asid.eq(1)
- yield dut.update_i.content.ppn.eq(0)
- yield dut.update_i.content.rsw.eq(0)
- yield dut.update_i.content.r.eq(1)
-
- yield
-
- addr = 0x80000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x90001
- yield from set_vaddr(addr)
- yield
-
- addr = 0x28000000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x28000001
- yield from set_vaddr(addr)
-
- addr = 0x28000001
- yield from set_vaddr(addr)
- yield
-
- addr = 0x1000040000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x1000040001
- yield from set_vaddr(addr)
- yield
-
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040000
- yield from set_vaddr(addr)
- yield
-
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040001
- yield from set_vaddr(addr)
- yield
-
- yield
-
-
-if __name__ == "__main__":
- dut = TLB()
- run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")