+++ /dev/null
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.lib.coding import Encoder, PriorityEncoder
class AddressEncoder():
    """Vector-to-address encoder with match classification.

    Feeds one input vector to both an nmigen ``Encoder`` and an nmigen
    ``PriorityEncoder`` and reports whether the vector has no bit set,
    exactly one bit set, or several bits set.

    Usage:
        ``o`` is only meaningful while ``single_match`` or
        ``multiple_match`` is high; when the input is zero all three
        outputs are driven to 0.
    """

    def __init__(self, width):
        """ Arguments:
            * width: The number of bits in the input vector ``i``
        """
        # Input
        self.i = Signal(width)

        # Output
        self.single_match = Signal(1)
        self.multiple_match = Signal(1)
        self.o = Signal(max=width)

        # Internal
        self.encoder = Encoder(width)
        self.p_encoder = PriorityEncoder(width)

    def elaborate(self, platform=None):
        m = Module()
        comb = m.d.comb
        plain, prio = self.encoder, self.p_encoder

        # Register the two encoders as submodules
        m.submodules.encoder = plain
        m.submodules.p_encoder = prio

        # Both encoders observe the same input vector
        comb += plain.i.eq(self.i)
        comb += prio.i.eq(self.i)

        # The priority encoder flags (n high) that its output is not
        # valid: report "no match" and force the address to zero.
        with m.If(prio.n):
            comb += self.single_match.eq(0)
            comb += self.multiple_match.eq(0)
            comb += self.o.eq(0)
        # Otherwise at least one bit is set in the input
        with m.Else():
            # The plain encoder rejecting the input (n high) is taken
            # to mean several bits are set
            with m.If(plain.n):
                comb += self.single_match.eq(0)
                comb += self.multiple_match.eq(1)
            # The plain encoder accepting the input means a single bit
            with m.Else():
                comb += self.single_match.eq(1)
                comb += self.multiple_match.eq(0)
            # In both non-zero cases the address comes from the
            # priority encoder
            comb += self.o.eq(prio.o)

        return m
+++ /dev/null
-from nmigen import Array, Cat, Module, Signal
-from nmigen.lib.coding import Decoder
-from nmigen.cli import main #, verilog
-from CamEntry import CamEntry
-from AddressEncoder import AddressEncoder
class Cam():
    """ Content Addressable Memory (CAM)

    The purpose of this module is to quickly look up whether an
    entry exists given a data key.
    This module will search for the given data in all internal entries
    and output whether a single or multiple match was found.
    If a single entry is found the address is returned and single_match
    is set HIGH. If multiple entries are found the lowest address is
    returned and multiple_match is set HIGH. If neither single_match nor
    multiple_match is HIGH this implies no match was found. To write
    to the CAM set the address bus to the desired entry and set
    write_enable HIGH. Entry management should be performed one level
    above this block as lookup is performed within.

    Notes:
        The read and write operations take one clock cycle to complete.
        Currently the read_warning line is present for interfacing but
        is not necessary for this design. This module is capable of
        writing in the first cycle, reading on the second, and
        outputting the correct address on the third.
    """

    def __init__(self, data_size, cam_size):
        """ Arguments:
            * data_size: (bits) The bit size of the data
            * cam_size: (number) The number of entries in the CAM
        """
        # Internal
        self.cam_size = cam_size
        self.encoder = AddressEncoder(cam_size)
        self.decoder = Decoder(cam_size)
        self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))

        # Input
        self.enable = Signal(1)
        self.write_enable = Signal(1)
        self.data_in = Signal(data_size)  # The data to be written
        self.data_mask = Signal(data_size)  # mask for ternary writes
        self.address_in = Signal(max=cam_size)  # address of entry to write

        # Output
        self.read_warning = Signal(1)  # High when a read interrupts a write
        self.single_match = Signal(1)  # High when there is only one match
        self.multiple_match = Signal(1)  # High when at least two matches
        self.match_address = Signal(max=cam_size)  # The lowest address matched

    def elaborate(self, platform=None):
        """ Build the CAM: decoder -> per-entry command, entries -> encoder.
        """
        m = Module()
        # AddressEncoder for match types and output address
        m.submodules.AddressEncoder = self.encoder
        # Decoder is used to select which entry will be written to
        m.submodules.Decoder = self.decoder
        # CamEntry Array Submodules
        # Note these are added anonymously
        entry_array = self.entry_array
        m.submodules += entry_array

        # Decoder logic: address_in selects the entry to be written
        m.d.comb += [
            self.decoder.i.eq(self.address_in),
            self.decoder.n.eq(0)
        ]

        encoder_vector = []
        with m.If(self.enable):
            # Set the key value for every CamEntry
            for index in range(self.cam_size):

                # Write Operation: only the decoded entry gets the
                # write command (2), every other entry idles (0)
                with m.If(self.write_enable):
                    with m.If(self.decoder.o[index]):
                        m.d.comb += entry_array[index].command.eq(2)
                    with m.Else():
                        m.d.comb += entry_array[index].command.eq(0)
                # Read Operation: every entry compares (1)
                with m.Else():
                    m.d.comb += entry_array[index].command.eq(1)

                # Send data input to all entries
                m.d.comb += entry_array[index].data_in.eq(self.data_in)
                # Send all entry matches to encoder
                ematch = entry_array[index].match
                encoder_vector.append(ematch)

            # Give input to and accept output from encoder module
            m.d.comb += [
                self.encoder.i.eq(Cat(*encoder_vector)),
                self.single_match.eq(self.encoder.single_match),
                self.multiple_match.eq(self.encoder.multiple_match),
                self.match_address.eq(self.encoder.o)
            ]

        # If the CAM is not enabled set all outputs to 0
        with m.Else():
            m.d.comb += [
                self.read_warning.eq(0),
                self.single_match.eq(0),
                self.multiple_match.eq(0),
                self.match_address.eq(0)
            ]

        return m

    def ports(self):
        """ Return every externally visible signal of the CAM.

        address_in is read by elaborate() (it drives the write decoder),
        so it has to be exposed too. It is appended last so the
        positions of the previously listed ports do not change.
        """
        return [self.enable, self.write_enable,
                self.data_in, self.data_mask,
                self.read_warning, self.single_match,
                self.multiple_match, self.match_address,
                self.address_in]
if __name__ == '__main__':
    # Command-line entry point: a CAM built as Cam(4, 4), i.e.
    # data_size=4 and cam_size=4, handed to the nmigen CLI.
    demo_cam = Cam(4, 4)
    demo_ports = demo_cam.ports()
    main(demo_cam, ports=demo_ports)
+++ /dev/null
-from nmigen import Module, Signal
class CamEntry:
    """ Content Addressable Memory (CAM) Entry

    A single CAM slot. Depending on ``command`` it either compares
    the stored key against ``data_in`` (read), stores ``data_in`` as
    the new key (write), or clears itself (reset). All updates are
    registered in the sync domain.
    """

    def __init__(self, data_size):
        """ Arguments:
            * data_size: (bit count) The size of the data
        """
        # Input
        # command encoding: 00 => NA 01 => Read 10 => Write 11 => Reset
        self.command = Signal(2)
        self.data_in = Signal(data_size)  # Data input when writing

        # Output
        self.match = Signal(1)  # Result of the internal/input key comparison
        self.data = Signal(data_size)

    def elaborate(self, platform=None):
        m = Module()
        sync = m.d.sync

        with m.Switch(self.command):
            # No action: no match is reported
            with m.Case("00"):
                sync += self.match.eq(0)
            # Read: match is the equality of stored and incoming key
            with m.Case("01"):
                sync += self.match.eq(self.data == self.data_in)
            # Write: capture the incoming key, report no match
            with m.Case("10"):
                sync += self.data.eq(self.data_in)
                sync += self.match.eq(0)
            # Any other command (11): clear both key and match
            with m.Case():
                sync += self.match.eq(0)
                sync += self.data.eq(0)

        return m
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen.cli import verilog, rtlil
class LFSRPolynomial(set):
    """ implements a polynomial for use in LFSR

    The polynomial is stored as the set of its exponents; exponent 0
    (the constant term) is always present.
    """
    def __init__(self, exponents=()):
        """ Arguments:
            * exponents: any iterable of non-negative ints (list, tuple,
              generator, or another LFSRPolynomial)

        Raises AssertionError (carrying a TypeError/ValueError instance
        as its message) when an exponent is not an int or is negative.
        """
        # Materialise first: the values are walked twice (validation,
        # then set construction) and a one-shot iterator such as a
        # generator would be empty on the second pass.
        exponents = tuple(exponents)
        for e in exponents:
            # NOTE: kept as asserts so the exception type seen by
            # existing callers (AssertionError) does not change.
            assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
            assert (e >= 0), ValueError("%d must not be negative" % e)
        set.__init__(self, set(exponents).union({0}))  # must contain zero

    @property
    def max_exponent(self):
        """ The largest exponent (the degree of the polynomial). """
        return max(self)  # derived from set, so this returns the max exponent

    @property
    def exponents(self):
        """ All exponents as a list, sorted from highest to lowest. """
        return sorted(self, reverse=True)

    def __str__(self):
        # key 2 is not "exponent 2": it stands for min(i, 2), i.e. every
        # exponent >= 2 is rendered with the "x^{}" template
        expd = {0: "1", 1: 'x', 2: "x^{}"}
        retval = map(lambda i: expd[min(i, 2)].format(i), self.exponents)
        return " + ".join(retval)

    def __repr__(self):
        return "LFSRPolynomial(%s)" % self.exponents
-# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
# One polynomial per register width from 2 to 24 bits; each tuple lists
# the exponents of that polynomial, highest first.
LFSR_POLY_2 = LFSRPolynomial((2, 1, 0))
LFSR_POLY_3 = LFSRPolynomial((3, 2, 0))
LFSR_POLY_4 = LFSRPolynomial((4, 3, 0))
LFSR_POLY_5 = LFSRPolynomial((5, 3, 0))
LFSR_POLY_6 = LFSRPolynomial((6, 5, 0))
LFSR_POLY_7 = LFSRPolynomial((7, 6, 0))
LFSR_POLY_8 = LFSRPolynomial((8, 6, 5, 4, 0))
LFSR_POLY_9 = LFSRPolynomial((9, 5, 0))
LFSR_POLY_10 = LFSRPolynomial((10, 7, 0))
LFSR_POLY_11 = LFSRPolynomial((11, 9, 0))
LFSR_POLY_12 = LFSRPolynomial((12, 11, 10, 4, 0))
LFSR_POLY_13 = LFSRPolynomial((13, 12, 11, 8, 0))
LFSR_POLY_14 = LFSRPolynomial((14, 13, 12, 2, 0))
LFSR_POLY_15 = LFSRPolynomial((15, 14, 0))
LFSR_POLY_16 = LFSRPolynomial((16, 15, 13, 4, 0))
LFSR_POLY_17 = LFSRPolynomial((17, 14, 0))
LFSR_POLY_18 = LFSRPolynomial((18, 11, 0))
LFSR_POLY_19 = LFSRPolynomial((19, 18, 17, 14, 0))
LFSR_POLY_20 = LFSRPolynomial((20, 17, 0))
LFSR_POLY_21 = LFSRPolynomial((21, 19, 0))
LFSR_POLY_22 = LFSRPolynomial((22, 21, 0))
LFSR_POLY_23 = LFSRPolynomial((23, 18, 0))
LFSR_POLY_24 = LFSRPolynomial((24, 23, 22, 17, 0))
class LFSR(LFSRPolynomial, Elaboratable):
    """ implements a Linear Feedback Shift Register
    """
    def __init__(self, polynomial):
        """ Inputs:
            ------
            :polynomial: the polynomial to feedback on.  may be a
                         LFSRPolynomial instance or an iterable of ints
                         (list/tuple/generator)
            :enable:     enable (set LO to disable.  NOTE: defaults to HI)

            Outputs:
            -------
            :state:      the LFSR state.  bitwidth is taken from the
                         polynomial maximum exponent.

            Note: passing an LFSRPolynomial is fine because the class
            derives from set(), so
            LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
        """
        LFSRPolynomial.__init__(self, polynomial)
        self.state = Signal(self.max_exponent, reset=1)
        self.enable = Signal(reset=1)

    def elaborate(self, platform):
        m = Module()

        # nothing to build when the highest exponent is 0 or 1
        # (exponent 0 is always present in the polynomial)
        if self.max_exponent <= 1:
            return m

        # XOR together the state bits selected by the exponents.
        # Starting the chain from a constant 0 leaves the result
        # unchanged.
        taps = Const(0)
        for exponent in self:
            if exponent == 0:
                continue  # exponent 0 does not select a state bit
            taps = taps ^ self.state[exponent - 1]

        # next state: the feedback becomes bit zero and the lower
        # state bits move up by one; the top bit is dropped
        shifted = Cat(taps, self.state[:-1])

        # only advance while enabled
        with m.If(self.enable):
            m.d.sync += self.state.eq(shifted)

        return m
# example: Poly24
if __name__ == '__main__':
    # Convert a 24-bit LFSR to RTLIL and store the text on disk.
    rtlil_text = rtlil.convert(LFSR(LFSR_POLY_24))
    with open("lfsr2_p24.il", "w") as out_file:
        out_file.write(rtlil_text)
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Module
-from typing import Iterable, Optional, Iterator, Any, Union
-from typing_extensions import final
class LFSRPolynomial(set):
    # Stub for the set-of-exponents polynomial class.
    def __init__(self, exponents: Iterable[int] = ()) -> None: ...
    # Largest exponent in the set.
    @property
    def max_exponent(self) -> int: ...
    # Exponents as a list, highest first.
    @property
    def exponents(self) -> list[int]: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
class LFSR:
    # Stub for the shift-register class.
    # `polynomial` is either an iterable of int exponents or an
    # LFSRPolynomial.
    def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...

    # NOTE(review): presumably the register width in bits -- confirm
    # against the implementation module.
    @property
    def width(self) -> int: ...

    def elaborate(self, platform: Any) -> Module: ...
+++ /dev/null
- python3 Cam.py generate -t v > Cam.v
+++ /dev/null
-from nmigen import Cat, Memory, Module, Signal, Elaboratable
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
class MemorySet(Elaboratable):
    """ One memory holding an (active bit, data, tag) word per set.

    Reading looks up the word addressed by ``cset`` and reports a valid
    hit when the stored tag equals ``tag`` and the stored active bit is
    set. Writing (when the write port is enabled) stores
    ``Cat(1, data_i, tag)`` at ``cset``.
    """

    def __init__(self, data_size, tag_size, set_count, active):
        """ Arguments:
            * data_size: (bits) width of the data field
            * tag_size: (bits) width of the tag field
            * set_count: (number) depth of the memory
            * active: bit index of the active flag inside a stored word;
              the data field starts at the next bit up
        """
        self.active = active
        self.data_size = data_size
        self.tag_size = tag_size

        # stored word = tag + data + one extra bit
        memory_width = tag_size + data_size + 1

        # XXX TODO, use rd-enable and wr-enable?
        self.mem = Memory(memory_width, set_count)
        self.r = self.mem.read_port()
        self.w = self.mem.write_port()

        # inputs (address)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)  # The tag to find
        self.data_i = Signal(data_size)  # Incoming data

        # outputs
        self.valid = Signal()
        self.data_o = Signal(data_size)  # Outgoing data (excludes tag)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        m.submodules.mem = self.mem
        m.submodules.r = self.r
        m.submodules.w = self.w
        rd, wr = self.r, self.w

        # temporaries
        active_bit = Signal()
        tag_valid = Signal()

        # bit positions of the fields inside a stored word
        data_lo = self.active + 1
        data_hi = data_lo + self.data_size
        tag_hi = data_hi + self.tag_size

        # the read port is always addressed by the requested set
        comb += rd.addr.eq(self.cset)
        word = rd.data

        # active flag of the stored word
        comb += active_bit.eq(word[self.active])
        # requested tag against stored tag
        comb += tag_valid.eq(self.tag == word[data_hi:tag_hi])
        # valid only when the tags agree AND the entry is marked active
        comb += self.valid.eq(tag_valid & active_bit)

        # output data: TODO, check rd-enable?
        comb += self.data_o.eq(word[data_lo:data_hi])

        # the write port only gets its address and data while its
        # enable is high
        # NOTE(review): the written word places the constant 1 at bit
        # 0, which lines up with the read side when active == 0 --
        # confirm no caller uses another value.
        with m.If(wr.en):
            comb += wr.addr.eq(self.cset)
            comb += wr.data.eq(Cat(1, self.data_i, self.tag))

        return m
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.cli import main
-from PteEntry import PteEntry
class PermissionValidator():
    """ The purpose of this Module is to check the Permissions of a given
        PTE against the requested access permissions.

        This module will either validate (by setting the valid bit HIGH)
        the request or find a permission fault and invalidate (by setting
        the valid bit LOW) the request
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * valid HIGH when permissions are correct
        """
        # Internal
        self.pte_entry = PteEntry(asid_size, pte_size)

        # Input
        self.data = Signal(asid_size + pte_size)
        self.xwr = Signal(3)  # Execute, Write, Read
        self.super_mode = Signal(1)  # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # Address Space IDentifier (ASID); sized from asid_size so it
        # has the same width as the ASID parsed out of `data`
        self.asid = Signal(asid_size)

        # Output
        self.valid = Signal(1)  # Denotes if the permissions are correct

    def elaborate(self, platform=None):
        m = Module()

        m.submodules.pte_entry = self.pte_entry
        entry = self.pte_entry

        m.d.comb += entry.i.eq(self.data)

        # ASID match or Global Permission
        asid_ok = (entry.asid == self.asid) | entry.g
        # Requested Execute, Write, Read (XWR) must equal the entry's
        xwr_ok = entry.xwr == self.xwr

        # The entry must be valid, belong to this address space (or be
        # global) and carry exactly the requested XWR bits
        with m.If(entry.v & asid_ok & xwr_ok):
            # Supervisor Logic
            with m.If(self.super_mode):
                # Valid if entry is not in user mode or supervisor
                # has Supervisor User Memory (SUM) access via the
                # SUM bit in the sstatus register
                m.d.comb += self.valid.eq((~entry.u) | self.super_access)
            # User logic
            with m.Else():
                # Valid if the entry is in user mode only
                m.d.comb += self.valid.eq(entry.u)
        # Any failed check invalidates the request
        with m.Else():
            m.d.comb += self.valid.eq(0)

        return m
\ No newline at end of file
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.cli import main
class PteEntry():
    """ Central parser for Page Table Entries (PTE).

        Splits an input word made of an Address Space IDentifier (ASID)
        stacked on top of a PTE into its control bits, so that the bit
        positions are written down in exactly one place.

        The layout follows the standard PTE given by
            Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
        The ASID occupies the bits directly above the PTE.

        A valid input Signal would be:
                  ASID   PTE
            Bits:[78-64][63-0]

        The output PTE value will include the control bits.
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * d     The Dirty bit from the PTE portion of i
            * a     The Accessed bit from the PTE portion of i
            * g     The Global bit from the PTE portion of i
            * u     The User Mode bit from the PTE portion of i
            * xwr   The Execute/Write/Read bits from the PTE portion of i
            * v     The Valid bit from the PTE portion of i
            * asid  The asid portion of i
            * pte   The pte portion of i
        """
        # Internal: the ASID sits directly above the PTE
        self.asid_start = pte_size
        self.asid_end = pte_size + asid_size

        # Input
        self.i = Signal(asid_size + pte_size)

        # Output
        self.d = Signal(1)  # Dirty bit (From pte)
        self.a = Signal(1)  # Accessed bit (From pte)
        self.g = Signal(1)  # Global Access (From pte)
        self.u = Signal(1)  # User Mode (From pte)
        self.xwr = Signal(3)  # Bits 3..1 of the pte
        self.v = Signal(1)  # Valid (From pte)
        self.asid = Signal(asid_size)  # Associated Address Space IDentifier
        self.pte = Signal(pte_size)  # Full Page Table Entry

    def elaborate(self, platform=None):
        m = Module()
        comb = m.d.comb
        word = self.i

        # Control bits, lowest bit first
        comb += self.v.eq(word[0])
        comb += self.xwr.eq(word[1:4])
        comb += self.u.eq(word[4])
        comb += self.g.eq(word[5])
        comb += self.a.eq(word[6])
        comb += self.d.eq(word[7])

        # Whole PTE (control bits included) and the ASID above it
        comb += self.pte.eq(word[0:self.asid_start])
        comb += self.asid.eq(word[self.asid_start:self.asid_end])

        return m
\ No newline at end of file
+++ /dev/null
-Online simulator of 4-way set-associative cache:
-Python simulator of a N-way set-associative cache:
-import sys
-from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
-from nmigen.compat.genlib import fsm
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-from AddressEncoder import AddressEncoder
-from MemorySet import MemorySet
-# TODO: use a LFSR that advances continuously and picking the bottom
-# few bits from it to select which cache line to replace, instead of PLRU
-# http://bugs.libre-riscv.org/show_bug.cgi?id=71
-from plru import PLRU
-from LFSR import LFSR, LFSR_POLY_24
# Command encodings for SetAssociativeCache.command:
#   SA_NA - no action (none), SA_RD - read, SA_WR - write
SA_NA, SA_RD, SA_WR = "00", "01", "10"
class SetAssociativeCache(Elaboratable):
    """ Set Associative Cache Memory

        The purpose of this module is to generate a memory cache given the
        constraints passed in. This will create a n-way set associative
        cache.
        It is expected for the SV TLB that the VMA will provide the set
        number while the ASID provides the tag (still to be decided).
    """

    def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
        """ Arguments
            * tag_size (bits): The bit count of the tag
            * data_size (bits): The bit count of the data to be stored
            * set_count (number): The number of sets/entries in the cache
            * way_count (number): The number of slots a data can be stored
                                  in one set
            * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
                    set/entry to write to.  otherwise, use a PLRU
        """
        # Internals
        self.lfsr_mode = lfsr
        self.way_count = way_count  # The number of slots in one set
        self.tag_size = tag_size    # The bit count of the tag
        self.data_size = data_size  # The bit count of the data to be stored

        # set up Memory array: one MemorySet per way
        self.mem_array = Array()  # memory array
        for i in range(way_count):
            ms = MemorySet(data_size, tag_size, set_count, active=0)
            self.mem_array.append(ms)

        # Finds valid entries
        self.encoder = AddressEncoder(way_count)

        # setup PLRU or LFSR
        if lfsr:
            # LFSR mode
            self.lfsr = LFSR(LFSR_POLY_24)
        else:
            # PLRU mode
            self.plru = PLRU(way_count)  # One block for plru calculations
            self.plru_array = Array()  # PLRU data on each set
            for i in range(set_count):
                name = "plru%d" % i
                self.plru_array.append(Signal(self.plru.TLBSZ, name=name))

        # Input
        self.enable = Signal(1)  # Whether the cache is enabled
        self.command = Signal(2)  # 00=None, 01=Read, 10=Write (see SA_XX)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)  # The tag to find
        self.data_i = Signal(data_size)  # The input data

        # Output
        self.ready = Signal(1)  # 0 => Processing 1 => Ready for commands
        self.hit = Signal(1)  # Tag matched one way in the given set
        self.multiple_hit = Signal(1)  # Tag matched many ways in the set
        self.data_o = Signal(data_size)  # The data linked to the matched tag

    def check_tags(self, m):
        """ Validate the tags in the selected set. If one and only one
            tag matches, output its data and advance the FSM to
            FINISHED_READ (also kicking the PLRU in PLRU mode).
            We only advance to next state if a single hit is found.
        """
        # Vector of per-way valid results; a zero denotes an invalid way
        valid_vector = [self.mem_array[i].valid
                        for i in range(self.way_count)]

        # Pass encoder the valid vector
        m.d.comb += self.encoder.i.eq(Cat(*valid_vector))

        # Only one entry should be marked
        # This is due to already verifying the tags
        # matched and the valid bit is high
        with m.If(self.hit):
            m.next = "FINISHED_READ"
            # Pull out data from the read port
            data = self.mem_array[self.encoder.o].data_o
            m.d.comb += self.data_o.eq(data)
            if not self.lfsr_mode:
                self.access_plru(m)

        # Multiple tags matched: no data is returned
        with m.Elif(self.multiple_hit):
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

        # No tag matches means no data
        with m.Else():
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

    def access_plru(self, m):
        """ An entry was accessed and the plru tree must now be updated
        """
        # Pull out the set's entry being edited
        plru_entry = self.plru_array[self.cset]
        m.d.comb += [
            # Set the plru data to the current state
            self.plru.plru_tree.eq(plru_entry),
            # Set that the cache was accessed
            self.plru.lu_access_i.eq(1)
        ]

    def read(self, m):
        """ Go through the read process of the cache.
            This takes two cycles to complete. First it checks for a valid
            tag and secondly it updates the LRU values.
        """
        with m.FSM() as fsm_read:
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                # check_tags will set the state if the conditions are met
                self.check_tags(m)
            with m.State("FINISHED_READ"):
                m.next = "READY"
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    # store the updated tree back into this set's slot
                    plru_tree_o = self.plru.plru_tree_o
                    m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)

    def write_entry(self, m):
        """ Select the way to overwrite (PLRU or LFSR) and enable its
            write port when the encoder reports a single match.
        """
        if not self.lfsr_mode:
            m.d.comb += [
                # set cset (mem address) into PLRU
                self.plru.plru_tree.eq(self.plru_array[self.cset]),
                # and connect plru to encoder for write
                self.encoder.i.eq(self.plru.replace_en_o)
            ]
            write_port = self.mem_array[self.encoder.o].w
        else:
            # use the LFSR to generate a random(ish) one of the mem array
            lfsr_output = Signal(max=self.way_count)
            lfsr_random = Signal(max=self.way_count)
            m.d.comb += lfsr_output.eq(self.lfsr.state)  # lose some bits
            # address too big, limit to range of array: valid way
            # indices are 0..way_count-1, so way_count itself must be
            # wrapped as well (hence >=, not >)
            m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count,
                                           lfsr_output - self.way_count,
                                           lfsr_output))
            write_port = self.mem_array[lfsr_random].w
            # NOTE(review): nothing in this branch drives encoder.i, so
            # confirm single_match below can actually go high in LFSR
            # mode.

        # then if there is a match from the encoder, enable the selected
        # write
        with m.If(self.encoder.single_match):
            m.d.comb += write_port.en.eq(1)

    def write(self, m):
        """ Go through the write process of the cache.
            This takes two cycles to complete. First it writes the entry,
            and secondly it updates the PLRU (in plru mode)
        """
        with m.FSM() as fsm_write:
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                self.write_entry(m)
                m.next = "FINISHED_WRITE"
            with m.State("FINISHED_WRITE"):
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    plru_entry = self.plru_array[self.cset]
                    m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
                m.next = "READY"

    def elaborate(self, platform=None):
        m = Module()

        # ----
        # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
        # ----
        m.submodules.AddressEncoder = self.encoder
        if self.lfsr_mode:
            m.submodules.LFSR = self.lfsr
        else:
            m.submodules.PLRU = self.plru

        for i, mem in enumerate(self.mem_array):
            setattr(m.submodules, "mem%d" % i, mem)

        # ----
        # select mode: PLRU connect to encoder, LFSR do... something
        # ----
        if not self.lfsr_mode:
            # Set what entry was hit
            m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
        else:
            # enable LFSR
            m.d.comb += self.lfsr.enable.eq(self.enable)

        # ----
        # connect hit/multiple hit to encoder output
        # ----
        m.d.comb += [
            self.hit.eq(self.encoder.single_match),
            self.multiple_hit.eq(self.encoder.multiple_match),
        ]

        # ----
        # connect incoming data/tag/cset(addr) to mem_array
        # ----
        for mem in self.mem_array:
            write_port = mem.w
            m.d.comb += [
                mem.cset.eq(self.cset),
                mem.tag.eq(self.tag),
                mem.data_i.eq(self.data_i),
                write_port.en.eq(0),  # default: disable write
            ]

        # ----
        # Commands: READ/WRITE/TODO
        # ----
        with m.If(self.enable):
            with m.Switch(self.command):
                # Search all sets at a particular tag
                with m.Case(SA_RD):
                    self.read(m)
                with m.Case(SA_WR):
                    self.write(m)
                    # Maybe catch multiple tags write here?
                    # TODO
                # TODO: invalidate/flush, flush-all?

        return m

    def ports(self):
        """ Return the input and output signals of the cache. """
        return [self.enable, self.command, self.cset, self.tag, self.data_i,
                self.ready, self.hit, self.multiple_hit, self.data_o]
if __name__ == '__main__':
    # Emit RTLIL for the same cache geometry twice: first with the
    # default (PLRU) replacement, then with the LFSR replacement.
    for use_lfsr, il_path in ((False, "SetAssociativeCache.il"),
                              (True, "SetAssociativeCacheLFSR.il")):
        cache = SetAssociativeCache(4, 8, 4, 6, use_lfsr)
        il_text = rtlil.convert(cache, ports=cache.ports())
        with open(il_path, "w") as il_file:
            il_file.write(il_text)
+++ /dev/null
""" TLB Module

    The expected form of the data is:
    * Item (Bits)
    * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
"""
-from nmigen import Memory, Module, Signal, Cat
-from nmigen.cli import main
-from PermissionValidator import PermissionValidator
-from Cam import Cam
class TLB():
    """ Translation Lookaside Buffer built from a CAM (keyed by VMA), a
        memory holding ASID+PTE words, and a permission validator.
    """

    def __init__(self, asid_size, vma_size, pte_size, L1_size):
        """ Arguments
            * asid_size: Address Space IDentifier (ASID) typically 15 bits
            * vma_size: Virtual Memory Address (VMA) typically 36 bits
            * pte_size: Page Table Entry (PTE) typically 64 bits
            * L1_size: number of L1 entries (currently overridden, see
                       below)

            Notes:
            These arguments should represent the largest possible size
            defined by the MODE settings. See
            Volume II: RISC-V Privileged Architectures V1.10 Page 57
        """
        # Internal
        self.state = 0
        # L1 Cache Modules
        L1_size = 8  # XXX overridden incoming argument?
        self.cam_L1 = Cam(vma_size, L1_size)
        self.mem_L1 = Memory(asid_size + pte_size, L1_size)

        # Permission Validator
        self.perm_validator = PermissionValidator(asid_size, pte_size)

        # Inputs
        self.supermode = Signal(1)  # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # 00=None, 01=Search, 10=Write L1, 11=Write L2
        self.command = Signal(2)
        self.xwr = Signal(3)  # Execute, Write, Read
        self.mode = Signal(4)  # 4 bits for access to Sv48 on Rv64
        self.address_L1 = Signal(max=L1_size)
        self.asid = Signal(asid_size)  # Address Space IDentifier (ASID)
        self.vma = Signal(vma_size)  # Virtual Memory Address (VMA)
        self.pte_in = Signal(pte_size)  # To be saved Page Table Entry (PTE)

        # Outputs
        self.hit = Signal(1)  # Denotes if the VMA had a mapped PTE
        self.perm_valid = Signal(1)  # Denotes if the permissions are correct
        self.pte_out = Signal(pte_size)  # PTE that was mapped to by the VMA

    def search(self, m, read_L1, write_L1):
        """ searches the TLB

            Arguments:
            * m: the Module being built
            * read_L1: read port of the L1 memory
            * write_L1: write port of the L1 memory (held disabled here)
        """
        m.d.comb += [
            write_L1.en.eq(0),
            self.cam_L1.write_enable.eq(0),
            self.cam_L1.data_in.eq(self.vma)
        ]
        # Match found in L1 CAM
        match_found = Signal(reset_less=True)
        m.d.comb += match_found.eq(self.cam_L1.single_match
                                   | self.cam_L1.multiple_match)
        with m.If(match_found):
            # Memory shortcut variables
            mem_address = self.cam_L1.match_address
            # Memory Logic
            m.d.comb += read_L1.addr.eq(mem_address)
            # Permission Validator Logic
            m.d.comb += [
                self.hit.eq(1),
                # Set permission validator data to the correct
                # register file data according to CAM match
                # address
                self.perm_validator.data.eq(read_L1.data),
                # Execute, Read, Write
                self.perm_validator.xwr.eq(self.xwr),
                # Supervisor Mode
                self.perm_validator.super_mode.eq(self.supermode),
                # Supervisor Access
                self.perm_validator.super_access.eq(self.super_access),
                # Address Space IDentifier (ASID)
                self.perm_validator.asid.eq(self.asid),
                # Output result of permission validation
                self.perm_valid.eq(self.perm_validator.valid)
            ]
            # Only output PTE if permissions are valid
            with m.If(self.perm_validator.valid):
                # XXX TODO - dummy for now
                reg_data = Signal.like(self.pte_out)
                m.d.comb += [
                    self.pte_out.eq(reg_data)
                ]
            with m.Else():
                m.d.comb += [
                    self.pte_out.eq(0)
                ]
        # Miss Logic
        with m.Else():
            m.d.comb += [
                self.hit.eq(0),
                self.perm_valid.eq(0),
                self.pte_out.eq(0)
            ]

    def write_l1(self, m, read_L1, write_L1):
        """ writes to the L1 cache

            Arguments:
            * m: the Module being built
            * read_L1: read port of the L1 memory (unused here)
            * write_L1: write port of the L1 memory
        """
        # Memory_L1 Logic
        m.d.comb += [
            write_L1.en.eq(1),
            write_L1.addr.eq(self.address_L1),
            # The Cat places arguments from LSB -> MSB
            write_L1.data.eq(Cat(self.pte_in, self.asid))
        ]
        # CAM_L1 Logic
        m.d.comb += [
            self.cam_L1.write_enable.eq(1),
            self.cam_L1.data_in.eq(self.vma),
            # the key must land in the CAM entry with the same index as
            # the memory word written above, otherwise a later search
            # would read back the wrong word
            self.cam_L1.address_in.eq(self.address_L1),
        ]

    def elaborate(self, platform):
        m = Module()
        # Add submodules
        # Submodules for L1 Cache
        m.submodules.cam_L1 = self.cam_L1
        m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
        m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()
        # Permission Validator Submodule
        m.submodules.perm_validator = self.perm_validator

        # When MODE specifies translation
        # TODO add in different bit length handling ie prefix 0s
        tlb_enable = Signal(reset_less=True)
        m.d.comb += tlb_enable.eq(self.mode != 0)

        with m.If(tlb_enable):
            m.d.comb += [
                self.cam_L1.enable.eq(1)
            ]
            with m.Switch(self.command):
                # Search
                with m.Case("01"):
                    self.search(m, read_L1, write_L1)

                # Write L1
                # Expected that the miss will be handled in software
                with m.Case("10"):
                    self.write_l1(m, read_L1, write_L1)

                # TODO
                # with m.Case("11"):

        # When disabled
        with m.Else():
            m.d.comb += [
                self.cam_L1.enable.eq(0),
                # XXX TODO - self.reg_file.enable.eq(0),
                self.hit.eq(0),
                self.perm_valid.eq(0),  # XXX TODO, check this
                self.pte_out.eq(0)
            ]
        return m
if __name__ == '__main__':
    # Command-line entry point: TLB(15, 36, 64, 4) exposed through the
    # nmigen CLI together with the ports of its L1 CAM.
    tlb = TLB(15, 36, 64, 4)
    tlb_inputs = [tlb.supermode, tlb.super_access, tlb.command,
                  tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
                  tlb.vma, tlb.pte_in]
    tlb_outputs = [tlb.hit, tlb.perm_valid, tlb.pte_out]
    main(tlb, ports=tlb_inputs + tlb_outputs + tlb.cam_L1.ports())
+++ /dev/null
-#include <cstdint>
-#include <iostream>
-#include <cmath>
// Cache geometry
#define NWAY  4    // ways per set; sizes Block::cell and picks its tables
#define NLINE 256  // 256 = 2^8, matching the 8-bit AddressField::index
// NOTE(review): presumably lookup result codes -- confirm at the use sites
#define HIT  0
#define MISS 1
// NOTE(review): meaning not visible in this part of the file -- confirm
#define MS 1000
-/*
-For a detailed TreePLRU derivation, see: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
-Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
-four-way set associative - three bits
-   each bit represents one branch point in a binary decision tree; let 1
-   represent that the left side has been referenced more recently than the
-   right side, and 0 vice-versa
-              are all 4 lines valid?
-                   /       \
-                 yes        no, use an invalid line
-                  |
-                  |
-                  |
-             bit_0 == 0?            state | replace      ref to | next state
-              /       \             ------+--------      -------+-----------
-             y         n             00x  |  line_0      line_0 |    11_
-            /           \            01x  |  line_1      line_1 |    10_
-     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
-       /    \          /    \        1x1  |  line_3      line_3 |    0_0
-      y      n        y      n
-     /        \      /        \        ('x' means       ('_' means unchanged)
-   line_0  line_1  line_2  line_3      don't care)
- 8-way set associative -  7 = 1+2+4 bits
-16-way set associative - 15 = 1+2+4+8 bits
-32-way set associative - 31 = 1+2+4+8+16 bits
-64-way set associative - 63 = 1+2+4+8+16+32 bits
-*/
-using namespace std;
-// Bit-field view of a 64-bit address, least-significant field first.
-// The four widths add up to 64 bits.
-struct AddressField {
-    uint64_t wd_idx : 2;   // Unused
-    uint64_t offset : 4;   // Unused
-    uint64_t index  : 8;   // NLINE = 256 = 2^8
-    uint64_t tag    : 50;
-};
-// Overlays a raw pointer with its AddressField decomposition so that the
-// index/tag of an address can be read without manual shifting.
-union Address {
-    uint32_t* p;
-    AddressField fields;
-};
-// One cache line (way) of a set: a valid bit plus the stored tag.
-struct Cell {
-    bool v;         // line holds a fetched tag
-    uint64_t tag;
-    Cell() : v(false), tag(0) {}
-    // True when the line is valid and its stored tag equals `tag`.
-    bool isHit(uint64_t tag) const {
-        return v && (tag == this->tag);
-    }
-    // Fill the line for `address`: remember its tag and mark the line valid.
-    // Only the tag field is used; the pointer is never dereferenced.
-    void fetch(uint32_t* address) {
-        Address addr;
-        addr.p = address;
-        tag = addr.fields.tag;
-        v = true;
-    }
-};
-// Print a cell as " v:<valid> tag:<hex tag>".
-ostream& operator<<(ostream & out, const Cell& cell) {
-    // std::hex is sticky: switch back to decimal afterwards so that numbers
-    // printed later on the same stream (e.g. Block::state) stay decimal.
-    out << " v:" << cell.v << " tag:" << hex << cell.tag << dec;
-    return out;
-}
-// One cache set: NWAY cells plus the tree-PLRU state bits.
-//
-// For way i:
-//   mask[i]       selects the tree bits on the path to way i,
-//   value[i]      is what those bits hold when way i is the replacement victim,
-//   next_value[i] is what they are set to after way i has been referenced.
-struct Block {
-    Cell cell[NWAY];
-    uint32_t state;
-    // The tables are shared, immutable and identical for every Block, so they
-    // are static storage instead of per-instance heap allocations (the
-    // original new[] arrays were never freed).
-    const uint64_t *mask;       // Mask the state to get accurate value for specified 1 bit.
-    const uint64_t *value;
-    const uint64_t *next_value;
-
-    Block() : state(0), mask(NULL), value(NULL), next_value(NULL) {
-        static const uint64_t mask4[4]  = {0b110, 0b110, 0b101, 0b101};
-        static const uint64_t value4[4] = {0b000, 0b010, 0b100, 0b101};
-        static const uint64_t next4[4]  = {0b110, 0b100, 0b001, 0b000};
-        static const uint64_t mask8[8]  = {0b1101000, 0b1101000, 0b1100100, 0b1100100,
-                                           0b1010010, 0b1010010, 0b1010001, 0b1010001};
-        static const uint64_t value8[8] = {0b0000000, 0b0001000, 0b0100000, 0b0100100,
-                                           0b1000000, 0b1000010, 0b1010000, 0b1010001};
-        static const uint64_t next8[8]  = {0b1101000, 0b1100000, 0b1000100, 0b1000000,
-                                           0b0010010, 0b0010000, 0b0000001, 0b0000000};
-        switch (NWAY) {
-            case 4:
-                mask = mask4;
-                value = value4;
-                next_value = next4;
-                break;
-            case 8:
-                mask = mask8;
-                value = value8;
-                next_value = next8;
-                break;
-            //TODO - more NWAY goes here.
-            default:
-                // Tables stay NULL; the set is unusable for this NWAY.
-                std::cout << "Error definition NWAY = " << NWAY << std::endl;
-        }
-    }
-
-    // Look `tag` up in every way.  On a hit store the way number in *pway
-    // and return pway; return NULL on a miss.
-    uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
-        for (int i = 0; i < NWAY; ++i) {
-            if (cell[i].isHit(tag)) {
-                *pway = i;
-                return pway;
-            }
-        }
-        return NULL;
-    }
-
-    // Miss handling: choose a victim way, fill it with `address` and mark it
-    // as most recently used.
-    void setLRU(uint32_t *address) {
-        int way = -1;
-        uint32_t st = state;
-        // "are all lines valid?  no -> use an invalid line"
-        for (int i = 0; i < NWAY; ++i) {
-            if (!cell[i].v) {
-                way = i;
-                break;
-            }
-        }
-        // All lines valid: follow the PLRU tree to the victim.
-        if (way < 0) {
-            way = 0;
-            for (int i = 0; i < NWAY; ++i) {
-                if ((state & mask[i]) == value[i]) {
-                    way = i;
-                    break;
-                }
-            }
-        }
-        // Same update as a reference to `way` (see set()).  When `way` was the
-        // PLRU victim this equals the original `state ^= mask[way]`.
-        state = static_cast<uint32_t>((state & ~mask[way]) | next_value[way]);
-        cell[way].fetch(address);
-        cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
-    }
-
-    // Hit test for `address`: returns pway (with *pway set) on a hit and
-    // NULL on a miss.  The original indexed the returned way pointer by the
-    // address offset, yielding a pointer past the caller's variable.
-    uint32_t *get(uint32_t *address, uint32_t *pway) {
-        Address addr;
-        addr.p = address;
-        return getByTag(addr.fields.tag, pway);
-    }
-
-    // Access `address` in this set; returns HIT or MISS and updates the
-    // PLRU state accordingly.
-    int set(uint32_t *address) {
-        uint32_t way = 0;
-        uint32_t *p = get(address, &way);
-        if (p != NULL) {
-            printf("HIT: address:%p ref_to way:%u state %X --> ", address, way, state);
-            state &= ~mask[way];
-            printf("%X --> ", state);
-            state |= next_value[way];
-            printf("%X\n", state);
-            // *p = *address;  //skip since address is fake.
-            return HIT;
-        } else {
-            setLRU(address);
-            return MISS;
-        }
-    }
-};
-// Print a set as "state:<state> " followed by each of its NWAY cells.
-ostream& operator<<(ostream & out, const Block& block) {
-    out << "state:" << block.state << " ";
-    for (int i = 0; i < NWAY; i++) {
-        out << block.cell[i];
-    }
-    return out;
-}
-// The whole cache: NLINE sets plus hit/miss counters indexed by HIT/MISS.
-struct Cache {
-    Block block[NLINE];
-    uint32_t count[2];
-    Cache() { count[HIT] = 0; count[MISS] = 0; }
-    // Simulate one access: select the set from the address index field and
-    // count the HIT/MISS result reported by that set.
-    void access(uint32_t* address) {
-        Address addr;
-        addr.p = address;
-        Block& b = block[addr.fields.index];
-        ++count[b.set(address)];
-    }
-};
-// Print the hit/miss summary followed by one line per set.
-ostream& operator<<(ostream & out, const Cache& cache) {
-    out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
-    for (int i = 0; i < NLINE; i++) {
-        out << cache.block[i] << endl;
-    }
-    return out;
-}
-Cache cache;
-// Replay the memory-access pattern of an MS x MS matrix multiplication
-// (res = m1 * m2) against the global cache.  The pointers are only used as
-// addresses and are never dereferenced.
-void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
-{
-    int x, i, j;
-    for (i = 0; i < MS; i++) {
-        for (j = 0; j < MS; j++) {
-            cache.access(res + i*MS + j);
-            for (x = 0; x < MS; x++) {
-                cache.access(m1 + i*MS + x);
-                cache.access(m2 + x*MS + j);
-                cache.access(res + i*MS + j);
-                // res[i][j] += m1[i][x] * m2[x][j];
-                cache.access(res + i*MS + j);
-            }
-        }
-    }
-}
-// Run the matrix-multiply access trace on three fake matrices and print
-// the resulting cache summary.
-int main()
-{
-    uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL;  // fake virtual address; don't access it
-    uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL;  // fake virtual address; don't access it
-    uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don't access it
-    multiply(m1, m2, res);
-    cout << cache << endl;
-    return 0;
-}
+++ /dev/null
-two-way set associative - one bit
-   indicates which line of the two has been referenced more recently
-four-way set associative - three bits
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
-              are all 4 lines valid?
-                   /       \
-                 yes        no, use an invalid line
-                  |
-                  |
-                  |
-             bit_0 == 0?            state | replace      ref to | next state
-              /       \             ------+--------      -------+-----------
-             y         n             00x  |  line_0      line_0 |    11_
-            /           \            01x  |  line_1      line_1 |    10_
-     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
-       /    \          /    \        1x1  |  line_3      line_3 |    0_0
-      y      n        y      n
-     /        \      /        \        ('x' means       ('_' means unchanged)
-   line_0  line_1  line_2  line_3      don't care)
- (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
- Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
-note that there is a 6-bit encoding for true LRU for four-way set associative
- bit 0: bank[1] more recently used than bank[0]
- bit 1: bank[2] more recently used than bank[0]
- bit 2: bank[2] more recently used than bank[1]
- bit 3: bank[3] more recently used than bank[0]
- bit 4: bank[3] more recently used than bank[1]
- bit 5: bank[3] more recently used than bank[2]
- this results in 24 valid bit patterns within the 64 possible bit patterns
- (4! possible valid traces for bank references)
- e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
- you can implement a state machine with a 256x6 ROM (6-bit state encoding
- appended with a 2-bit bank reference input will yield a new 6-bit state),
- and you can implement an LRU bank indicator with a 64x2 ROM
+++ /dev/null
-from nmigen import Const
-# Exception cause codes, each a 64-bit nmigen constant.
-# NOTE(review): the values look like the RISC-V mcause exception codes --
-# confirm against the privileged specification.
-INSTR_ACCESS_FAULT = Const(1, 64)
-ILLEGAL_INSTR = Const(2, 64)
-BREAKPOINT = Const(3, 64)
-LD_ADDR_MISALIGNED = Const(4, 64)
-LD_ACCESS_FAULT = Const(5, 64)
-ST_ADDR_MISALIGNED = Const(6, 64)
-ST_ACCESS_FAULT = Const(7, 64)
-ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
-ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
-ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
-INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
-LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
-STORE_PAGE_FAULT = Const(15, 64) # Store page fault
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: Florian Zaruba, ETH Zurich
-# Date: 19/04/2017
-# Description: Memory Management Unit for Ariane, contains TLB and
-# address translation unit. SV39 as defined in RISC-V
-# privilege specification 1.11-WIP
-import ariane_pkg::*;
-from nmigen import Const, Signal, Cat, Module, Mux
-from nmigen.cli import verilog, rtlil
-from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
-from tlb import TLB
-from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
-                         LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
-PRIV_LVL_M = Const(0b11, 2)
-PRIV_LVL_S = Const(0b01, 2)
-PRIV_LVL_U = Const(0b00, 2)
-class RVException:
- def __init__(self):
- self.cause = Signal(64) # cause of exception
- self.tval = Signal(64) # more info of causing exception
- # (e.g.: instruction causing it),
- # address of LD/ST fault
- self.valid = Signal()
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
- def __iter__(self):
- yield self.cause
- yield self.tval
- yield self.valid
- def ports(self):
- return list(self)
-class ICacheReqI:
- def __init__(self):
- self.fetch_valid = Signal() # address translation valid
- self.fetch_paddr = Signal(64) # physical address in
- self.fetch_exception = RVException() # exception occurred during fetch
- def __iter__(self):
- yield self.fetch_valid
- yield self.fetch_paddr
- yield from self.fetch_exception
- def ports(self):
- return list(self)
-class ICacheReqO:
- def __init__(self):
- self.fetch_req = Signal() # address translation request
- self.fetch_vaddr = Signal(64) # virtual address out
- def __iter__(self):
- yield self.fetch_req
- yield self.fetch_vaddr
- def ports(self):
- return list(self)
-class MMU:
- def __init__(self, instr_tlb_entries = 4,
- data_tlb_entries = 4,
- asid_width = 1):
- self.instr_tlb_entries = instr_tlb_entries
- self.data_tlb_entries = data_tlb_entries
- self.asid_width = asid_width
- self.flush_i = Signal()
- self.enable_translation_i = Signal()
- self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
- # IF interface
- self.icache_areq_i = ICacheReqO()
- self.icache_areq_o = ICacheReqI()
- # LSU interface
- # this is a more minimalistic interface because the actual addressing
- # logic is handled in the LSU as we distinguish load and stores,
- # what we do here is simple address translation
- self.misaligned_ex_i = RVException()
- self.lsu_req_i = Signal() # request address translation
- self.lsu_vaddr_i = Signal(64) # virtual address in
- self.lsu_is_store_i = Signal() # the translation is requested by a store
- # if we need to walk the page table we can't grant in the same cycle
- # Cycle 0
- self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
- # if translation hits in the DTLB
- # Cycle 1
- self.lsu_valid_o = Signal() # translation is valid
- self.lsu_paddr_o = Signal(64) # translated address
- self.lsu_exception_o = RVException() # addr translate threw exception
- # General control signals
- self.priv_lvl_i = Signal(2)
- self.ld_st_priv_lvl_i = Signal(2)
- self.sum_i = Signal()
- self.mxr_i = Signal()
- # input logic flag_mprv_i,
- self.satp_ppn_i = Signal(44)
- self.asid_i = Signal(self.asid_width)
- self.flush_tlb_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
- def elaborate(self, platform):
- m = Module()
- iaccess_err = Signal() # insufficient priv to access instr page
- daccess_err = Signal() # insufficient priv to access data page
- ptw_active = Signal() # PTW is currently walking a page table
- walking_instr = Signal() # PTW is walking because of an ITLB miss
- ptw_error = Signal() # PTW threw an exception
- update_vaddr = Signal(39)
- uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
- update_ptw_itlb = TLBUpdate(self.asid_width)
- update_ptw_dtlb = TLBUpdate(self.asid_width)
- itlb_lu_access = Signal()
- itlb_content = PTE()
- itlb_is_2M = Signal()
- itlb_is_1G = Signal()
- itlb_lu_hit = Signal()
- dtlb_lu_access = Signal()
- dtlb_content = PTE()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
- dtlb_lu_hit = Signal()
- # Assignments
- m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
- dtlb_lu_access.eq(self.lsu_req_i)
- ]
- # ITLB
- m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
- self.asid_width)
- m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
- i_tlb.update_i.eq(update_ptw_itlb),
- i_tlb.lu_access_i.eq(itlb_lu_access),
- i_tlb.lu_asid_i.eq(self.asid_i),
- i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- itlb_content.eq(i_tlb.lu_content_o),
- itlb_is_2M.eq(i_tlb.lu_is_2M_o),
- itlb_is_1G.eq(i_tlb.lu_is_1G_o),
- itlb_lu_hit.eq(i_tlb.lu_hit_o),
- ]
- # DTLB
- m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
- self.asid_width)
- m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
- d_tlb.update_i.eq(update_ptw_dtlb),
- d_tlb.lu_access_i.eq(dtlb_lu_access),
- d_tlb.lu_asid_i.eq(self.asid_i),
- d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
- dtlb_content.eq(d_tlb.lu_content_o),
- dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
- dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
- dtlb_lu_hit.eq(d_tlb.lu_hit_o),
- ]
- # PTW
- m.submodules.ptw = ptw = PTW(self.asid_width)
- m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
- walking_instr.eq(ptw.walking_instr_o),
- ptw_error.eq(ptw.ptw_error_o),
- ptw.enable_translation_i.eq(self.enable_translation_i),
- update_vaddr.eq(ptw.update_vaddr_o),
- update_ptw_itlb.eq(ptw.itlb_update_o),
- update_ptw_dtlb.eq(ptw.dtlb_update_o),
- ptw.itlb_access_i.eq(itlb_lu_access),
- ptw.itlb_hit_i.eq(itlb_lu_hit),
- ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- ptw.dtlb_access_i.eq(dtlb_lu_access),
- ptw.dtlb_hit_i.eq(dtlb_lu_hit),
- ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
- ptw.req_port_i.eq(self.req_port_i),
- self.req_port_o.eq(ptw.req_port_o),
- ]
- # ila_1 i_ila_1 (
- # .clk(clk_i), # input wire clk
- # .probe0({req_port_o.address_tag, req_port_o.address_index}),
- # .probe1(req_port_o.data_req), # input wire [63:0] probe1
- # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
- # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
- # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
- # .probe5(ptw_error), # input wire [1:0] probe5
- # .probe6(update_vaddr), # input wire [0:0] probe6
- # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
- # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
- # .probe9(dtlb_lu_access), # input wire [0:0] probe9
- # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
- # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
- # .probe12(itlb_lu_access), # input wire [0:0] probe12
- # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
- # .probe14(itlb_lu_hit) # input wire [0:0] probe13
- # );
- #-----------------------
- # Instruction Interface
- #-----------------------
- # The instruction interface is a simple request response interface
- # MMU disabled: just pass through
- m.d.comb += [self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req),
- # play through in case we disabled address translation
- self.icache_areq_o.fetch_paddr.eq(
- self.icache_areq_i.fetch_vaddr)
- ]
- # two potential exception sources:
- # 1. HPTW threw an exception -> signal with a page fault exception
- # 2. We got an access error because of insufficient permissions ->
- # throw an access exception
- m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
- # Check whether we are allowed to access this memory region
- # from a fetch perspective
- # XXX TODO: use PermissionValidator instead [we like modules]
- m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
- (((self.priv_lvl_i == PRIV_LVL_U) & \
- ~itlb_content.u) | \
- ((self.priv_lvl_i == PRIV_LVL_S) & \
- itlb_content.u)))
- # MMU enabled: address from TLB, request delayed until hit.
- # Error when TLB hit and no access right or TLB hit and
- # translated address not valid (e.g. AXI decode error),
- # or when PTW performs walk due to ITLB miss and raises
- # an error.
- with m.If (self.enable_translation_i):
- # we work with SV39, so if VM is enabled, check that
- # all bits [63:38] are equal
- with m.If (self.icache_areq_i.fetch_req & \
- ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \
- (self.icache_areq_i.fetch_vaddr[38:64]) == 0)):
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
- # 4K page
- paddr = Signal.like(self.icache_areq_o.fetch_paddr)
- paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
- itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(itlb_is_2M):
- m.d.comb += paddr[12:21].eq(
- self.icache_areq_i.fetch_vaddr[12:21])
- # Giga page
- with m.If(itlb_is_1G):
- m.d.comb += paddr[12:30].eq(
- self.icache_areq_i.fetch_vaddr[12:30])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
- # ---------
- # ITLB Hit
- # --------
- # if we hit the ITLB output the request signal immediately
- with m.If(itlb_lu_hit):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req)
- # we got an access error
- with m.If (iaccess_err):
- # throw a page fault
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- # ---------
- # ITLB Miss
- # ---------
- # watch out for exceptions happening during walking the page table
- with m.Elif(ptw_active & walking_instr):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
- fe.tval.eq(uaddr64),
- fe.valid.eq(1)
- ]
- #-----------------------
- # Data Interface
- #-----------------------
- lsu_vaddr = Signal(64)
- dtlb_pte = PTE()
- misaligned_ex = RVException()
- lsu_req = Signal()
- lsu_is_store = Signal()
- dtlb_hit = Signal()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
- # check if we need to do translation or if we are always
- # ready (e.g.: we are not translating anything)
- m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
- dtlb_lu_hit, 1))
- # The data interface is simpler and only consists of a
- # request/response interface
- m.d.comb += [
- # save request and DTLB response
- lsu_vaddr.eq(self.lsu_vaddr_i),
- lsu_req.eq(self.lsu_req_i),
- misaligned_ex.eq(self.misaligned_ex_i),
- dtlb_pte.eq(dtlb_content),
- dtlb_hit.eq(dtlb_lu_hit),
- lsu_is_store.eq(self.lsu_is_store_i),
- dtlb_is_2M.eq(dtlb_is_2M),
- dtlb_is_1G.eq(dtlb_is_1G),
- ]
- m.d.sync += [
- self.lsu_paddr_o.eq(lsu_vaddr),
- self.lsu_valid_o.eq(lsu_req),
- self.lsu_exception_o.eq(misaligned_ex),
- ]
- sverr = Signal()
- usrerr = Signal()
- m.d.comb += [
- # mute misaligned exceptions if there is no request
- # otherwise they will throw accidental exceptions
- misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
- # SUM is not set and we are trying to access a user
- # page in supervisor mode
- sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
- dtlb_pte.u),
- # this is not a user page but we are in user mode and
- # trying to access it
- usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
- # Check if the User flag is set, then we may only
- # access it in supervisor mode if SUM is enabled
- daccess_err.eq(sverr | usrerr),
- ]
- # translation is enabled and no misaligned exception occurred
- with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
- m.d.comb += lsu_req.eq(0)
- # 4K page
- paddr = Signal.like(lsu_vaddr)
- paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(dtlb_is_2M):
- m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
- # Giga page
- with m.If(dtlb_is_1G):
- m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
- m.d.sync += self.lsu_paddr_o.eq(paddr)
- # ---------
- # DTLB Hit
- # --------
- with m.If(dtlb_hit & lsu_req):
- m.d.comb += lsu_req.eq(1)
- # this is a store
- with m.If (lsu_is_store):
- # check if the page is write-able and
- # we are not violating privileges
- # also check if the dirty flag is set
- with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # this is a load, check for sufficient access
- # privileges - throw a page fault if necessary
- with m.Elif(daccess_err):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # ---------
- # DTLB Miss
- # ---------
- # watch out for exceptions
- with m.Elif (ptw_active & ~walking_instr):
- # page table walker threw an exception
- with m.If (ptw_error):
- # an error makes the translation valid
- m.d.comb += lsu_req.eq(1)
- # the page table walker can only throw page faults
- with m.If (lsu_is_store):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- with m.Else():
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- return m
- def ports(self):
- return [self.flush_i, self.enable_translation_i,
- self.en_ld_st_translation_i,
- self.lsu_req_i,
- self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
- self.lsu_valid_o, self.lsu_paddr_o,
- self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
- self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
- self.itlb_miss_o, self.dtlb_miss_o] + \
- self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
- self.req_port_i.ports() + self.req_port_o.ports() + \
- self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
-if __name__ == '__main__':
- mmu = MMU()
- vl = rtlil.convert(mmu, ports=mmu.ports())
- with open("test_mmu.il", "w") as f:
- f.write(vl)
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const
-from nmigen.hdl.ir import Elaboratable
-from math import log2
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-class PLRU(Elaboratable):
- """ PLRU - Pseudo Least Recently Used Replacement
- PLRU-tree indexing:
- lvl0 0
- / \
- / \
- lvl1 1 2
- / \ / \
- lvl2 3 4 5 6
- / \ /\/\ /\
- ... ... ... ...
- """
- def __init__(self, entries):
- self.entries = entries
- self.lu_hit = Signal(entries)
- self.replace_en_o = Signal(entries)
- self.lu_access_i = Signal()
- # Tree (bit per entry)
- self.TLBSZ = 2*(self.entries-1)
- self.plru_tree = Signal(self.TLBSZ)
- self.plru_tree_o = Signal(self.TLBSZ)
- def elaborate(self, platform=None):
- m = Module()
- # Just predefine which nodes will be set/cleared
- # E.g. for a TLB with 8 entries, the for-loop is semantically
- # equivalent to the following pseudo-code:
- # unique case (1'b1)
- # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
- # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
- # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
- # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
- # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
- # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
- # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
- # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
- # default: begin /* No hit */ end
- # endcase
- LOG_TLB = int(log2(self.entries))
- print(LOG_TLB)
- for i in range(self.entries):
- # we got a hit so update the pointer as it was least recently used
- hit = Signal(reset_less=True)
- m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
- with m.If(hit):
- # Set the nodes to the values we would expect
- for lvl in range(LOG_TLB):
- idx_base = (1<<lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl;
- new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
- plru_idx = idx_base + (i >> shift)
- print ("plru", i, lvl, hex(idx_base),
- plru_idx, shift, new_idx)
- m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
- # Decode tree to write enable signals
- # Next for-loop basically creates the following logic for e.g.
- # an 8 entry TLB (note: pseudo-code obviously):
- # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
- # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
- # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
- # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
- # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
- # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
- # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
- # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
- # For each entry traverse the tree. If every tree-node matches
- # the corresponding bit of the entry's index, this is
- # the next entry to replace.
- replace = []
- for i in range(self.entries):
- en = []
- for lvl in range(LOG_TLB):
- idx_base = (1<<lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl;
- new_idx = (i >> (shift-1)) & 1;
- plru_idx = idx_base + (i>>shift)
- plru = Signal(reset_less=True,
- name="plru-%d-%d-%d" % (i, lvl, plru_idx))
- m.d.comb += plru.eq(self.plru_tree[plru_idx])
- # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
- if new_idx:
- en.append(~plru) # yes inverted (using bool())
- else:
- en.append(plru) # yes inverted (using bool())
- print ("plru", i, en)
- # boolean logic manipulation:
- # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
- replace.append(~Cat(*en).bool())
- m.d.comb += self.replace_en_o.eq(Cat(*replace))
- return m
- def ports(self):
- return [self.entries, self.lu_hit, self.replace_en_o,
- self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 24.4.2017
-# Description: Hardware-PTW
-/* verilator lint_off WIDTH */
-import ariane_pkg::*;
-see linux kernel source:
-* "arch/riscv/include/asm/page.h"
-* "arch/riscv/include/asm/mmu_context.h"
-* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
-from nmigen import Const, Signal, Cat, Module
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.cli import verilog, rtlil
-from math import log2
-CONFIG_L1D_SIZE = 32*1024
-class DCacheReqI:
- def __init__(self):
- self.address_index = Signal(DCACHE_INDEX_WIDTH)
- self.address_tag = Signal(DCACHE_TAG_WIDTH)
- self.data_wdata = Signal(64)
- self.data_req = Signal()
- self.data_we = Signal()
- self.data_be = Signal(8)
- self.data_size = Signal(2)
- self.kill_req = Signal()
- self.tag_valid = Signal()
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
- def ports(self):
- return [self.address_index, self.address_tag,
- self.data_wdata, self.data_req,
- self.data_we, self.data_be, self.data_size,
- self.kill_req, self.tag_valid,
- ]
-class DCacheReqO:
- def __init__(self):
- self.data_gnt = Signal()
- self.data_rvalid = Signal()
- self.data_rdata = Signal(64) # actually in PTE object format
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
- def ports(self):
- return [self.data_gnt, self.data_rvalid, self.data_rdata]
-class PTE: #(RecordObject):
- def __init__(self):
- self.v = Signal()
- self.r = Signal()
- self.w = Signal()
- self.x = Signal()
- self.u = Signal()
- self.g = Signal()
- self.a = Signal()
- self.d = Signal()
- self.rsw = Signal(2)
- self.ppn = Signal(44)
- self.reserved = Signal(10)
- def flatten(self):
- return Cat(*self.ports())
- def eq(self, x):
- if isinstance(x, ArrayProxy):
- res = []
- for o in self.ports():
- i = getattr(x, o.name)
- res.append(i)
- x = Cat(*res)
- else:
- x = x.flatten()
- return self.flatten().eq(x)
- def __iter__(self):
- """ order is critical so that flatten creates LSB to MSB
- """
- yield self.v
- yield self.r
- yield self.w
- yield self.x
- yield self.u
- yield self.g
- yield self.a
- yield self.d
- yield self.rsw
- yield self.ppn
- yield self.reserved
- def ports(self):
- return list(self)
-class TLBUpdate:
- def __init__(self, asid_width):
- self.valid = Signal() # valid flag
- self.is_2M = Signal()
- self.is_1G = Signal()
- self.vpn = Signal(27)
- self.asid = Signal(asid_width)
- self.content = PTE()
- def flatten(self):
- return Cat(*self.ports())
- def eq(self, x):
- return self.flatten().eq(x.flatten())
- def ports(self):
- return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
- self.content.ports()
-# SV39 defines three levels of page tables
-LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
-LVL2 = Const(1, 2)
-LVL3 = Const(2, 2)
-class PTW:
-    def __init__(self, asid_width=8):
-        """Declare the page-table-walker interface signals.
-
-        * asid_width: width in bits of the ASID signals (asid_i and the
-          asid field of both TLB update bundles)
-        """
-        self.asid_width = asid_width
-        self.flush_i = Signal() # flush everything, we need to do this because
-        # actually everything we do is speculative at this stage
-        # e.g.: there could be a CSR instruction that changes everything
-        self.ptw_active_o = Signal(reset=1) # active if not IDLE
-        self.walking_instr_o = Signal() # set when walking for TLB
-        self.ptw_error_o = Signal() # set when an error occurred
-        self.enable_translation_i = Signal() # CSRs indicate to enable SV39
-        self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
-        self.lsu_is_store_i = Signal() # translation triggered by store
-        # PTW memory interface
-        self.req_port_i = DCacheReqO()
-        self.req_port_o = DCacheReqI()
-        # to TLBs, update logic
-        self.itlb_update_o = TLBUpdate(asid_width)
-        self.dtlb_update_o = TLBUpdate(asid_width)
-        self.update_vaddr_o = Signal(39)
-        self.asid_i = Signal(self.asid_width)
-        # from TLBs
-        # did we miss?
-        self.itlb_access_i = Signal()
-        self.itlb_hit_i = Signal()
-        self.itlb_vaddr_i = Signal(64)
-        self.dtlb_access_i = Signal()
-        self.dtlb_hit_i = Signal()
-        self.dtlb_vaddr_i = Signal(64)
-        # from CSR file
-        self.satp_ppn_i = Signal(44) # ppn from satp
-        self.mxr_i = Signal()
-        # Performance counters
-        self.itlb_miss_o = Signal()
-        self.dtlb_miss_o = Signal()
- def ports(self):
- return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
- ]
- return [
- self.enable_translation_i, self.en_ld_st_translation_i,
- self.lsu_is_store_i, self.req_port_i, self.req_port_o,
- self.update_vaddr_o,
- self.asid_i,
- self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
- self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
- self.satp_ppn_i, self.mxr_i,
- self.itlb_miss_o, self.dtlb_miss_o
- ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
- def elaborate(self, platform):
- m = Module()
- # input registers
- data_rvalid = Signal()
- data_rdata = Signal(64)
- # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
- # is spec'd in 64-bit binary-format: better to spec as Record?
- pte = PTE()
- m.d.comb += pte.flatten().eq(data_rdata)
- # SV39 defines three levels of page tables
- ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
- ptw_lvl1 = Signal()
- ptw_lvl2 = Signal()
- ptw_lvl3 = Signal()
- m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
- ptw_lvl2.eq(ptw_lvl == LVL2),
- ptw_lvl3.eq(ptw_lvl == LVL3)]
- # is this an instruction page table walk?
- is_instr_ptw = Signal()
- global_mapping = Signal()
- # latched tag signal
- tag_valid = Signal()
- # register the ASID
- tlb_update_asid = Signal(self.asid_width)
- # register VPN we need to walk, SV39 defines a 39 bit virtual addr
- vaddr = Signal(64)
- # 4 byte aligned physical pointer
- ptw_pptr = Signal(56)
- m.d.sync += [
- # Assignments
- self.update_vaddr_o.eq(vaddr),
- self.walking_instr_o.eq(is_instr_ptw),
- # directly output the correct physical address
- self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
- self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
- # we are never going to kill this request
- self.req_port_o.kill_req.eq(0), # XXX assign comb?
- # we are never going to write with the HPTW
- self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
- # -----------
- # TLB Update
- # -----------
- self.itlb_update_o.vpn.eq(vaddr[12:39]),
- self.dtlb_update_o.vpn.eq(vaddr[12:39]),
- # update the correct page table level
- self.itlb_update_o.is_2M.eq(ptw_lvl2),
- self.itlb_update_o.is_1G.eq(ptw_lvl1),
- self.dtlb_update_o.is_2M.eq(ptw_lvl2),
- self.dtlb_update_o.is_1G.eq(ptw_lvl1),
- # output the correct ASID
- self.itlb_update_o.asid.eq(tlb_update_asid),
- self.dtlb_update_o.asid.eq(tlb_update_asid),
- # set the global mapping bit
- self.itlb_update_o.content.eq(pte),
- self.itlb_update_o.content.g.eq(global_mapping),
- self.dtlb_update_o.content.eq(pte),
- self.dtlb_update_o.content.g.eq(global_mapping),
- self.req_port_o.tag_valid.eq(tag_valid),
- ]
- #-------------------
- # Page table walker
- #-------------------
- # A virtual address va is translated into a physical address pa as
- # follows:
- # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
- # PAGESIZE=2^12 and LEVELS=3.)
- # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
- # (For Sv32, PTESIZE=4.)
- # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
- # access exception.
- # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
- # step 5. Otherwise, this PTE is a pointer to the next level of
- # the page table.
- # Let i=i-1. If i < 0, stop and raise an access exception.
- # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
- # 5. A leaf PTE has been found. Determine if the requested memory
- # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
- # stop and raise an access exception. Otherwise, the translation is
- # successful. Set pte.a to 1, and, if the memory access is a
- # store, set pte.d to 1.
- # The translated physical address is given as follows:
- # - pa.pgoff = va.pgoff.
- # - If i > 0, then this is a superpage translation and
- # pa.ppn[i-1:0] = va.vpn[i-1:0].
- # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
- # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
- # superpage stop and raise a page-fault exception.
- m.d.sync += tag_valid.eq(0)
- # default assignments
- m.d.comb += [
- # PTW memory interface
- self.req_port_o.data_req.eq(0),
- self.req_port_o.data_be.eq(Const(0xFF, 8)),
- self.req_port_o.data_size.eq(Const(0b11, 2)),
- self.req_port_o.data_we.eq(0),
- self.ptw_error_o.eq(0),
- self.itlb_update_o.valid.eq(0),
- self.dtlb_update_o.valid.eq(0),
- self.itlb_miss_o.eq(0),
- self.dtlb_miss_o.eq(0),
- ]
- # ------------
- # State Machine
- # ------------
- with m.FSM() as fsm:
- with m.State("IDLE"):
- self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid)
- with m.State("WAIT_GRANT"):
- self.grant(m, tag_valid, data_rvalid)
- with m.State("PTE_LOOKUP"):
- # we wait for the valid signal
- with m.If(data_rvalid):
- self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr)
- # Propagate error to MMU/LSU
- with m.State("PROPAGATE_ERROR"):
- m.next = "IDLE"
- m.d.comb += self.ptw_error_o.eq(1)
- # wait for the rvalid before going back to IDLE
- with m.State("WAIT_RVALID"):
- with m.If(data_rvalid):
- m.next = "IDLE"
- m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
- data_rvalid.eq(self.req_port_i.data_rvalid)
- ]
- return m
- def set_grant_state(self, m):
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (self.req_port_i.data_gnt):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "WAIT_GRANT"
- def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid):
- # by default we start with the top-most page table
- m.d.sync += [is_instr_ptw.eq(0),
- ptw_lvl.eq(LVL1),
- global_mapping.eq(0),
- self.ptw_active_o.eq(0), # deactive (IDLE)
- ]
- # work out itlb/dtlb miss
- m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
- self.itlb_access_i & \
- ~self.itlb_hit_i & \
- ~self.dtlb_access_i)
- m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
- self.dtlb_access_i & \
- ~self.dtlb_hit_i)
- # we got an ITLB miss?
- with m.If(self.itlb_miss_o):
- pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
- self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- is_instr_ptw.eq(1),
- vaddr.eq(self.itlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
- # we got a DTLB miss?
- with m.Elif(self.dtlb_miss_o):
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
- self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- vaddr.eq(self.dtlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
- def grant(self, m, tag_valid, data_rvalid):
- # we've got a data WAIT_GRANT so tell the
- # cache that the tag is valid
- # send a request out
- m.d.comb += self.req_port_o.data_req.eq(1)
- # wait for the WAIT_GRANT
- with m.If(self.req_port_i.data_gnt):
- # send the tag valid signal one cycle later
- m.d.sync += tag_valid.eq(1)
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (~data_rvalid):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "PTE_LOOKUP"
- def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr):
- # temporaries
- pte_rx = Signal(reset_less=True)
- pte_exe = Signal(reset_less=True)
- pte_inv = Signal(reset_less=True)
- pte_a = Signal(reset_less=True)
- st_wd = Signal(reset_less=True)
- m.d.comb += [pte_rx.eq(pte.r | pte.x),
- pte_exe.eq(~pte.x | ~pte.a),
- pte_inv.eq(~pte.v | (~pte.r & pte.w)),
- pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
- st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
- l1err = Signal(reset_less=True)
- l2err = Signal(reset_less=True)
- m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)),
- l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ]
- # check if the global mapping bit is set
- with m.If (pte.g):
- m.d.sync += global_mapping.eq(1)
- m.next = "IDLE"
- # -------------
- # Invalid PTE
- # -------------
- # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
- # stop and raise a page-fault exception.
- with m.If (pte_inv):
- m.next = "PROPAGATE_ERROR"
- # -----------
- # Valid PTE
- # -----------
- # it is a valid PTE
- # if pte.r = 1 or pte.x = 1 it is a valid PTE
- with m.Elif (pte_rx):
- # Valid translation found (either 1G, 2M or 4K)
- with m.If(is_instr_ptw):
- # ------------
- # Update ITLB
- # ------------
- # If page not executable, we can directly raise error.
- # This doesn't put a useless entry into the TLB.
- # The same idea applies to the access flag since we let
- # the access flag be managed by SW.
- with m.If (pte_exe):
- m.next = "IDLE"
- with m.Else():
- m.d.comb += self.itlb_update_o.valid.eq(1)
- with m.Else():
- # ------------
- # Update DTLB
- # ------------
- # Check if the access flag has been set, otherwise
- # throw page-fault and let software handle those bits.
- # If page not readable (there are no write-only pages)
- # directly raise an error. This doesn't put a useless
- # entry into the TLB.
- with m.If(pte_a):
- m.d.comb += self.dtlb_update_o.valid.eq(1)
- with m.Else():
- m.next = "PROPAGATE_ERROR"
- # Request is a store: perform additional checks
- # If the request was a store and the page not
- # write-able, raise an error
- # the same applies if the dirty flag is not set
- with m.If (st_wd):
- m.d.comb += self.dtlb_update_o.valid.eq(0)
- m.next = "PROPAGATE_ERROR"
- # check if the ppn is correctly aligned: Case (6)
- with m.If(l1err | l2err):
- m.next = "PROPAGATE_ERROR"
- m.d.comb += [self.dtlb_update_o.valid.eq(0),
- self.itlb_update_o.valid.eq(0)]
- # this is a pointer to the next TLB level
- with m.Else():
- # pointer to next level of page table
- with m.If (ptw_lvl1):
- # we are in the second level now
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL2)
- ]
- with m.If(ptw_lvl2):
- # here we received a pointer to the third level
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL3)
- ]
- self.set_grant_state(m)
- with m.If (ptw_lvl3):
- # Should already be the last level
- # page table => Error
- m.d.sync += ptw_lvl.eq(LVL3)
- m.next = "PROPAGATE_ERROR"
-if __name__ == '__main__':
- ptw = PTW()
- vl = rtlil.convert(ptw, ports=ptw.ports())
- with open("test_ptw.il", "w") as f:
- f.write(vl)
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 21.4.2017
-# Description: Translation Lookaside Buffer, SV39
-# fully set-associative
-Implementation in c++:
-Text description:
-Online simulator:
-from math import log2
-from nmigen import Signal, Module, Cat, Const, Array
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import Encoder
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-from plru import PLRU
-from tlb_content import TLBContent
-class TLB:
- def __init__(self, tlb_entries=8, asid_width=8):
- self.tlb_entries = tlb_entries
- self.asid_width = asid_width
- self.flush_i = Signal() # Flush signal
- # Lookup signals
- self.lu_access_i = Signal()
- self.lu_asid_i = Signal(self.asid_width)
- self.lu_vaddr_i = Signal(64)
- self.lu_content_o = PTE()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
- # Update TLB
- self.pte_width = len(self.lu_content_o.flatten())
- self.update_i = TLBUpdate(asid_width)
- def elaborate(self, platform):
- m = Module()
- vpn2 = Signal(9)
- vpn1 = Signal(9)
- vpn0 = Signal(9)
- #-------------
- # Translation
- #-------------
- # SV39 defines three levels of page tables
- m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
- vpn1.eq(self.lu_vaddr_i[21:30]),
- vpn2.eq(self.lu_vaddr_i[30:39]),
- ]
- tc = []
- for i in range(self.tlb_entries):
- tlc = TLBContent(self.pte_width, self.asid_width)
- setattr(m.submodules, "tc%d" % i, tlc)
- tc.append(tlc)
- # connect inputs
- tlc.update_i = self.update_i # saves a lot of graphviz links
- m.d.comb += [tlc.vpn0.eq(vpn0),
- tlc.vpn1.eq(vpn1),
- tlc.vpn2.eq(vpn2),
- tlc.flush_i.eq(self.flush_i),
- #tlc.update_i.eq(self.update_i),
- tlc.lu_asid_i.eq(self.lu_asid_i)]
- tc = Array(tc)
- #--------------
- # Select hit
- #--------------
- # use Encoder to select hit index
- # XXX TODO: assert that there's only one valid entry (one lu_hit)
- hitsel = Encoder(self.tlb_entries)
- m.submodules.hitsel = hitsel
- hits = []
- for i in range(self.tlb_entries):
- hits.append(tc[i].lu_hit_o)
- m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
- idx = hitsel.o
- active = Signal(reset_less=True)
- m.d.comb += active.eq(~hitsel.n)
- with m.If(active):
- # active hit, send selected as output
- m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
- self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
- self.lu_hit_o.eq(1),
- self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
- ]
- #--------------
- # PLRU.
- #--------------
- p = PLRU(self.tlb_entries)
- plru_tree = Signal(p.TLBSZ)
- m.submodules.plru = p
- # connect PLRU inputs/outputs
- # XXX TODO: assert that there's only one valid entry (one replace_en)
- en = []
- for i in range(self.tlb_entries):
- en.append(tc[i].replace_en_i)
- m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
- p.lu_hit.eq(hitsel.i),
- p.lu_access_i.eq(self.lu_access_i),
- p.plru_tree.eq(plru_tree)]
- m.d.sync += plru_tree.eq(p.plru_tree_o)
- #--------------
- # Sanity checks
- #--------------
- assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
- "TLB size must be a multiple of 2 and greater than 1"
- assert (self.asid_width >= 1), \
- "ASID width must be at least 1"
- return m
- """
- # Just for checking
- function int countSetBits(logic[self.tlb_entries-1:0] vector);
- automatic int count = 0;
- foreach (vector[idx]) begin
- count += vector[idx];
- end
- return count;
- endfunction
- assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
- else $error("More then one hit in TLB!"); $stop(); end
- assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
- else $error("More then one TLB entry selected for next replace!");
- """
- def ports(self):
- return [self.flush_i, self.lu_access_i,
- self.lu_asid_i, self.lu_vaddr_i,
- self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
- ] + self.lu_content_o.ports() + self.update_i.ports()
-if __name__ == '__main__':
- tlb = TLB()
- vl = rtlil.convert(tlb, ports=tlb.ports())
- with open("test_tlb.il", "w") as f:
- f.write(vl)
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const
-from ptw import TLBUpdate, PTE
-class TLBEntry:
- def __init__(self, asid_width):
- self.asid = Signal(asid_width)
- # SV39 defines three levels of page tables
- self.vpn0 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn2 = Signal(9)
- self.is_2M = Signal()
- self.is_1G = Signal()
- self.valid = Signal()
- def flatten(self):
- return Cat(*self.ports())
- def eq(self, x):
- return self.flatten().eq(x.flatten())
- def ports(self):
- return [self.asid, self.vpn0, self.vpn1, self.vpn2,
- self.is_2M, self.is_1G, self.valid]
-class TLBContent:
- def __init__(self, pte_width, asid_width):
- self.asid_width = asid_width
- self.pte_width = pte_width
- self.flush_i = Signal() # Flush signal
- # Update TLB
- self.update_i = TLBUpdate(asid_width)
- self.vpn2 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn0 = Signal(9)
- self.replace_en_i = Signal() # replace the following entry,
- # set by replacement strategy
- # Lookup signals
- self.lu_asid_i = Signal(asid_width)
- self.lu_content_o = Signal(pte_width)
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
- def elaborate(self, platform):
- m = Module()
- tags = TLBEntry(self.asid_width)
- content = Signal(self.pte_width)
- m.d.comb += [self.lu_hit_o.eq(0),
- self.lu_is_2M_o.eq(0),
- self.lu_is_1G_o.eq(0)]
- # temporaries for 1st level match
- asid_ok = Signal(reset_less=True)
- vpn2_ok = Signal(reset_less=True)
- tags_ok = Signal(reset_less=True)
- vpn2_hit = Signal(reset_less=True)
- m.d.comb += [tags_ok.eq(tags.valid),
- asid_ok.eq(tags.asid == self.lu_asid_i),
- vpn2_ok.eq(tags.vpn2 == self.vpn2),
- vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
- # temporaries for 2nd level match
- vpn1_ok = Signal(reset_less=True)
- tags_2M = Signal(reset_less=True)
- vpn0_ok = Signal(reset_less=True)
- vpn0_or_2M = Signal(reset_less=True)
- m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
- tags_2M.eq(tags.is_2M),
- vpn0_ok.eq(self.vpn0 == tags.vpn0),
- vpn0_or_2M.eq(tags_2M | vpn0_ok)]
- # first level match, this may be a giga page,
- # check the ASID flags as well
- with m.If(vpn2_hit):
- # second level
- with m.If (tags.is_1G):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_1G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
- # not a giga page hit so check further
- with m.Elif(vpn1_ok):
- # this could be a 2 mega page hit or a 4 kB hit
- # output accordingly
- with m.If(vpn0_or_2M):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_2M_o.eq(tags.is_2M),
- self.lu_hit_o.eq(1),
- ]
- # ------------------
- # Update or Flush
- # ------------------
- # temporaries
- replace_valid = Signal(reset_less=True)
- m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
- # flush
- with m.If (self.flush_i):
- # invalidate (flush) conditions: all if zero or just this ASID
- with m.If (self.lu_asid_i == Const(0, self.asid_width) |
- (self.lu_asid_i == tags.asid)):
- m.d.sync += tags.valid.eq(0)
- # normal replacement
- with m.Elif(replace_valid):
- m.d.sync += [ # update tag array
- tags.asid.eq(self.update_i.asid),
- tags.vpn2.eq(self.update_i.vpn[18:27]),
- tags.vpn1.eq(self.update_i.vpn[9:18]),
- tags.vpn0.eq(self.update_i.vpn[0:9]),
- tags.is_1G.eq(self.update_i.is_1G),
- tags.is_2M.eq(self.update_i.is_2M),
- tags.valid.eq(1),
- # and content as well
- content.eq(self.update_i.content.flatten())
- ]
- return m
- def ports(self):
- return [self.flush_i,
- self.lu_asid_i,
- self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
- ] + self.update_i.content.ports() + self.update_i.ports()
+++ /dev/null
-import sys
-from plru import PLRU
-from nmigen.compat.sim import run_simulation
-def testbench(dut):
- yield
-if __name__ == "__main__":
- dut = PLRU(4)
- run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
- print("PLRU Unit Test Success")
\ No newline at end of file
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from ptw import PTW, PTE
-def testbench(dut):
- addr = 0x8000000
- #pte = PTE()
- #yield pte.v.eq(1)
- #yield pte.r.eq(1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
- # data lookup
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000000)
- yield
- yield
- yield
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x200000)
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- # data lookup, PTW levels 1-2-3
- addr = 0x4000000
- yield dut.dtlb_vaddr_i.eq(addr)
- yield dut.mxr_i.eq(0x1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(addr)
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
- yield
- # instruction lookup
- yield dut.en_ld_st_translation_i.eq(0)
- yield dut.enable_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800000)
- yield
- yield
- yield
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x200000)
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
-if __name__ == "__main__":
- dut = PTW()
- run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
- print("PTW Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from tlb import TLB
-def set_vaddr(addr):
- yield dut.lu_vaddr_i.eq(addr)
- yield dut.update_i.vpn.eq(addr>>12)
-def testbench(dut):
- yield dut.lu_access_i.eq(1)
- yield dut.lu_asid_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_1G.eq(0)
- yield dut.update_i.is_2M.eq(0)
- yield dut.update_i.asid.eq(1)
- yield dut.update_i.content.ppn.eq(0)
- yield dut.update_i.content.rsw.eq(0)
- yield dut.update_i.content.r.eq(1)
- yield
- addr = 0x80000
- yield from set_vaddr(addr)
- yield
- addr = 0x90001
- yield from set_vaddr(addr)
- yield
- addr = 0x28000000
- yield from set_vaddr(addr)
- yield
- addr = 0x28000001
- yield from set_vaddr(addr)
- addr = 0x28000001
- yield from set_vaddr(addr)
- yield
- addr = 0x1000040000
- yield from set_vaddr(addr)
- yield
- addr = 0x1000040001
- yield from set_vaddr(addr)
- yield
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040000
- yield from set_vaddr(addr)
- yield
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040001
- yield from set_vaddr(addr)
- yield
- yield
-if __name__ == "__main__":
- dut = TLB()
- run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-import sys
-from LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
-from nmigen.back.pysim import Simulator, Delay, Tick
-import unittest
-class TestLFSR(unittest.TestCase):
- def test_poly(self):
- v = LFSRPolynomial()
- self.assertEqual(repr(v), "LFSRPolynomial([0])")
- self.assertEqual(str(v), "1")
- v = LFSRPolynomial([1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([0, 1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([1, 2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])")
- self.assertEqual(str(v), "x^2 + x + 1")
- v = LFSRPolynomial([2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 0])")
- self.assertEqual(str(v), "x^2 + 1")
- self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")
- def test_lfsr_3(self):
- module = LFSR(LFSR_POLY_3)
- traces = [module.state, module.enable]
- with Simulator(module,
- vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
- gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
- traces=traces) as sim:
- sim.add_clock(1e-6, 0.25e-6)
- delay = Delay(1e-7)
- def async_process():
- yield module.enable.eq(0)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield module.enable.eq(1)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x2)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x5)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x3)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x7)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x6)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x4)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
- sim.add_process(async_process)
- sim.run()
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from AddressEncoder import AddressEncoder
-from test_helper import assert_eq, assert_ne, assert_op
-# This function allows for the easy setting of values to the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# i (Input): The array of single bits to be written
-def set_encoder(dut, i):
- yield dut.i.eq(i)
- yield
-# Checks the single match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-# Checks the multiple match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-# Checks the output of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# o (Output): The expected output
-# op (Operation): (0 => ==), (1 => !=)
-def check_output(dut, o, op):
- out_o = yield dut.o
- assert_op("Output", out_o, o, op)
-# Checks the state of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# o (Output): The expected output
-# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
- yield from check_single_match(dut, sm, sm_op)
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_output(dut, o, o_op)
-def testbench(dut):
- # Check invalid input
- in_val = 0b000
- single_match = 0
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check single bit
- in_val = 0b001
- single_match = 1
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check another single bit
- in_val = 0b100
- single_match = 1
- multiple_match = 0
- output = 2
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check multiple match
- # We expected the lowest bit to be returned which is address 0
- in_val = 0b101
- single_match = 0
- multiple_match = 1
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check another multiple match
- # We expected the lowest bit to be returned which is address 1
- in_val = 0b110
- single_match = 0
- multiple_match = 1
- output = 1
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-if __name__ == "__main__":
- dut = AddressEncoder(4)
- run_simulation(dut, testbench(dut),
- vcd_name="Waveforms/test_address_encoder.vcd")
- print("AddressEncoder Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from Cam import Cam
-from test_helper import assert_eq, assert_ne, assert_op
-# This function allows for the easy setting of values to the Cam
-# Arguments:
-# dut: The Cam being tested
-# e (Enable): Whether the block is going to be enabled
-# we (Write Enable): Whether the Cam will write on the next cycle
-# a (Address): Where the data will be written if write enable is high
-# d (Data): Either what we are looking for or will write to the address
-def set_cam(dut, e, we, a, d):
- yield dut.enable.eq(e)
- yield dut.write_enable.eq(we)
- yield dut.address_in.eq(a)
- yield dut.data_in.eq(d)
- yield
-# Checks the multiple match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-# Checks the single match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-# Checks the address output of the Cam
-# Arguments:
-# dut: The Cam being tested
-# ma (Match Address): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_match_address(dut, ma, op):
- out_ma = yield dut.match_address
- assert_op("Match Address", out_ma, ma, op)
-# Checks the state of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# ma: (Match Address): The expected address output
-# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=)
-def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_single_match(dut, sm, sm_op)
- yield from check_match_address(dut, ma, ma_op)
-def testbench(dut):
- # NA
- enable = 0
- write_enable = 0
- address = 0
- data = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Miss Multiple
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 0
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_multiple_match(dut, multiple_match, 0)
- # Read Miss
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 1
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 4
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Hit Entry 0
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
- # Search Hit
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
- # Search Miss
- enable = 1
- write_enable = 0
- address = 0
- data = 5
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Multiple Match test
- # Write Entry 1
- enable = 1
- write_enable = 1
- address = 1
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Write Entry 2
- # Same data as Entry 1
- enable = 1
- write_enable = 1
- address = 2
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Hit Data 5
- enable = 1
- write_enable = 0
- address = 1
- data = 5
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address,0,0,0)
- # Verify read_warning is not caused
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 7
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- # Note there is no yield we immediately attempt to read in the next cycle
- # Read Hit Data 7
- enable = 1
- write_enable = 0
- address = 0
- data = 7
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- yield
-if __name__ == "__main__":
- dut = Cam(4, 4)
- run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam.vcd")
- print("Cam Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from test_helper import assert_eq, assert_ne, assert_op
-from CamEntry import CamEntry
def set_cam_entry(dut, c, d):
    """Drive one command/data pair into the Cam Entry, then clear both lines.

    Arguments:
        dut: The CamEntry being tested
        c (command): NA (0), Read (1), Write (2), Reserve (3)
        d (data): The data to be set
    """
    # Pass one applies the requested values, pass two resets both lines to
    # zero; each pass ends with a bare yield (one simulation step).
    for command_value, data_value in ((c, d), (0, 0)):
        yield dut.command.eq(command_value)
        yield dut.data_in.eq(data_value)
        yield
def check_data(dut, d, op):
    """Sample the CAM entry's ``data`` signal and compare it with ``d``.

    Arguments:
        dut: The CamEntry being tested
        d (Data): The expected data
        op (Operation): (0 => ==), (1 => !=)
    """
    # The value of the yield expression is the sampled signal value.
    assert_op("Data", (yield dut.data), d, op)
def check_match(dut, m, op):
    """Sample the CAM entry's ``match`` signal and compare it with ``m``.

    Arguments:
        dut: The CamEntry being tested
        m (Match): The expected match
        op (Operation): (0 => ==), (1 => !=)
    """
    # The value of the yield expression is the sampled signal value.
    assert_op("Match", (yield dut.match), m, op)
def check_all(dut, d, m, d_op, m_op):
    """Check the data and then the match state of the CAM entry.

    Arguments:
        dut: The CamEntry being tested
        d (data): The expected data
        m (match): The expected match
        d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=)
        m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
    """
    checks = (
        (check_data, d, d_op),
        (check_match, m, m_op),
    )
    for checker, expected, operation in checks:
        yield from checker(dut, expected, operation)
def testbench(dut):
    """Exercise the CamEntry module with a fixed sequence of commands.

    Each step issues one command/data pair through set_cam_entry and then
    verifies the stored data and the match line with check_all.
    """
    # (purpose, command, data, expected match, data op, extra settle cycle)
    # data op 1 means the stored data must NOT equal the value just driven.
    steps = (
        ("write",     2, 1, 0, 0, False),
        ("read miss", 1, 2, 0, 1, False),
        ("read hit",  1, 1, 1, 0, False),
        ("overwrite", 2, 5, 0, 0, True),
        ("read hit",  1, 5, 1, 0, False),
        ("reset",     3, 0, 0, 0, False),
    )
    for _purpose, command, data, match, data_op, settle in steps:
        yield from set_cam_entry(dut, command, data)
        if settle:
            yield
        yield from check_all(dut, data, match, data_op, 0)

    # Extra clock cycle for waveform
    yield
if __name__ == "__main__":
    # Script entry point: simulate CamEntry(4) and record the waveform.
    dut = CamEntry(4)
    run_simulation(
        dut,
        testbench(dut),
        vcd_name="Waveforms/test_cam_entry.vcd",
    )
    print("CamEntry Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from LFSR import LFSR
-from test_helper import assert_eq, assert_ne, assert_op
def testbench(dut):
    """Enable the LFSR, set its output to 9 and let it run for 12 steps.

    Nothing is asserted here; the run only produces a waveform to inspect.
    """
    yield dut.enable.eq(1)
    yield dut.o.eq(9)
    for _ in range(12):
        yield
if __name__ == "__main__":
    # Script entry point: simulate an LFSR and record the waveform.
    dut = LFSR()
    run_simulation(
        dut,
        testbench(dut),
        vcd_name="Waveforms/test_lfsr.vcd",
    )
    print("LFSR Unit Test Success")
\ No newline at end of file
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from PermissionValidator import PermissionValidator
-from test_helper import assert_op
def set_validator(dut, d, xwr, sm, sa, asid):
    """Apply one full set of inputs to the PermissionValidator, then step.

    Arguments:
        dut: The PermissionValidator being tested
        d: value for ``data``
        xwr: value for ``xwr``
        sm: value for ``super_mode``
        sa: value for ``super_access``
        asid: value for ``asid``
    """
    stimulus = (
        ("data", d),
        ("xwr", xwr),
        ("super_mode", sm),
        ("super_access", sa),
        ("asid", asid),
    )
    for port_name, value in stimulus:
        yield getattr(dut, port_name).eq(value)
    yield
def check_valid(dut, v, op):
    """Sample ``dut.valid`` and compare it with ``v``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("Valid", (yield dut.valid), v, op)
def testbench(dut):
    """Run the PermissionValidator through nine data/ASID/mode combinations.

    80 bits are represented in each data word; ignore the MSB as it will be
    truncated. The ASID is the first 4 hex values (bits 64 - 78). xwr is
    held at 0 for every case and ``valid`` is always checked for equality.
    """
    xwr = 0
    # (purpose, data, asid, super_mode, super_access, expected valid)
    cases = (
        ("user mode entry valid, global bit, matching ASID",
         0x7FFF0000000000000031, 0x7FFF, 0, 0, 1),
        ("user mode entry valid, global bit, nonmatching ASID",
         0x7FFF0000000000000031, 0x7FF6, 0, 0, 1),
        ("user mode entry invalid, global bit, nonmatching ASID",
         0x7FFF0000000000000021, 0x7FF6, 0, 0, 0),
        ("user mode entry valid",
         0x7FFF0000000000000011, 0x7FFF, 0, 0, 1),
        ("user mode entry invalid",
         0x7FFF0000000000000011, 0x7FF6, 0, 0, 0),
        ("supervisor mode entry valid, entry NOT in user mode",
         0x7FFF0000000000000001, 0x7FFF, 1, 0, 1),
        ("supervisor mode entry invalid, entry in user mode",
         0x7FFF0000000000000011, 0x7FFF, 1, 0, 0),
        ("supervisor mode entry valid, entry NOT in user mode, with access",
         0x7FFF0000000000000001, 0x7FFF, 1, 1, 1),
        ("supervisor mode entry valid, entry in user mode, with access",
         0x7FFF0000000000000011, 0x7FFF, 1, 1, 1),
    )
    for _purpose, data, asid, super_mode, super_access, valid in cases:
        yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
        yield from check_valid(dut, valid, 0)
if __name__ == "__main__":
    # Script entry point: simulate PermissionValidator(15, 64) and record
    # the waveform.
    dut = PermissionValidator(15, 64)
    run_simulation(
        dut,
        testbench(dut),
        vcd_name="Waveforms/test_permission_validator.vcd",
    )
    print("PermissionValidator Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from PteEntry import PteEntry
-from test_helper import assert_op
def set_entry(dut, i):
    """Drive ``i`` onto the PteEntry input ``dut.i`` and advance one step."""
    drive_input = dut.i.eq(i)
    yield drive_input
    yield
def check_dirty(dut, d, op):
    """Sample ``dut.d`` and compare it with the expected dirty flag ``d``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("Dirty", (yield dut.d), d, op)
def check_accessed(dut, a, op):
    """Sample ``dut.a`` and compare it with the expected accessed flag ``a``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("Accessed", (yield dut.a), a, op)
def check_global(dut, o, op):
    """Sample ``dut.g`` and compare it with the expected global flag ``o``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("Global", (yield dut.g), o, op)
def check_user(dut, o, op):
    """Sample ``dut.u`` and compare it with the expected user-mode flag ``o``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("User Mode", (yield dut.u), o, op)
def check_xwr(dut, o, op):
    """Sample ``dut.xwr`` and compare it with the expected value ``o``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("XWR", (yield dut.xwr), o, op)
def check_asid(dut, o, op):
    """Sample ``dut.asid`` and compare it with the expected ASID ``o``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("ASID", (yield dut.asid), o, op)
def check_pte(dut, o, op):
    """Sample ``dut.pte`` and compare it with the expected PTE value ``o``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    out = yield dut.pte
    # Label the failure as "PTE"; it was previously reported as "ASID",
    # which made a PTE mismatch indistinguishable from a check_asid failure.
    assert_op("PTE", out, o, op)
def check_valid(dut, v, op):
    """Sample ``dut.v`` and compare it with the expected valid flag ``v``.

    ``op`` selects the comparison done by assert_op: (0 => ==), (1 => !=).
    """
    assert_op("Valid", (yield dut.v), v, op)
def check_all(dut, d, a, g, u, xwr, v, asid, pte):
    """Check every decoded PteEntry field for equality with its expectation.

    The fields are checked in the order dirty, accessed, global, user, xwr,
    asid, pte, valid; the first mismatch raises from the failing checker.
    """
    expectations = (
        (check_dirty, d),
        (check_accessed, a),
        (check_global, g),
        (check_user, u),
        (check_xwr, xwr),
        (check_asid, asid),
        (check_pte, pte),
        (check_valid, v),
    )
    for checker, expected in expectations:
        yield from checker(dut, expected, 0)
def testbench(dut):
    """Feed three raw entries into the PteEntry and verify the decoded fields.

    80 bits are represented in each input word; ignore the MSB as it will be
    truncated. The ASID is the first 4 hex values (bits 64 - 78).
    """
    # (input word, dirty, accessed, global, user, xwr, valid, asid, pte)
    vectors = (
        (0x7FFF0000000000000031, 0, 0, 1, 1, 0, 1, 0x7FFF, 0x0000000000000031),
        (0x0FFF00000000000000FF, 1, 1, 1, 1, 7, 1, 0x0FFF, 0x00000000000000FF),
        (0x0721000000001100001F, 0, 0, 0, 1, 7, 1, 0x0721, 0x000000001100001F),
    )
    for word, dirty, access, glob, user, xwr, valid, asid, pte in vectors:
        yield from set_entry(dut, word)
        yield from check_all(dut, dirty, access, glob, user, xwr, valid,
                             asid, pte)
    yield
if __name__ == "__main__":
    # Script entry point: simulate PteEntry(15, 64) and record the waveform.
    dut = PteEntry(15, 64)
    run_simulation(
        dut,
        testbench(dut),
        vcd_name="Waveforms/test_pte_entry.vcd",
    )
    print("PteEntry Unit Test Success")
\ No newline at end of file
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from SetAssociativeCache import SetAssociativeCache
-from test_helper import assert_eq, assert_ne, assert_op
def set_sac(dut, e, c, s, t, d):
    """Apply one full set of inputs to the SetAssociativeCache, then step.

    Arguments:
        dut: The SetAssociativeCache being tested
        e: value for ``enable``
        c: value for ``command``
        s: value for ``cset``
        t: value for ``tag``
        d: value for ``data_i``
    """
    stimulus = (
        ("enable", e),
        ("command", c),
        ("cset", s),
        ("tag", t),
        ("data_i", d),
    )
    for port_name, value in stimulus:
        yield getattr(dut, port_name).eq(value)
    yield
def testbench(dut):
    """Issue two command-2 operations to set 1 with different tag/data pairs.

    Nothing is asserted here; the run only produces a waveform to inspect.
    """
    enable, command, cset = 1, 2, 1
    for tag, data in ((2, 3), (5, 8)):
        yield from set_sac(dut, enable, command, cset, tag, data)
        yield
if __name__ == "__main__":
    # Script entry point: simulate SetAssociativeCache(4, 4, 4, 4) and
    # record the waveform.
    dut = SetAssociativeCache(4, 4, 4, 4)
    run_simulation(
        dut,
        testbench(dut),
        vcd_name="Waveforms/test_set_associative_cache.vcd",
    )
    print("Set Associative Cache Unit Test Success")
+++ /dev/null
def assert_op(pre, o, e, op):
    """Verify the given values using the selected comparison.

    Arguments:
        pre (Prefix): Prepended to the front of the assert message
        o (Output): The output result
        e (Expected): The expected value
        op (Operation): (0 => ==), any other value => !=
    """
    verify = assert_eq if op == 0 else assert_ne
    verify(pre, o, e)
def assert_eq(p, o, e):
    """Verify the given values are equal.

    Arguments:
        p (Prefix): Prepended to the front of the failure message
        o (Output): The output result
        e (Expected): The expected value

    Raises:
        AssertionError: if ``o == e`` is false.
    """
    # Raise explicitly instead of using the ``assert`` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    # ``!s`` keeps the str() formatting and also tolerates a non-str prefix.
    if not (o == e):
        raise AssertionError(f"{p!s} Output {o!s} Expected {e!s}")
def assert_ne(p, o, e):
    """Verify the given values are not equal.

    Arguments:
        p (Prefix): Prepended to the front of the failure message
        o (Output): The output result
        e (Expected): The value the output must differ from

    Raises:
        AssertionError: if ``o != e`` is false.
    """
    # Raise explicitly instead of using the ``assert`` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    # ``!s`` keeps the str() formatting and also tolerates a non-str prefix.
    if not (o != e):
        raise AssertionError(f"{p!s} Output {o!s} Not Expecting {e!s}")
\ No newline at end of file
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable
-from nmutil.latch import SRLatch
class DependenceCell(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27

        The cell holds three SRLatch submodules (dest, src1, src2).  Each
        latch is set when issue_i is raised together with the matching
        dest_i / src1_i / src2_i input; the dest latch is reset by
        go_write_i and both src latches are reset by go_read_i.  The
        *_fwd_o and *_rsel_o outputs are combinatorial gatings of each
        latch's ``qn`` output.
    """
    def __init__(self):
        # NOTE: no name= is passed to Signal(); the signal names come from
        # the attribute each Signal is assigned to, so keep this form.
        # inputs
        self.dest_i = Signal(reset_less=True)     # Dest in (top)
        self.src1_i = Signal(reset_less=True)     # oper1 in (top)
        self.src2_i = Signal(reset_less=True)     # oper2 in (top)
        self.issue_i = Signal(reset_less=True)    # Issue in (top)

        self.go_write_i = Signal(reset_less=True) # Go Write in (left)
        self.go_read_i = Signal(reset_less=True)  # Go Read in (left)

        # for Register File Select Lines (vertical)
        self.dest_rsel_o = Signal(reset_less=True)  # dest reg sel (bottom)
        self.src1_rsel_o = Signal(reset_less=True)  # src1 reg sel (bottom)
        self.src2_rsel_o = Signal(reset_less=True)  # src2 reg sel (bottom)

        # for Function Unit "forward progress" (horizontal)
        self.dest_fwd_o = Signal(reset_less=True)   # dest FU fw (right)
        self.src1_fwd_o = Signal(reset_less=True)   # src1 FU fw (right)
        self.src2_fwd_o = Signal(reset_less=True)   # src2 FU fw (right)

    def elaborate(self, platform):
        """Build the three latches and wire the combinatorial outputs."""
        m = Module()
        m.submodules.dest_l = dest_l = SRLatch()
        m.submodules.src1_l = src1_l = SRLatch()
        m.submodules.src2_l = src2_l = SRLatch()

        # destination latch: reset on go_write HI, set on dest and issue
        m.d.comb += dest_l.s.eq(self.issue_i & self.dest_i)
        m.d.comb += dest_l.r.eq(self.go_write_i)

        # src1 latch: reset on go_read HI, set on src1_i and issue
        m.d.comb += src1_l.s.eq(self.issue_i & self.src1_i)
        m.d.comb += src1_l.r.eq(self.go_read_i)

        # src2 latch: reset on go_read HI, set on src2_i and issue
        m.d.comb += src2_l.s.eq(self.issue_i & self.src2_i)
        m.d.comb += src2_l.r.eq(self.go_read_i)

        # NOTE(review): every output below is gated by the latch's ``qn``
        # output, presumably the inverted state - confirm against
        # nmutil.latch.SRLatch that qn (not q) is the intended term.

        # FU "Forward Progress" (read out horizontally)
        m.d.comb += self.dest_fwd_o.eq(dest_l.qn & self.dest_i)
        m.d.comb += self.src1_fwd_o.eq(src1_l.qn & self.src1_i)
        m.d.comb += self.src2_fwd_o.eq(src2_l.qn & self.src2_i)

        # Register File Select (read out vertically)
        m.d.comb += self.dest_rsel_o.eq(dest_l.qn & self.go_write_i)
        m.d.comb += self.src1_rsel_o.eq(src1_l.qn & self.go_read_i)
        m.d.comb += self.src2_rsel_o.eq(src2_l.qn & self.go_read_i)

        return m

    def __iter__(self):
        """Yield every input and output signal, inputs first."""
        yield self.dest_i
        yield self.src1_i
        yield self.src2_i
        yield self.issue_i
        yield self.go_write_i
        yield self.go_read_i
        yield self.dest_rsel_o
        yield self.src1_rsel_o
        yield self.src2_rsel_o
        yield self.dest_fwd_o
        yield self.src1_fwd_o
        yield self.src2_fwd_o

    def ports(self):
        """Return the signals from __iter__ as a list (used as RTLIL ports)."""
        return list(self)
def dcell_sim(dut):
    """Stimulus for DependenceCell: issue dest, issue src1, read, then write.

    Nothing is asserted; the sequence only produces a waveform to inspect.
    """
    # Each entry: the (port, level) pairs to drive, then how many bare
    # yields (simulation steps) follow them.
    script = (
        ((("dest_i", 1), ("issue_i", 1)), 1),
        ((("issue_i", 0),), 1),
        ((("src1_i", 1), ("issue_i", 1)), 3),
        ((("issue_i", 0),), 1),
        ((("go_read_i", 1),), 1),
        ((("go_read_i", 0),), 1),
        ((("go_write_i", 1),), 1),
        ((("go_write_i", 0),), 1),
    )
    for drives, steps in script:
        for port_name, level in drives:
            yield getattr(dut, port_name).eq(level)
        for _ in range(steps):
            yield
def test_dcell():
    """Dump a DependenceCell as RTLIL to test_dcell.il, then simulate it."""
    dut = DependenceCell()
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_dcell.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(dut, dcell_sim(dut), vcd_name='test_dcell.vcd')
# Run the RTLIL dump and simulation only when executed as a script.
if __name__ == '__main__':
    test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-from shadow_fn import ShadowFn
class FnUnit(Elaboratable):
    """ implements 11.4.8 function unit, p31
        also implements optional shadowing 11.5.1, p55

        shadowing can be used for branches as well as exceptions (interrupts),
        load/store hold (exceptions again), and vector-element predication
        (once the predicate is known, which it may not be at instruction issue)

        Inputs

        * :wid:         register file width
        * :shadow_wid:  number of shadow/fail/good/go_die sets
        * :n_dests:     number of destination regfile(s) (index: rfile_sel_i)
        * :wr_pend:     if true, writable observes the g_wr_pend_i vector
                        otherwise observes g_rd_pend_i

        notes:

        * dest_i / src1_i / src2_i are in *binary*, whereas...
        * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY
        * req_rel_i (request release) is the direct equivalent of pipeline
                    "output valid" (valid_o)
        * recover is a local python variable (actually go_die_o)
        * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
        * wr_pend is set False for the majority of uses: however for
          use in a STORE Function Unit it is set to True

        NOTE(review): wr_pend is stored on the instance but elaborate() in
        this class never reads it.  __iter__ / ports() do not yield
        rfile_sel_i, busy_o, go_die_o or the shadow inputs.
    """
    def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False):
        self.reg_width = wid
        self.n_dests = n_dests
        self.shadow_wid = shadow_wid
        self.wr_pend = wr_pend

        # inputs
        if n_dests > 1:
            self.rfile_sel_i = Signal(max=n_dests, reset_less=True)
        else:
            self.rfile_sel_i = Const(0) # no selection.  gets Array[0]
        self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in (top)
        self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in (top)
        self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in (top)
        self.issue_i = Signal(reset_less=True)    # Issue in (top)

        self.go_write_i = Signal(reset_less=True) # Go Write in (left)
        self.go_read_i = Signal(reset_less=True)  # Go Read in (left)
        self.req_rel_i = Signal(reset_less=True)  # request release (left)

        # one global pending vector per destination regfile
        self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i") \
                               for i in range(n_dests)) # global rd (right)
        self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right)

        # the shadow ports (and go_die_o) only exist when shadow_wid > 0
        if shadow_wid:
            self.shadow_i = Signal(shadow_wid, reset_less=True)
            self.s_fail_i = Signal(shadow_wid, reset_less=True)
            self.s_good_i = Signal(shadow_wid, reset_less=True)
            self.go_die_o = Signal(reset_less=True)

        # outputs
        self.readable_o = Signal(reset_less=True) # Readable out (right)
        self.writable_o = Array(Signal(reset_less=True, name="writable_o") \
                               for i in range(n_dests)) # writable out (right)
        self.busy_o = Signal(reset_less=True) # busy out (left)

        self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right)
        self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o") \
                               for i in range(n_dests))# wr pending (right)

    def elaborate(self, platform):
        """Build the read/write latches, decoders and optional shadow logic."""
        m = Module()
        m.submodules.rd_l = rd_l = SRLatch(sync=False)
        m.submodules.wr_l = wr_l = SRLatch(sync=False)
        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
        m.submodules.src2_d = src2_d = Decoder(self.reg_width)
        # one ShadowFn submodule per shadow bit, named shadow0, shadow1, ...
        s_latches = []
        for i in range(self.shadow_wid):
            sh = ShadowFn()
            setattr(m.submodules, "shadow%d" % i, sh)
            s_latches.append(sh)

        # shadow / recover (optional: shadow_wid > 0)
        if self.shadow_wid:
            recover = self.go_die_o
            shadown = Signal(reset_less=True)
            i_l = []
            fail_l = []
            good_l = []
            shi_l = []
            sho_l = []
            rec_l = []
            # get list of latch signals. really must be a better way to do this
            for l in s_latches:
                i_l.append(l.issue_i)
                shi_l.append(l.shadow_i)
                fail_l.append(l.s_fail_i)
                good_l.append(l.s_good_i)
                sho_l.append(l.shadow_o)
                rec_l.append(l.recover_o)
            # NOTE(review): issue_i is one bit wide but is assigned to the
            # concatenation of all shadow_wid latch issue inputs; presumably
            # only the first latch sees it - confirm the intended fan-out.
            m.d.comb += Cat(*i_l).eq(self.issue_i)
            m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
            m.d.comb += Cat(*good_l).eq(self.s_good_i)
            m.d.comb += Cat(*shi_l).eq(self.shadow_i)
            # shadown is high when no shadow output is set; recover when any
            # latch raises recover_o
            m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
            m.d.comb += recover.eq(Cat(*rec_l).bool())
        else:
            shadown = Const(1)
            recover = Const(0)

        # selector
        xx_pend_o = self.xx_pend_o[self.rfile_sel_i]
        writable_o = self.writable_o[self.rfile_sel_i]
        g_pend_i = self.g_xx_pend_i[self.rfile_sel_i]

        # These defaults must stay ahead of the assignments to the selected
        # xx_pend_o / writable_o further down, which override them.
        for i in range(self.n_dests):
            m.d.comb += self.xx_pend_o[i].eq(0)  # initialise all array
            m.d.comb += self.writable_o[i].eq(0) # to zero

        # go_write latch: reset on go_write HI, set on issue
        m.d.comb += wr_l.s.eq(self.issue_i)
        m.d.comb += wr_l.r.eq(self.go_write_i | recover)

        # src1 latch: reset on go_read HI, set on issue
        m.d.comb += rd_l.s.eq(self.issue_i)
        m.d.comb += rd_l.r.eq(self.go_read_i | recover)

        # dest decoder: write-pending out
        m.d.comb += dest_d.i.eq(self.dest_i)
        m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted
        m.d.comb += self.busy_o.eq(wr_l.q) # busy if set
        m.d.comb += xx_pend_o.eq(dest_d.o)

        # src1/src2 decoder: read-pending out
        m.d.comb += src1_d.i.eq(self.src1_i)
        m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted
        m.d.comb += src2_d.i.eq(self.src2_i)
        m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted
        m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)

        # readable output signal
        # (high when any bit is set in both g_wr_pend_i and rd_pend_o)
        g_rd = Signal(self.reg_width, reset_less=True)
        m.d.comb += g_rd.eq(self.g_wr_pend_i & self.rd_pend_o)
        m.d.comb += self.readable_o.eq(g_rd.bool())

        # writable output signal
        # (needs: no overlap between the selected global vector and this
        #  unit's pending vector, rd_l.q set, req_rel_i and shadown)
        g_wr_v = Signal(self.reg_width, reset_less=True)
        g_wr = Signal(reset_less=True)
        wo = Signal(reset_less=True)
        m.d.comb += g_wr_v.eq(g_pend_i & xx_pend_o)
        m.d.comb += g_wr.eq(~g_wr_v.bool())
        m.d.comb += wo.eq(g_wr & rd_l.q & self.req_rel_i & shadown)
        m.d.comb += writable_o.eq(wo)

        return m

    def __iter__(self):
        """Yield the signals exposed as ports (see class NOTE for omissions)."""
        yield self.dest_i
        yield self.src1_i
        yield self.src2_i
        yield self.issue_i
        yield self.go_write_i
        yield self.go_read_i
        yield self.req_rel_i
        yield from self.g_xx_pend_i
        yield self.g_wr_pend_i
        yield self.readable_o
        yield from self.writable_o
        yield self.rd_pend_o
        yield from self.xx_pend_o

    def ports(self):
        """Return the signals from __iter__ as a list (used as RTLIL ports)."""
        return list(self)
-############# ###############
-# --- --- #
-# --- renamed / redirected from base class --- #
-# --- --- #
-# --- below are convenience classes which match the names --- #
-# --- of the various mitch alsup book chapter gate diagrams --- #
-# --- --- #
-############# ###############
class IntFnUnit(FnUnit):
    """FnUnit with one destination regfile, exposed under integer names.

    Every alias is the very same signal object as the base-class attribute;
    the signal's ``name`` is changed to the alias as well.
    """
    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        renamed_ports = (
            ("int_rd_pend_o", self.rd_pend_o),
            ("int_wr_pend_o", self.xx_pend_o[0]),
            ("g_int_wr_pend_i", self.g_wr_pend_i),
            ("g_int_rd_pend_i", self.g_xx_pend_i[0]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
        )
        for alias, signal in renamed_ports:
            setattr(self, alias, signal)
            signal.name = alias
class FPFnUnit(FnUnit):
    """FnUnit with one destination regfile, exposed under fp names.

    Every alias is the very same signal object as the base-class attribute;
    the signal's ``name`` is changed to the alias as well.
    """
    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        renamed_ports = (
            ("fp_rd_pend_o", self.rd_pend_o),
            ("fp_wr_pend_o", self.xx_pend_o[0]),
            ("g_fp_wr_pend_i", self.g_wr_pend_i),
            ("g_fp_rd_pend_i", self.g_xx_pend_i[0]),
            ("fp_writable_o", self.writable_o[0]),
            ("fp_readable_o", self.readable_o),
        )
        for alias, signal in renamed_ports:
            setattr(self, alias, signal)
            signal.name = alias
class LDFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)

        * when rfile_sel_i == 0, int_wr_pend_o is set
        * when rfile_sel_i == 1, fp_wr_pend_o is set

        Index 0 of each per-destination array is exposed under an int_*
        name and index 1 under an fp_* name; the signals are renamed to
        match their alias.
    """
    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2)
        renamed_ports = (
            ("int_rd_pend_o", self.rd_pend_o),
            ("int_wr_pend_o", self.xx_pend_o[0]),
            ("fp_wr_pend_o", self.xx_pend_o[1]),
            ("g_int_wr_pend_i", self.g_wr_pend_i),
            ("g_int_rd_pend_i", self.g_xx_pend_i[0]),
            ("g_fp_rd_pend_i", self.g_xx_pend_i[1]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
            ("fp_writable_o", self.writable_o[1]),
        )
        for alias, signal in renamed_ports:
            setattr(self, alias, signal)
            signal.name = alias
class STFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)

        * the base class is constructed with wr_pend=True
        * index 0 of the per-destination arrays is exposed as the 2nd int
          read-pending vector / int write-pending input / int writable
        * index 1 is exposed as the fp read-pending vector / fp
          write-pending input / fp writable
        * the base-class g_wr_pend_i attribute is deliberately rebound to
          g_xx_pend_i[0]
    """
    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True)
        # yes overwrite FnUnit base class g_wr_pend_i vector
        self.g_wr_pend_i = self.g_xx_pend_i[0]
        renamed_ports = (
            ("int_rd_pend_o", self.rd_pend_o),       # 1st int read-pending vector
            ("int2_rd_pend_o", self.xx_pend_o[0]),   # 2nd int read-pending vector
            ("fp_rd_pend_o", self.xx_pend_o[1]),     # 1x FP read-pending vector
            ("g_int_wr_pend_i", self.g_xx_pend_i[0]),
            ("g_fp_wr_pend_i", self.g_xx_pend_i[1]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
            ("fp_writable_o", self.writable_o[1]),
        )
        for alias, signal in renamed_ports:
            setattr(self, alias, signal)
            signal.name = alias
def int_fn_unit_sim(dut):
    """Stimulus for a function unit: issue dest, issue src1, read, then write.

    Nothing is asserted; the sequence only produces a waveform to inspect.
    """
    # Each entry: the (port, level) pairs to drive, then how many bare
    # yields (simulation steps) follow them.
    script = (
        ((("dest_i", 1), ("issue_i", 1)), 1),
        ((("issue_i", 0),), 1),
        ((("src1_i", 1), ("issue_i", 1)), 3),
        ((("issue_i", 0),), 1),
        ((("go_read_i", 1),), 1),
        ((("go_read_i", 0),), 1),
        ((("go_write_i", 1),), 1),
        ((("go_write_i", 0),), 1),
    )
    for drives, steps in script:
        for port_name, level in drives:
            yield getattr(dut, port_name).eq(level)
        for _ in range(steps):
            yield
def test_int_fn_unit():
    """Dump three function-unit variants as RTLIL, then simulate the last.

    Writes test_fn_unit.il, test_ld_fn_unit.il and test_st_fn_unit.il; the
    simulation (test_fn_unit.vcd) runs on the STFnUnit built last.
    """
    builds = (
        (lambda: FnUnit(32, 2, 2), "test_fn_unit.il"),
        (lambda: LDFnUnit(32, 2), "test_ld_fn_unit.il"),
        (lambda: STFnUnit(32, 0), "test_st_fn_unit.il"),
    )
    for make_dut, il_path in builds:
        dut = make_dut()
        rtlil_text = rtlil.convert(dut, ports=dut.ports())
        with open(il_path, "w") as il_file:
            il_file.write(rtlil_text)

    run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd')
# Run the RTLIL dumps and simulation only when executed as a script.
if __name__ == '__main__':
    test_int_fn_unit()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable
-from nmutil.latch import SRLatch
class FUDependenceCell(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27

        The cell holds two SRLatch submodules.  The write latch is set when
        issue_i and wr_pend_i are both raised and reset by go_write_i; the
        read latch is set when issue_i and rd_pend_i are both raised and
        reset by go_read_i.  wr_pend_o / rd_pend_o are driven from each
        latch's ``qn`` output.
    """
    def __init__(self):
        # NOTE: no name= is passed to Signal(); the signal names come from
        # the attribute each Signal is assigned to, so keep this form.
        # inputs
        self.rd_pend_i = Signal(reset_less=True)  # read pending in (left)
        self.wr_pend_i = Signal(reset_less=True)  # write pending in (left)
        self.issue_i = Signal(reset_less=True)    # Issue in (top)

        self.go_write_i = Signal(reset_less=True) # Go Write in (left)
        self.go_read_i = Signal(reset_less=True)  # Go Read in (left)

        # outputs (latched rd/wr pend)
        self.rd_pend_o = Signal(reset_less=True)  # read pending out (right)
        self.wr_pend_o = Signal(reset_less=True)  # write pending out (right)

    def elaborate(self, platform):
        """Build the two latches and wire the pending outputs."""
        m = Module()
        m.submodules.rd_l = rd_l = SRLatch()
        m.submodules.wr_l = wr_l = SRLatch()

        # write latch: reset on go_write HI, set on write pending and issue
        m.d.comb += wr_l.s.eq(self.issue_i & self.wr_pend_i)
        m.d.comb += wr_l.r.eq(self.go_write_i)

        # read latch: reset on go_read HI, set on read pending and issue
        m.d.comb += rd_l.s.eq(self.issue_i & self.rd_pend_i)
        m.d.comb += rd_l.r.eq(self.go_read_i)

        # Read/Write Pending Latches (read out horizontally)
        # NOTE(review): driven from ``qn``, presumably the inverted latch
        # state - confirm against nmutil.latch.SRLatch.
        m.d.comb += self.wr_pend_o.eq(wr_l.qn)
        m.d.comb += self.rd_pend_o.eq(rd_l.qn)

        return m

    def __iter__(self):
        """Yield every input and output signal, inputs first."""
        yield self.rd_pend_i
        yield self.wr_pend_i
        yield self.issue_i
        yield self.go_write_i
        yield self.go_read_i
        yield self.rd_pend_o
        yield self.wr_pend_o

    def ports(self):
        """Return the signals from __iter__ as a list (used as RTLIL ports)."""
        return list(self)
def dcell_sim(dut):
    """Stimulus for FUDependenceCell: issue write, issue read, read, write.

    Nothing is asserted; the sequence only produces a waveform to inspect.

    The cell has no dest_i / src1_i ports (those belong to DependenceCell),
    so the write-pending and read-pending inputs are driven in their place.
    """
    # issue with write-pending raised (sets the write latch)
    yield dut.wr_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with read-pending raised (sets the read latch)
    yield dut.rd_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # pulse go_read (resets the read latch)
    yield dut.go_read_i.eq(1)
    yield
    yield dut.go_read_i.eq(0)
    yield
    # pulse go_write (resets the write latch)
    yield dut.go_write_i.eq(1)
    yield
    yield dut.go_write_i.eq(0)
    yield
def test_dcell():
    """Dump a FUDependenceCell as RTLIL to test_fu_dcell.il, then simulate it."""
    dut = FUDependenceCell()
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fu_dcell.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd')
# Run the RTLIL dump and simulation only when executed as a script.
if __name__ == '__main__':
    test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat
-#from nmutil.latch import SRLatch
-from fu_dep_cell import FUDependenceCell
-from fu_picker_vec import FU_Pick_Vec
- 6600 Function Unit Dependency Table Matrix inputs / outputs
- -----------------------------------------------------------
class FUFUDepMatrix(Elaboratable):
    """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26

        NOTE(review): the summary line above says "FU-to-Reg" but both axes
        of this matrix are Function Units (n_fu_row x n_fu_col) - presumably
        copied from the FU-Reg matrix; confirm the intended reference.

        The matrix is n_fu_col columns of n_fu_row FUDependenceCell
        instances, plus one FU_Pick_Vec per column that reduces the column's
        pending outputs to one readable and one writable bit.
    """
    def __init__(self, n_fu_row, n_fu_col):
        self.n_fu_row = n_fu_row                  # Y (FU row#)   ^v
        self.n_fu_col = n_fu_col                # X (FU col #)  <>
        self.rd_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left)
        self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left)
        self.issue_i = Signal(n_fu_col, reset_less=True)    # Issue in (top)

        self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
        self.go_read_i = Signal(n_fu_row, reset_less=True)  # Go Read in (left)

        # for Function Unit Readable/Writable (horizontal)
        self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot)
        self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot)

    def elaborate(self, platform):
        """Instantiate the cell matrix and wire it to the module ports."""
        m = Module()

        # ---
        # matrix of dependency cells
        # ---
        # indexed dm[x][y]: x is the column, y is the row
        dm = Array(Array(FUDependenceCell() for r in range(self.n_fu_row)) \
                                            for f in range(self.n_fu_col))
        for x in range(self.n_fu_col):
            for y in range(self.n_fu_row):
                setattr(m.submodules, "dm_fx%d_fy%d" % (x, y), dm[x][y])

        # ---
        # array of Function Unit Readable/Writable: row-length, horizontal
        # ---
        fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col))
        for x in range(self.n_fu_col):
            setattr(m.submodules, "fur_x%d" % (x), fur[x])

        # ---
        # connect FU Readable/Writable vector
        # ---
        readable = []
        writable = []
        for x in range(self.n_fu_col):
            fu = fur[x]
            rd_pend_o = []
            wr_pend_o = []
            for y in range(self.n_fu_row):
                dc = dm[x][y]
                # accumulate cell outputs rd/wr-pending
                rd_pend_o.append(dc.rd_pend_o)
                wr_pend_o.append(dc.wr_pend_o)
            # connect cell reg-select outputs to Reg Vector In
            m.d.comb += [fu.rd_pend_i.eq(Cat(*rd_pend_o)),
                         fu.wr_pend_i.eq(Cat(*wr_pend_o)),
                        ]
            # accumulate Readable/Writable Vector outputs
            readable.append(fu.readable_o)
            writable.append(fu.writable_o)

        # ... and output them from this module (horizontal, width=REGs)
        m.d.comb += self.readable_o.eq(Cat(*readable))
        m.d.comb += self.writable_o.eq(Cat(*writable))

        # ---
        # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
        # ---
        # every row receives the same issue_i vector, one bit per column
        for y in range(self.n_fu_row):
            issue_i = []
            for x in range(self.n_fu_col):
                dc = dm[x][y]
                # accumulate cell inputs issue
                issue_i.append(dc.issue_i)
            # wire up inputs from module to row cell inputs (Cat is gooood)
            m.d.comb += Cat(*issue_i).eq(self.issue_i)

        # ---
        # connect Matrix go_read_i/go_write_i to module readable/writable
        # ---
        # every column receives the same go/pending vectors, one bit per row
        for x in range(self.n_fu_col):
            go_read_i = []
            go_write_i = []
            rd_pend_i = []
            wr_pend_i = []
            for y in range(self.n_fu_row):
                dc = dm[x][y]
                # accumulate cell rd_pend/wr_pend/go_read/go_write
                rd_pend_i.append(dc.rd_pend_i)
                wr_pend_i.append(dc.wr_pend_i)
                go_read_i.append(dc.go_read_i)
                go_write_i.append(dc.go_write_i)
            # wire up inputs from module to row cell inputs (Cat is gooood)
            m.d.comb += [Cat(*go_read_i).eq(self.go_read_i),
                         Cat(*go_write_i).eq(self.go_write_i),
                         Cat(*rd_pend_i).eq(self.rd_pend_i),
                         Cat(*wr_pend_i).eq(self.wr_pend_i),
                        ]

        return m

    def __iter__(self):
        """Yield every input and output signal, inputs first."""
        yield self.rd_pend_i
        yield self.wr_pend_i
        yield self.issue_i
        yield self.go_write_i
        yield self.go_read_i
        yield self.readable_o
        yield self.writable_o

    def ports(self):
        """Return the signals from __iter__ as a list (used as RTLIL ports)."""
        return list(self)
def d_matrix_sim(dut):
    """ XXX TODO

        Placeholder stimulus for FUFUDepMatrix: nothing is asserted, the
        sequence only produces a waveform to inspect.

        The matrix has no dest_i / src1_i ports, so the write-pending and
        read-pending input vectors are driven in their place.
    """
    # issue with write-pending raised
    yield dut.wr_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with read-pending raised
    yield dut.rd_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # pulse go_read
    yield dut.go_read_i.eq(1)
    yield
    yield dut.go_read_i.eq(0)
    yield
    # pulse go_write
    yield dut.go_write_i.eq(1)
    yield
    yield dut.go_write_i.eq(0)
    yield
def test_fu_fu_matrix():
    """Dump a 3-row, 4-column FUFUDepMatrix as RTLIL, then simulate it."""
    dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fu_fu_matrix.il", "w") as il_file:
        il_file.write(rtlil_text)

    run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd')
# Run the RTLIL dump and simulation only when executed as a script.
if __name__ == '__main__':
    test_fu_fu_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Cat
class FU_Pick_Vec(Elaboratable):
    """ these are allocated per-FU (horizontally),
        and are of length fu_row_n

        Reduces the rd_pend_i / wr_pend_i vectors to the single-bit
        readable_o / writable_o outputs using ``.bool()``.
    """
    def __init__(self, fu_row_n):
        self.fu_row_n = fu_row_n
        # inputs: one bit per FU row
        self.rd_pend_i = Signal(fu_row_n, reset_less=True)
        self.wr_pend_i = Signal(fu_row_n, reset_less=True)

        # outputs: single-bit reductions of the vectors above
        self.readable_o = Signal(reset_less=True)
        self.writable_o = Signal(reset_less=True)

    def elaborate(self, platform):
        """Wire each output to the boolean reduction of its input vector."""
        m = Module()

        # NOTE(review): readable_o is high when rd_pend_i is non-zero and
        # writable_o when wr_pend_i is non-zero - confirm this polarity is
        # the intended one for "readable"/"writable".
        m.d.comb += self.readable_o.eq(self.rd_pend_i.bool())
        m.d.comb += self.writable_o.eq(self.wr_pend_i.bool())

        return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat
-#from nmutil.latch import SRLatch
-from dependence_cell import DependenceCell
-from fu_wr_pending import FU_RW_Pend
-from reg_select import Reg_Rsv
- 6600 Dependency Table Matrix inputs / outputs
- ---------------------------------------------
- d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i
- | | | | | | | | | | | | | | | |
- v v v v v v v v v v v v v v v v
- go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend
- go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend
- go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend
- | | | | | | | | | | | |
- v v v v v v v v v v v v
- d s1 s2 d s1 s2 d s1 s2 d s1 s2
- reg sel reg sel reg sel reg sel
-class FURegDepMatrix(Elaboratable):
- """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26
- """
- def __init__(self, n_fu_row, n_reg_col):
- self.n_fu_row = n_fu_row # Y (FUs) ^v
- self.n_reg_col = n_reg_col # X (Regs) <>
- self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top)
- self.src1_i = Signal(n_reg_col, reset_less=True) # oper1 in (top)
- self.src2_i = Signal(n_reg_col, reset_less=True) # oper2 in (top)
- self.issue_i = Signal(n_reg_col, reset_less=True) # Issue in (top)
- self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
- self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
- # for Register File Select Lines (horizontal), per-reg
- self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot)
- self.src1_rsel_o = Signal(n_reg_col, reset_less=True) # src1 reg (bot)
- self.src2_rsel_o = Signal(n_reg_col, reset_less=True) # src2 reg (bot)
- # for Function Unit "forward progress" (vertical), per-FU
- self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right)
- self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right)
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells
- # ---
- dm = Array(Array(DependenceCell() for r in range(self.n_fu_row)) \
- for f in range(self.n_reg_col))
- for rn in range(self.n_reg_col):
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "dm_r%d_fu%d" % (rn, fu), dm[rn][fu])
- # ---
- # array of Function Unit Pending vectors
- # ---
- fupend = Array(FU_RW_Pend(self.n_reg_col) for f in range(self.n_fu_row))
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])
- # ---
- # array of Register Reservation vectors
- # ---
- regrsv = Array(Reg_Rsv(self.n_fu_row) for r in range(self.n_reg_col))
- for rn in range(self.n_reg_col):
- setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])
- # ---
- # connect Function Unit vector
- # ---
- wr_pend = []
- rd_pend = []
- for fu in range(self.n_fu_row):
- fup = fupend[fu]
- dest_fwd_o = []
- src1_fwd_o = []
- src2_fwd_o = []
- for rn in range(self.n_reg_col):
- dc = dm[rn][fu]
- # accumulate cell fwd outputs for dest/src1/src2
- dest_fwd_o.append(dc.dest_fwd_o)
- src1_fwd_o.append(dc.src1_fwd_o)
- src2_fwd_o.append(dc.src2_fwd_o)
- # connect cell fwd outputs to FU Vector in [Cat is gooood]
- m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)),
- fup.src1_fwd_i.eq(Cat(*src1_fwd_o)),
- fup.src2_fwd_i.eq(Cat(*src2_fwd_o))
- ]
- # accumulate FU Vector outputs
- wr_pend.append(fup.reg_wr_pend_o)
- rd_pend.append(fup.reg_rd_pend_o)
- # ... and output them from this module (vertical, width=FUs)
- m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend))
- m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend))
- # ---
- # connect Reg Selection vector
- # ---
- dest_rsel = []
- src1_rsel = []
- src2_rsel = []
- for rn in range(self.n_reg_col):
- rsv = regrsv[rn]
- dest_rsel_o = []
- src1_rsel_o = []
- src2_rsel_o = []
- for fu in range(self.n_fu_row):
- dc = dm[rn][fu]
- # accumulate cell reg-select outputs dest/src1/src2
- dest_rsel_o.append(dc.dest_rsel_o)
- src1_rsel_o.append(dc.src1_rsel_o)
- src2_rsel_o.append(dc.src2_rsel_o)
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += [rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)),
- rsv.src1_rsel_i.eq(Cat(*src1_rsel_o)),
- rsv.src2_rsel_i.eq(Cat(*src2_rsel_o)),
- ]
- # accumulate Reg-Sel Vector outputs
- dest_rsel.append(rsv.dest_rsel_o)
- src1_rsel.append(rsv.src1_rsel_o)
- src2_rsel.append(rsv.src2_rsel_o)
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel))
- m.d.comb += self.src1_rsel_o.eq(Cat(*src1_rsel))
- m.d.comb += self.src2_rsel_o.eq(Cat(*src2_rsel))
- # ---
- # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
- # ---
- for rn in range(self.n_reg_col):
- dest_i = []
- src1_i = []
- src2_i = []
- issue_i = []
- for fu in range(self.n_fu_row):
- dc = dm[rn][fu]
- # accumulate cell inputs dest/src1/src2
- dest_i.append(dc.dest_i)
- src1_i.append(dc.src1_i)
- src2_i.append(dc.src2_i)
- issue_i.append(dc.issue_i)
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [Cat(*dest_i).eq(self.dest_i),
- Cat(*src1_i).eq(self.src1_i),
- Cat(*src2_i).eq(self.src2_i),
- Cat(*issue_i).eq(self.issue_i),
- ]
- # ---
- # connect Dependency Matrix go_read_i/go_write_i to module go_rd/go_wr
- # ---
- for fu in range(self.n_fu_row):
- go_read_i = []
- go_write_i = []
- for rn in range(self.n_reg_col):
- dc = dm[rn][fu]
- # accumulate cell fwd outputs for dest/src1/src2
- go_read_i.append(dc.go_read_i)
- go_write_i.append(dc.go_write_i)
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [Cat(*go_read_i).eq(self.go_read_i),
- Cat(*go_write_i).eq(self.go_write_i),
- ]
- return m
- def __iter__(self):
- yield self.dest_i
- yield self.src1_i
- yield self.src2_i
- yield self.issue_i
- yield self.go_write_i
- yield self.go_read_i
- yield self.dest_rsel_o
- yield self.src1_rsel_o
- yield self.src2_rsel_o
- yield self.wr_pend_o
- yield self.rd_pend_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_d_matrix():
- dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_reg_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd')
-if __name__ == '__main__':
- test_d_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Cat
-class FU_RW_Pend(Elaboratable):
- """ these are allocated per-FU (horizontally),
- and are of length reg_count
- """
- def __init__(self, reg_count):
- self.reg_count = reg_count
- self.dest_fwd_i = Signal(reg_count, reset_less=True)
- self.src1_fwd_i = Signal(reg_count, reset_less=True)
- self.src2_fwd_i = Signal(reg_count, reset_less=True)
- self.reg_wr_pend_o = Signal(reset_less=True)
- self.reg_rd_pend_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- srces = Cat(self.src1_fwd_i, self.src2_fwd_i)
- m.d.comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool())
- m.d.comb += self.reg_rd_pend_o.eq(srces.bool())
- return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-class GlobalPending(Elaboratable):
- """ implements Global Pending Vector, basically ORs all incoming Function
- Unit vectors together. Can be used for creating Read or Write Global
- Pending. Can be used for INT or FP Global Pending.
- Inputs:
- * :wid: register file width
- * :fu_vecs: a python list of function unit "pending" vectors, each
- vector being a Signal of width equal to the reg file.
- Notes:
- * the regfile may be Int or FP, this code doesn't care which.
- obviously do not try to put in a mixture of regfiles into fu_vecs.
- * this code also doesn't care if it's used for Read Pending or Write
- pending, it can be used for both: again, obviously, do not try to
- put in a mixture of read *and* write pending vectors in.
- * if some Function Units happen not to be uniform (don't operate
- on a particular register (extremely unusual), they must set a Const
- zero bit in the vector.
- """
- def __init__(self, wid, fu_vecs):
- self.reg_width = wid
- # inputs
- self.fu_vecs = fu_vecs
- for v in fu_vecs:
- assert len(v) == wid, "FU Vector must be same width as regfile"
- self.g_pend_o = Signal(wid, reset_less=True) # global pending vector
- def elaborate(self, platform):
- m = Module()
- pend_l = []
- for i in range(self.reg_width): # per-register
- vec_bit_l = []
- for v in self.fu_vecs:
- vec_bit_l.append(v[i]) # fu bit for same register
- pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg
- m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
- return m
- def __iter__(self):
- yield from self.fu_vecs
- yield self.g_pend_o
- def ports(self):
- return list(self)
-def g_vec_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_g_vec():
- vecs = []
- for i in range(3):
- vecs.append(Signal(32, name="fu%d" % i))
- dut = GlobalPending(32, vecs)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_global_pending.il", "w") as f:
- f.write(vl)
- run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
-if __name__ == '__main__':
- test_g_vec()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-class PriorityPicker(Elaboratable):
- """ implements a priority-picker. input: N bits, output: N bits
- """
- def __init__(self, wid):
- self.wid = wid
- # inputs
- self.i = Signal(wid, reset_less=True)
- self.o = Signal(wid, reset_less=True)
- def elaborate(self, platform):
- m = Module()
- res = []
- for i in range(0, self.wid):
- tmp = Signal(reset_less = True)
- if i == 0:
- m.d.comb += tmp.eq(self.i[0])
- else:
- m.d.comb += tmp.eq((~tmp) & self.i[i])
- res.append(tmp)
- # we like Cat(*xxx). turn lists into concatenated bits
- m.d.comb += self.o.eq(Cat(*res))
- return m
- def __iter__(self):
- yield self.i
- yield self.o
- def ports(self):
- return list(self)
-class GroupPicker(Elaboratable):
- """ implements 10.5 mitch alsup group picker, p27
- """
- def __init__(self, wid):
- self.gp_wid = wid
- # inputs
- self.readable_i = Signal(wid, reset_less=True) # readable in (top)
- self.writable_i = Signal(wid, reset_less=True) # writable in (top)
- self.rel_req_i = Signal(wid, reset_less=True) # release request in (top)
- # outputs
- self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom)
- self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom)
- def elaborate(self, platform):
- m = Module()
- m.submodules.rpick = rpick = PriorityPicker(self.gp_wid)
- m.submodules.wpick = wpick = PriorityPicker(self.gp_wid)
- # combine release (output ready signal) with writeable
- m.d.comb += wpick.i.eq(self.writable_i & self.rel_req_i)
- m.d.comb += self.go_wr_o.eq(wpick.o)
- m.d.comb += rpick.i.eq(self.readable_i)
- m.d.comb += self.go_rd_o.eq(rpick.o)
- return m
- def __iter__(self):
- yield self.readable_i
- yield self.writable_i
- yield self.rel_req_i
- yield self.go_rd_o
- yield self.go_wr_o
- def ports(self):
- return list(self)
-def grp_pick_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_grp_pick():
- dut = GroupPicker(4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_grp_pick.il", "w") as f:
- f.write(vl)
- run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd')
-if __name__ == '__main__':
- test_grp_pick()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-from shadow_fn import ShadowFn
-class IssueUnit(Elaboratable):
- """ implements 11.4.14 issue unit, p50
- Inputs
- * :wid: register file width
- * :n_insns: number of instructions in this issue unit.
- """
- def __init__(self, wid, n_insns):
- self.reg_width = wid
- self.n_insns = n_insns
- # inputs
- self.store_i = Signal(reset_less=True) # instruction is a store
- self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in
- self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in
- self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in
- self.g_wr_pend_i = Signal(wid, reset_less=True) # write pending vector
- self.insn_i = Array(Signal(reset_less=True, name="insn_i") \
- for i in range(n_insns))
- self.busy_i = Array(Signal(reset_less=True, name="busy_i") \
- for i in range(n_insns))
- # outputs
- self.fn_issue_o = Array(Signal(reset_less=True, name="fn_issue_o") \
- for i in range(n_insns))
- self.g_issue_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.submodules.dest_d = dest_d = Decoder(self.reg_width)
- # temporaries
- waw_stall = Signal(reset_less=True)
- fu_stall = Signal(reset_less=True)
- pend = Signal(self.reg_width, reset_less=True)
- # dest decoder: write-pending
- m.d.comb += dest_d.i.eq(self.dest_i)
- m.d.comb += dest_d.n.eq(~self.store_i) # decode is inverted
- m.d.comb += pend.eq(dest_d.o & self.g_wr_pend_i)
- m.d.comb += waw_stall.eq(pend.bool())
- ib_l = []
- for i in range(self.n_insns):
- ib_l.append(self.insn_i[i] & self.busy_i[i])
- m.d.comb += fu_stall.eq(Cat(*ib_l).bool())
- m.d.comb += self.g_issue_o.eq(~(waw_stall | fu_stall))
- for i in range(self.n_insns):
- m.d.comb += self.fn_issue_o[i].eq(self.g_issue_o & self.insn_i[i])
- return m
- def __iter__(self):
- yield self.store_i
- yield self.dest_i
- yield self.src1_i
- yield self.src2_i
- yield self.g_wr_pend_i
- yield from self.insn_i
- yield from self.busy_i
- yield from self.fn_issue_o
- yield self.g_issue_o
- def ports(self):
- return list(self)
-class IntFPIssueUnit(Elaboratable):
- def __init__(self, wid, n_int_insns, n_fp_insns):
- self.i = IssueUnit(wid, n_int_insns)
- self.f = IssueUnit(wid, n_fp_insns)
- self.issue_o = Signal(reset_less=True)
- # some renames
- self.int_write_pending_i = self.i.g_wr_pend_i
- self.fp_write_pending_i = self.f.g_wr_pend_i
- self.int_write_pending_i.name = 'int_write_pending_i'
- self.fp_write_pending_i.name = 'fp_write_pending_i'
- def elaborate(self, platform):
- m = Module()
- m.submodules.intissue = self.i
- m.submodules.fpissue = self.f
- m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o)
- return m
- def ports(self):
- yield self.issue_o
- yield from self.i
- yield from self.f
-def issue_unit_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_issue_unit():
- dut = IssueUnit(32, 3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_issue_unit.il", "w") as f:
- f.write(vl)
- dut = IntFPIssueUnit(32, 3, 3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_intfp_issue_unit.il", "w") as f:
- f.write(vl)
- run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')
-if __name__ == '__main__':
- test_issue_unit()
+++ /dev/null
-""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
-Relevant bugreports:
-* http://bugs.libre-riscv.org/show_bug.cgi?id=81
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable
-from nmutil.latch import SRLatch
-class LDSTDepCell(Elaboratable):
- """ implements 11.4.12 mitch alsup load/store dependence cell, p45
- """
- def __init__(self):
- # inputs
- self.load_i = Signal(reset_less=True) # load pending in (top)
- self.stor_i = Signal(reset_less=True) # store pending in (top)
- self.issue_i = Signal(reset_less=True) # Issue in (top)
- self.load_hit_i = Signal(reset_less=True) # load hit in (right)
- self.stwd_hit_i = Signal(reset_less=True) # store w/ data hit in (right)
- # outputs (latched rd/wr pend)
- self.ld_hold_st_o = Signal(reset_less=True) # load holds st out (left)
- self.st_hold_ld_o = Signal(reset_less=True) # st holds load out (left)
- def elaborate(self, platform):
- m = Module()
- m.submodules.war_l = war_l = SRLatch(sync=False) # WriteAfterRead Latch
- m.submodules.raw_l = raw_l = SRLatch(sync=False) # ReadAfterWrite Latch
- # issue & store & load - used for both WAR and RAW Setting
- i_s_l = Signal(reset_less=True)
- m.d.comb += i_s_l.eq(self.issue_i & self.stor_i & self.load_i)
- # write after read latch: loads block stores
- m.d.comb += war_l.s.eq(i_s_l)
- m.d.comb += war_l.r.eq(self.load_i) # reset on LD
- # read after write latch: stores block loads
- m.d.comb += raw_l.s.eq(i_s_l)
- m.d.comb += raw_l.r.eq(self.stor_i) # reset on ST
- # Hold results (read out horizontally, accumulate in OR fashion)
- m.d.comb += self.ld_hold_st_o.eq(war_l.qn & self.load_hit_i)
- m.d.comb += self.st_hold_ld_o.eq(raw_l.qn & self.stwd_hit_i)
- return m
- def __iter__(self):
- yield self.load_i
- yield self.stor_i
- yield self.issue_i
- yield self.load_hit_i
- yield self.stwd_hit_i
- yield self.ld_hold_st_o
- yield self.st_hold_ld_o
- def ports(self):
- return list(self)
-def dcell_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_dcell():
- dut = LDSTDepCell()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_ldst_dcell.il", "w") as f:
- f.write(vl)
- run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd')
-if __name__ == '__main__':
- test_dcell()
+++ /dev/null
-""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
-6600 LD/ST Dependency Table Matrix inputs / outputs
-Relevant comments (p45-46):
-* If there are no WAR dependencies on a Load instruction with a computed
- address it can assert Bank_Addressable and Translate_Addressable.
-* If there are no RAW dependencies on a Store instruction with both a
- write permission and store data present it can assert Bank_Addressable
-Relevant bugreports:
-* http://bugs.libre-riscv.org/show_bug.cgi?id=81
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
-from ldst_dep_cell import LDSTDepCell
-class LDSTDepMatrix(Elaboratable):
- """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
- actually a sparse matrix along the diagonal.
- load-hold-store and store-hold-load accumulate in a priority-picking
- fashion, ORing together. the OR gate from the dependency cell is
- here.
- """
- def __init__(self, n_ldst):
- self.n_ldst = n_ldst # X and Y (FUs)
- self.load_i = Signal(n_ldst, reset_less=True) # load pending in
- self.stor_i = Signal(n_ldst, reset_less=True) # store pending in
- self.issue_i = Signal(n_ldst, reset_less=True) # Issue in
- self.load_hit_i = Signal(n_ldst, reset_less=True) # load hit in
- self.stwd_hit_i = Signal(n_ldst, reset_less=True) # store w/data hit in
- # outputs
- self.ld_hold_st_o = Signal(reset_less=True) # load holds st out
- self.st_hold_ld_o = Signal(reset_less=True) # st holds load out
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells
- # ---
- dm = Array(LDSTDepCell() for f in range(self.n_ldst))
- for fu in range(self.n_ldst):
- setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])
- # ---
- # connect Function Unit vector
- # ---
- lhs_l = []
- shl_l = []
- load_l = []
- stor_l = []
- issue_l = []
- lh_l = []
- sh_l = []
- for fu in range(self.n_ldst):
- dc = dm[fu]
- # accumulate load-hold-store / store-hold-load bits
- lhs_l.append(dc.ld_hold_st_o)
- shl_l.append(dc.st_hold_ld_o)
- # accumulate inputs (for Cat'ing later) - TODO: must be a better way
- load_l.append(dc.load_i)
- stor_l.append(dc.stor_i)
- issue_l.append(dc.issue_i)
- lh_l.append(dc.load_hit_i)
- sh_l.append(dc.stwd_hit_i)
- # connect cell inputs using Cat(*list_of_stuff)
- m.d.comb += [Cat(*load_l).eq(self.load_i),
- Cat(*stor_l).eq(self.stor_i),
- Cat(*issue_l).eq(self.issue_i),
- Cat(*lh_l).eq(self.load_hit_i),
- Cat(*sh_l).eq(self.stwd_hit_i),
- ]
- # set the load-hold-store / store-hold-load OR-accumulated outputs
- m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l).bool())
- m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l).bool())
- return m
- def __iter__(self):
- yield self.load_i
- yield self.stor_i
- yield self.issue_i
- yield self.load_hit_i
- yield self.stwd_hit_i
- yield self.ld_hold_st_o
- yield self.st_hold_ld_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_d_matrix():
- dut = LDSTDepMatrix(n_ldst=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_ld_st_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd')
-if __name__ == '__main__':
- test_d_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal
-class Reg_Rsv(Elaboratable):
- """ these are allocated per-Register (vertically),
- and are each of length fu_count
- """
- def __init__(self, fu_count):
- self.fu_count = fu_count
- self.dest_rsel_i = Signal(fu_count, reset_less=True)
- self.src1_rsel_i = Signal(fu_count, reset_less=True)
- self.src2_rsel_i = Signal(fu_count, reset_less=True)
- self.dest_rsel_o = Signal(reset_less=True)
- self.src1_rsel_o = Signal(reset_less=True)
- self.src2_rsel_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool())
- m.d.comb += self.src1_rsel_o.eq(self.src1_rsel_i.bool())
- m.d.comb += self.src2_rsel_o.eq(self.src2_rsel_i.bool())
- return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-from nmutil.latch import SRLatch
-from nmigen.lib.coding import Decoder
-class ShadowFn(Elaboratable):
- """ implements shadowing 11.5.1, p55, just the individual shadow function
- """
- def __init__(self):
- # inputs
- self.issue_i = Signal(reset_less=True)
- self.shadow_i = Signal(reset_less=True)
- self.s_fail_i = Signal(reset_less=True)
- self.s_good_i = Signal(reset_less=True)
- # outputs
- self.shadow_o = Signal(reset_less=True)
- self.recover_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.submodules.sl = sl = SRLatch(sync=False)
- m.d.comb += sl.s.eq(self.shadow_i & self.issue_i)
- m.d.comb += sl.r.eq(self.s_good_i)
- m.d.comb += self.recover_o.eq(sl.q & self.s_fail_i)
- m.d.comb += self.shadow_o.eq(sl.q)
- return m
- def __iter__(self):
- yield self.issue_i
- yield self.shadow_i
- yield self.s_fail_i
- yield self.s_good_i
- yield self.shadow_o
- yield self.recover_o
- def ports(self):
- return list(self)
-def shadow_fn_unit_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_read_i.eq(1)
- yield
- yield dut.go_read_i.eq(0)
- yield
- yield dut.go_write_i.eq(1)
- yield
- yield dut.go_write_i.eq(0)
- yield
-def test_shadow_fn_unit():
- dut = ShadowFn()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_shadow_fn_unit.il", "w") as f:
- f.write(vl)
- run_simulation(dut, shadow_fn_unit_sim(dut),
- vcd_name='test_shadow_fn_unit.vcd')
-if __name__ == '__main__':
- test_shadow_fn_unit()
--- /dev/null
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.lib.coding import Encoder, PriorityEncoder
class AddressEncoder():
    """Address Encoder

    Takes a bit vector and encodes the position of its hot bit(s) into
    an address. The module wraps both nmigen's Encoder and
    PriorityEncoder and reports whether the input has a single bit hot,
    multiple bits hot, or no bits hot. The address output is always the
    one produced by the priority encoder.

    Usage:
        The output is valid when either single_match or multiple_match
        is high. Otherwise the output is 0.
    """

    def __init__(self, width):
        """ Arguments:
            * width: The desired length of the input vector
        """
        # Input
        self.i = Signal(width)

        # Output
        self.single_match = Signal(1)
        self.multiple_match = Signal(1)
        self.o = Signal(max=width)

        # Internal
        self.encoder = Encoder(width)
        self.p_encoder = PriorityEncoder(width)

    def elaborate(self, platform=None):
        m = Module()

        # Register both encoders and feed them the same input vector
        m.submodules.encoder = self.encoder
        m.submodules.p_encoder = self.p_encoder
        m.d.comb += self.encoder.i.eq(self.i)
        m.d.comb += self.p_encoder.i.eq(self.i)

        # p_encoder.n high: the priority encoder received an input of 0,
        #                   so there is nothing to report.
        # encoder.n high:   the plain encoder considers its input
        #                   invalid, which (for a non-zero input) is
        #                   treated as "multiple bits hot".
        no_match = self.p_encoder.n
        not_single = self.encoder.n

        with m.If(no_match):
            # No bits hot: neither flag is raised and the address is 0
            m.d.comb += self.single_match.eq(0)
            m.d.comb += self.multiple_match.eq(0)
            m.d.comb += self.o.eq(0)
        with m.Else():
            # Input is non-zero: exactly one of the two flags is raised
            m.d.comb += self.multiple_match.eq(not_single)
            m.d.comb += self.single_match.eq(~not_single)
            # The address always comes from the priority encoder
            m.d.comb += self.o.eq(self.p_encoder.o)

        return m
--- /dev/null
+from nmigen import Array, Cat, Module, Signal
+from nmigen.lib.coding import Decoder
+from nmigen.cli import main #, verilog
+from CamEntry import CamEntry
+from AddressEncoder import AddressEncoder
class Cam():
    """ Content Addressable Memory (CAM)

        The purpose of this module is to quickly look up whether an
        entry exists given a data key.
        This module will search for the given data in all internal entries
        and output whether a single or multiple match was found.
        If a single entry is found its address is returned and
        single_match is set HIGH. If multiple entries are found the
        address reported by the AddressEncoder is returned and
        multiple_match is set HIGH. If neither single_match nor
        multiple_match is HIGH this implies no match was found. To write
        to the CAM set the address bus to the desired entry and set
        write_enable HIGH. Entry management should be performed one level
        above this block as lookup is performed within.

        Notes:
            The read and write operations take one clock cycle to complete.
            Currently the read_warning line is present for interfacing but
            is not necessary for this design (it is only ever driven to 0
            here). This module is capable of writing in the first cycle,
            reading on the second, and outputting the correct address on
            the third.
    """

    def __init__(self, data_size, cam_size):
        """ Arguments:
            * data_size: (bits) The bit size of the data
            * cam_size: (number) The number of entries in the CAM
        """
        # Internal
        self.cam_size = cam_size
        self.encoder = AddressEncoder(cam_size)
        self.decoder = Decoder(cam_size)
        self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))

        # Input
        self.enable = Signal(1)
        self.write_enable = Signal(1)
        self.data_in = Signal(data_size)  # The data to be written
        # mask for ternary writes (declared, but not used by elaborate)
        self.data_mask = Signal(data_size)
        self.address_in = Signal(max=cam_size)  # address of CAM Entry to write

        # Output
        self.read_warning = Signal(1)  # High when a read interrupts a write
        self.single_match = Signal(1)  # High when there is only one match
        self.multiple_match = Signal(1)  # High when there at least two matches
        self.match_address = Signal(max=cam_size)  # The lowest address matched

    def elaborate(self, platform=None):
        m = Module()
        # AddressEncoder for match types and output address
        m.submodules.AddressEncoder = self.encoder
        # Decoder is used to select which entry will be written to
        m.submodules.Decoder = self.decoder
        # CamEntry Array Submodules
        # Note these are added anonymously
        entry_array = self.entry_array
        m.submodules += entry_array

        # Decoder logic: address_in selects the entry that a write targets
        m.d.comb += [
            self.decoder.i.eq(self.address_in),
            self.decoder.n.eq(0)
        ]

        encoder_vector = []
        with m.If(self.enable):
            # Set the command and key value for every CamEntry
            for index in range(self.cam_size):

                # Write Operation: only the decoded entry gets the write
                # command (2); every other entry is told to do nothing (0)
                with m.If(self.write_enable):
                    with m.If(self.decoder.o[index]):
                        m.d.comb += entry_array[index].command.eq(2)
                    with m.Else():
                        m.d.comb += entry_array[index].command.eq(0)
                # Read Operation: every entry gets the read command (1)
                with m.Else():
                    m.d.comb += entry_array[index].command.eq(1)

                # Send data input to all entries
                m.d.comb += entry_array[index].data_in.eq(self.data_in)
                # Collect every entry's match line for the encoder
                ematch = entry_array[index].match
                encoder_vector.append(ematch)

            # Give input to and accept output from encoder module
            m.d.comb += [
                self.encoder.i.eq(Cat(*encoder_vector)),
                self.single_match.eq(self.encoder.single_match),
                self.multiple_match.eq(self.encoder.multiple_match),
                self.match_address.eq(self.encoder.o)
            ]

        # If the CAM is not enabled set all outputs to 0
        with m.Else():
            m.d.comb += [
                self.read_warning.eq(0),
                self.single_match.eq(0),
                self.multiple_match.eq(0),
                self.match_address.eq(0)
            ]

        return m

    def ports(self):
        """ Returns the list of signals forming this module's interface.

            address_in is included because elaborate() drives the write
            decoder from it; without it the converted design has no way
            to choose which entry a write goes to. It is appended last so
            the positions of the previously listed ports do not change.
        """
        return [self.enable, self.write_enable,
                self.data_in, self.data_mask,
                self.read_warning, self.single_match,
                self.multiple_match, self.match_address,
                self.address_in]
+if __name__ == '__main__':
+ cam = Cam(4, 4)
+ main(cam, ports=cam.ports())
--- /dev/null
+from nmigen import Module, Signal
class CamEntry:
    """ Content Addressable Memory (CAM) Entry

        Represents a single entry within a CAM. On a read command the
        entry compares its stored data against the given data and
        registers whether they match. On a write command it stores the
        given data in its internal register.
    """

    def __init__(self, data_size):
        """ Arguments:
            * data_size: (bit count) The size of the data
        """
        # Input
        # command encoding: 00 => NA, 01 => Read, 10 => Write, 11 => Reset
        self.command = Signal(2)
        self.data_in = Signal(data_size)  # Data input when writing

        # Output
        self.match = Signal(1)  # Result of the internal/input key comparison
        self.data = Signal(data_size)

    def elaborate(self, platform=None):
        m = Module()

        with m.Switch(self.command):
            # NA: nothing to compare, so no match is reported
            with m.Case("00"):
                m.d.sync += self.match.eq(0)
            # Read: match is the result of comparing stored and given data
            with m.Case("01"):
                m.d.sync += self.match.eq(self.data == self.data_in)
            # Write: capture the given data and clear the match flag
            with m.Case("10"):
                m.d.sync += self.data.eq(self.data_in)
                m.d.sync += self.match.eq(0)
            # Any other command (Reset): clear both data and match
            with m.Case():
                m.d.sync += self.data.eq(0)
                m.d.sync += self.match.eq(0)

        return m
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen.cli import verilog, rtlil
class LFSRPolynomial(set):
    """ implements a polynomial for use in LFSR

        The polynomial is stored as the set of its exponents.  Exponent 0
        (the constant term "1") is always a member.
    """

    def __init__(self, exponents=()):
        """ Arguments:
            * exponents: iterable of non-negative ints (list, tuple, set,
              generator or another LFSRPolynomial)

            Validation uses assert statements, so a bad exponent raises
            AssertionError (and is not checked at all under `python -O`).
        """
        # Materialise the input first: a one-shot iterable (generator,
        # iterator) would otherwise be exhausted by the validation loop and
        # contribute nothing when the set is built below.
        exponents = list(exponents)
        for e in exponents:
            assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
            assert (e >= 0), ValueError("%d must not be negative" % e)
        set.__init__(self, set(exponents).union({0}))  # must contain zero

    @property
    def max_exponent(self):
        """ The largest exponent, i.e. the degree of the polynomial. """
        return max(self)  # derived from set, so this returns the max exponent

    @property
    def exponents(self):
        """ All exponents as a list, highest first. """
        return sorted(self, reverse=True)

    def __str__(self):
        """ Human readable form, e.g. "x^3 + x + 1". """
        # keys 0 and 1 are literal exponents; key 2 stands for "2 or more"
        expd = {0: "1", 1: 'x', 2: "x^{}"}
        terms = (expd[min(i, 2)].format(i) for i in self.exponents)
        return " + ".join(terms)

    def __repr__(self):
        return "LFSRPolynomial(%s)" % self.exponents
+# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
+LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
+LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
+LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
+LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
+LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
+LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
+LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
+LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
+LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
+LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
+LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
+LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
+LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
+LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
+LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
+LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
+LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
+LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
+LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
+LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
+LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
+LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
+LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
class LFSR(LFSRPolynomial, Elaboratable):
    """ implements a Linear Feedback Shift Register
    """

    def __init__(self, polynomial):
        """ Inputs:
            ------
            :polynomial: the polynomial to feedback on.  may be a
                         LFSRPolynomial instance or an iterable of ints
                         (list/tuple/generator)
            :enable:     enable (set LO to disable.  NOTE: defaults to HI)

            Outputs:
            -------
            :state:      the LFSR state.  bitwidth is taken from the
                         polynomial maximum exponent.

            Note: if an LFSRPolynomial is passed in as the input, because
            LFSRPolynomial is derived from set() it's ok:
            LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
        """
        LFSRPolynomial.__init__(self, polynomial)
        self.state = Signal(self.max_exponent, reset=1)
        self.enable = Signal(reset=1)

    @property
    def width(self):
        """ Bit width of `state` (the polynomial's maximum exponent).

            Provided because the accompanying type stub declares it.
        """
        return self.max_exponent

    def elaborate(self, platform):
        """ Build the shift-and-feedback logic; returns the Module. """
        m = Module()

        # nothing to build when the maximum exponent is 0 or 1
        # (an empty polynomial still contains the exponent zero)
        if self.max_exponent <= 1:
            return m

        # create XOR-bunch, select bits from state based on exponent
        feedback = Const(0)  # starting the xor chain from 0b0 is harmless
        for exponent in self:
            if exponent > 0:  # exponent 0 has no matching state bit
                feedback ^= self.state[exponent - 1]

        # if enabled, shift-and-feedback
        with m.If(self.enable):
            # shift up lower bits by Cat'ing in a new bit zero (feedback)
            newstate = Cat(feedback, self.state[:-1])
            m.d.sync += self.state.eq(newstate)

        return m
# example: convert the 24-bit polynomial LFSR to RTLIL and save it
if __name__ == '__main__':
    rtlil_text = rtlil.convert(LFSR(LFSR_POLY_24))
    with open("lfsr2_p24.il", "w") as il_file:
        il_file.write(rtlil_text)
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Module
+from typing import Iterable, Optional, Iterator, Any, Union
+from typing_extensions import final
class LFSRPolynomial(set):
    # Set of polynomial exponents; see the implementation module for details.
    def __init__(self, exponents: Iterable[int] = ()) -> None: ...
    # Largest exponent contained in the set.
    @property
    def max_exponent(self) -> int: ...
    # Exponents as a list, highest first.
    @property
    def exponents(self) -> list[int]: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
# The implementation derives LFSR from LFSRPolynomial, so the stub does too;
# this makes the inherited set/polynomial API visible to type checkers.
class LFSR(LFSRPolynomial):
    def __init__(self,
                 polynomial: Union[Iterable[int], LFSRPolynomial]) -> None: ...
    @property
    def width(self) -> int: ...
    def elaborate(self, platform: Any) -> Module: ...
--- /dev/null
+ python3 Cam.py generate -t v > Cam.v
--- /dev/null
+from nmigen import Cat, Memory, Module, Signal, Elaboratable
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
class MemorySet(Elaboratable):
    """ One memory of cache entries, addressed by set number.

        Each stored word is written as Cat(1, data, tag): a marker bit,
        then the data, then the tag.  On the read side the addressed word is
        split up again and `valid` reports whether its tag equals the
        requested tag while its marker bit is set.
    """

    def __init__(self, data_size, tag_size, set_count, active):
        """ Arguments:
            * data_size: (bit count) width of the data field
            * tag_size:  (bit count) width of the tag field
            * set_count: number of words in the memory
            * active:    bit index of the marker ("active") bit in a word
        """
        self.active = active
        self.data_size = data_size
        self.tag_size = tag_size

        # tag + data plus one extra bit per stored word
        memory_width = tag_size + data_size + 1

        # XXX TODO, use rd-enable and wr-enable?
        self.mem = Memory(memory_width, set_count)
        self.r = self.mem.read_port()
        self.w = self.mem.write_port()

        # inputs (address)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)        # The tag to find
        self.data_i = Signal(data_size)    # Incoming data

        # outputs
        self.valid = Signal()
        self.data_o = Signal(data_size)    # Outgoing data (excludes tag)

    def elaborate(self, platform):
        m = Module()
        m.submodules.mem = self.mem
        m.submodules.r = self.r
        m.submodules.w = self.w

        rd, wr = self.r, self.w

        # field boundaries inside a stored word
        data_lo = self.active + 1
        data_hi = data_lo + self.data_size
        tag_lo = data_hi
        tag_hi = tag_lo + self.tag_size

        # temporaries
        active_bit = Signal()
        tag_valid = Signal()

        stored = rd.data
        m.d.comb += [
            # the read port always looks at the requested set/entry
            rd.addr.eq(self.cset),
            # marker bit of the stored word
            active_bit.eq(stored[self.active]),
            # given tag vs stored tag
            tag_valid.eq(self.tag == stored[tag_lo:tag_hi]),
            # valid only if the tags match AND the entry is marked
            self.valid.eq(tag_valid & active_bit),
            # output data: TODO, check rd-enable?
            self.data_o.eq(stored[data_lo:data_hi]),
        ]

        # The write port is only driven while its enable is set; the enable
        # itself is controlled from outside this module.
        with m.If(wr.en):
            m.d.comb += [
                wr.addr.eq(self.cset),
                wr.data.eq(Cat(1, self.data_i, self.tag)),
            ]

        return m
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.cli import main
+from PteEntry import PteEntry
class PermissionValidator():
    """ The purpose of this Module is to check the Permissions of a given PTE
        against the requested access permissions.

        This module will either validate (by setting the valid bit HIGH)
        the request or find a permission fault and invalidate (by setting
        the valid bit LOW) the request
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * valid HIGH when permissions are correct
        """
        # Internal
        self.pte_entry = PteEntry(asid_size, pte_size)

        # Input
        self.data = Signal(asid_size + pte_size)  # ASID + PTE to be checked
        self.xwr = Signal(3)           # Execute, Write, Read
        self.super_mode = Signal(1)    # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # Address Space IDentifier (ASID) of the request.  Sized from
        # asid_size (previously fixed at 15 bits) so that it always has the
        # same width as the ASID parsed out of `data`.
        self.asid = Signal(asid_size)

        # Output
        self.valid = Signal(1)  # Denotes if the permissions are correct

    def elaborate(self, platform=None):
        m = Module()

        m.submodules.pte_entry = self.pte_entry
        m.d.comb += self.pte_entry.i.eq(self.data)

        # Check if the entry is valid
        with m.If(self.pte_entry.v):
            # ASID match or Global Permission
            with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
                # Check Execute, Write, Read (XWR) Permissions
                with m.If(self.pte_entry.xwr == self.xwr):
                    # Supervisor Logic
                    with m.If(self.super_mode):
                        # Valid if entry is not in user mode or supervisor
                        # has Supervisor User Memory (SUM) access via the
                        # SUM bit in the sstatus register
                        m.d.comb += self.valid.eq((~self.pte_entry.u)
                                                  | self.super_access)
                    # User logic
                    with m.Else():
                        # Valid if the entry is in user mode only
                        m.d.comb += self.valid.eq(self.pte_entry.u)
                # Requested XWR differs from the entry's XWR
                with m.Else():
                    m.d.comb += self.valid.eq(0)
            # Neither an ASID match nor a global entry
            with m.Else():
                m.d.comb += self.valid.eq(0)
        # Entry itself is not valid
        with m.Else():
            m.d.comb += self.valid.eq(0)

        return m
\ No newline at end of file
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.cli import main
class PteEntry():
    """ Central parser for Page Table Entries (PTE).

        Keeping the bit slicing in one module avoids duplicated code and the
        mistakes that come with it.  The control bits are broken out into
        individual signals for ease of use.

        The layout follows the standard PTE given by
        Volume II: RISC-V Privileged Architectures V1.10 Pg 60.

        The Address Space IDentifier (ASID) is appended above the MSB of the
        PTE and is parsed out as such.  A valid input Signal would be:
            Bits:[78-64][63-0]

        The output PTE value still includes the control bits.
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * d    The Dirty bit from the PTE portion of i
            * a    The Accessed bit from the PTE portion of i
            * g    The Global bit from the PTE portion of i
            * u    The User Mode bit from the PTE portion of i
            * xwr  The Execute/Write/Read bits from the PTE portion of i
            * v    The Valid bit from the PTE portion of i
            * asid The asid portion of i
            * pte  The pte portion of i
        """
        # Internal: the ASID occupies i[asid_start:asid_end]
        self.asid_start = pte_size
        self.asid_end = pte_size + asid_size

        # Input
        self.i = Signal(asid_size + pte_size)

        # Output
        self.d = Signal(1)             # Dirty bit (From pte)
        self.a = Signal(1)             # Accessed bit (From pte)
        self.g = Signal(1)             # Global Access (From pte)
        self.u = Signal(1)             # User Mode (From pte)
        self.xwr = Signal(3)           # Execute Write Read (From pte)
        self.v = Signal(1)             # Valid (From pte)
        self.asid = Signal(asid_size)  # Associated Address Space IDentifier
        self.pte = Signal(pte_size)    # Full Page Table Entry

    def elaborate(self, platform=None):
        m = Module()
        raw = self.i

        m.d.comb += [
            # Control bits, listed from bit 0 upwards
            self.v.eq(raw[0]),
            self.xwr.eq(raw[1:4]),
            self.u.eq(raw[4]),
            self.g.eq(raw[5]),
            self.a.eq(raw[6]),
            self.d.eq(raw[7]),
            # Everything above the PTE is the ASID
            self.asid.eq(raw[self.asid_start:self.asid_end]),
            # The PTE itself, control bits included
            self.pte.eq(raw[:self.asid_start]),
        ]

        return m
\ No newline at end of file
--- /dev/null
+Online simulator of 4-way set-associative cache:
+Python simulator of a N-way set-associative cache:
+import sys
+from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
+from nmigen.compat.genlib import fsm
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+from AddressEncoder import AddressEncoder
+from MemorySet import MemorySet
+# TODO: use a LFSR that advances continuously and picking the bottom
+# few bits from it to select which cache line to replace, instead of PLRU
+# http://bugs.libre-riscv.org/show_bug.cgi?id=71
+from plru import PLRU
+from LFSR import LFSR, LFSR_POLY_24
+SA_NA = "00" # no action (none)
+SA_RD = "01" # read
+SA_WR = "10" # write
class SetAssociativeCache(Elaboratable):
    """ Set Associative Cache Memory

        The purpose of this module is to generate a memory cache given the
        constraints passed in. This will create a n-way set associative cache.
        It is expected for the SV TLB that the VMA will provide the set number
        while the ASID provides the tag (still to be decided).
    """

    def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
        """ Arguments
            * tag_size (bits): The bit count of the tag
            * data_size (bits): The bit count of the data to be stored
            * set_count (number): The number of sets/entries in the cache
            * way_count (number): The number of slots a data can be stored
                                  in one set
            * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
                    set/entry to write to.  otherwise, use a PLRU
        """
        # Internals
        self.lfsr_mode = lfsr
        self.way_count = way_count  # The number of slots in one set
        self.tag_size = tag_size    # The bit count of the tag
        self.data_size = data_size  # The bit count of the data to be stored

        # set up Memory array: one MemorySet per way
        self.mem_array = Array()  # memory array
        for i in range(way_count):
            ms = MemorySet(data_size, tag_size, set_count, active=0)
            self.mem_array.append(ms)

        # Finds valid entries
        self.encoder = AddressEncoder(way_count)

        # setup PLRU or LFSR
        if lfsr:
            # LFSR mode
            self.lfsr = LFSR(LFSR_POLY_24)
        else:
            # PLRU mode
            self.plru = PLRU(way_count)  # One block to handle plru calculations
            self.plru_array = Array()    # PLRU data on each set
            for i in range(set_count):
                name = "plru%d" % i
                self.plru_array.append(Signal(self.plru.TLBSZ, name=name))

        # Input
        self.enable = Signal(1)   # Whether the cache is enabled
        self.command = Signal(2)  # 00=None, 01=Read, 10=Write (see SA_XX)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)        # The tag to find
        self.data_i = Signal(data_size)    # The input data

        # Output
        self.ready = Signal(1)  # 0 => Processing 1 => Ready for commands
        self.hit = Signal(1)    # Tag matched one way in the given set
        self.multiple_hit = Signal(1)  # Tag matched many ways in the given set
        self.data_o = Signal(data_size)  # The data linked to the matched tag

    def check_tags(self, m):
        """ Validate the tags in the selected set. If one and only one
            tag matches set its state to zero and increment all others
            by one. We only advance to next state if a single hit is found.
        """
        # Vector to store way valid results
        # A zero denotes a way is invalid
        valid_vector = [self.mem_array[i].valid
                        for i in range(self.way_count)]

        # Pass encoder the valid vector
        m.d.comb += self.encoder.i.eq(Cat(*valid_vector))

        # Only one entry should be marked
        # This is due to already verifying the tags
        # matched and the valid bit is high
        with m.If(self.hit):
            m.next = "FINISHED_READ"
            # Pull out data from the read port
            data = self.mem_array[self.encoder.o].data_o
            m.d.comb += self.data_o.eq(data)
            if not self.lfsr_mode:
                self.access_plru(m)

        # Multiple tags matched: no data is returned
        with m.Elif(self.multiple_hit):
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

        # No tag matches means no data
        with m.Else():
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

    def access_plru(self, m):
        """ An entry was accessed and the plru tree must now be updated
        """
        # Pull out the set's entry being edited
        plru_entry = self.plru_array[self.cset]
        m.d.comb += [
            # Set the plru data to the current state
            self.plru.plru_tree.eq(plru_entry),
            # Set that the cache was accessed
            self.plru.lu_access_i.eq(1)
        ]

    def read(self, m):
        """ Go through the read process of the cache.
            This takes two cycles to complete. First it checks for a valid tag
            and secondly it updates the LRU values.
        """
        with m.FSM():
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                # check_tags will set the state if the conditions are met
                self.check_tags(m)
            with m.State("FINISHED_READ"):
                m.next = "READY"
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    plru_tree_o = self.plru.plru_tree_o
                    m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)

    def write_entry(self, m):
        """ Select the way to overwrite and enable its write port.

            PLRU mode: the PLRU's replace vector is fed to the encoder and
            the encoder output selects the way.
            LFSR mode: the low bits of the LFSR state select the way.
        """
        if not self.lfsr_mode:
            m.d.comb += [
                # set cset (mem address) into PLRU
                self.plru.plru_tree.eq(self.plru_array[self.cset]),
                # and connect plru to encoder for write
                self.encoder.i.eq(self.plru.replace_en_o)
            ]
            write_port = self.mem_array[self.encoder.o].w
        else:
            # use the LFSR to generate a random(ish) one of the mem array
            lfsr_output = Signal(max=self.way_count)
            lfsr_random = Signal(max=self.way_count)
            m.d.comb += lfsr_output.eq(self.lfsr.state)  # lose some bits
            # address too big, limit to range of array.  Valid indices are
            # 0 .. way_count-1, so way_count itself must wrap as well
            # (hence ">=", not ">").
            m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count,
                                           lfsr_output - self.way_count,
                                           lfsr_output))
            write_port = self.mem_array[lfsr_random].w
            # NOTE(review): nothing in this branch drives encoder.i, so the
            # single_match condition below looks like it can never be true
            # in LFSR mode - confirm the intended write-enable condition.

        # then if there is a match from the encoder, enable the selected write
        with m.If(self.encoder.single_match):
            m.d.comb += write_port.en.eq(1)

    def write(self, m):
        """ Go through the write process of the cache.
            This takes two cycles to complete. First it writes the entry,
            and secondly it updates the PLRU (in plru mode)
        """
        with m.FSM():
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                self.write_entry(m)
                m.next = "FINISHED_WRITE"
            with m.State("FINISHED_WRITE"):
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    plru_entry = self.plru_array[self.cset]
                    m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
                m.next = "READY"

    def elaborate(self, platform=None):
        """ Wire up submodules and the command decoder; returns the Module. """
        m = Module()

        # ----
        # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
        # ----
        m.submodules.AddressEncoder = self.encoder
        if self.lfsr_mode:
            m.submodules.LFSR = self.lfsr
        else:
            m.submodules.PLRU = self.plru

        for i, mem in enumerate(self.mem_array):
            setattr(m.submodules, "mem%d" % i, mem)

        # ----
        # select mode: PLRU connect to encoder, LFSR do... something
        # ----
        if not self.lfsr_mode:
            # Set what entry was hit
            m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
        else:
            # enable LFSR
            m.d.comb += self.lfsr.enable.eq(self.enable)

        # ----
        # connect hit/multiple hit to encoder output
        # ----
        m.d.comb += [
            self.hit.eq(self.encoder.single_match),
            self.multiple_hit.eq(self.encoder.multiple_match),
        ]

        # ----
        # connect incoming data/tag/cset(addr) to mem_array
        # ----
        for mem in self.mem_array:
            write_port = mem.w
            m.d.comb += [
                mem.cset.eq(self.cset),
                mem.tag.eq(self.tag),
                mem.data_i.eq(self.data_i),
                write_port.en.eq(0),  # default: disable write
            ]

        # ----
        # Commands: READ/WRITE/TODO
        # ----
        with m.If(self.enable):
            with m.Switch(self.command):
                # Search all sets at a particular tag
                with m.Case(SA_RD):
                    self.read(m)
                with m.Case(SA_WR):
                    self.write(m)
                    # Maybe catch multiple tags write here?
                    # TODO
                # TODO: invalidate/flush, flush-all?

        return m

    def ports(self):
        """ Signals exposed as module ports when converting the design. """
        return [self.enable, self.command, self.cset, self.tag, self.data_i,
                self.ready, self.hit, self.multiple_hit, self.data_o]
if __name__ == '__main__':
    # Emit RTLIL for both replacement policies: PLRU first, then LFSR
    variants = (
        ("SetAssociativeCache.il", False),
        ("SetAssociativeCacheLFSR.il", True),
    )
    for il_name, use_lfsr in variants:
        cache = SetAssociativeCache(4, 8, 4, 6, use_lfsr)
        il_text = rtlil.convert(cache, ports=cache.ports())
        with open(il_name, "w") as il_file:
            il_file.write(il_text)
--- /dev/null
+""" TLB Module
+ The expected form of the data is:
+ * Item (Bits)
+ * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
+"""
+from nmigen import Memory, Module, Signal, Cat
+from nmigen.cli import main
+from PermissionValidator import PermissionValidator
+from Cam import Cam
class TLB():
    """ Translation Lookaside Buffer (L1 only).

        A CAM keyed on the Virtual Memory Address selects an entry of a
        memory that stores ASID + PTE; a PermissionValidator then checks the
        selected entry against the requested access.
    """

    def __init__(self, asid_size, vma_size, pte_size, L1_size):
        """ Arguments
            * asid_size: Address Space IDentifier (ASID) typically 15 bits
            * vma_size: Virtual Memory Address (VMA) typically 36 bits
            * pte_size: Page Table Entry (PTE) typically 64 bits
            * L1_size: number of L1 entries (currently overridden, see below)

            Notes:
            These arguments should represent the largest possible size
            defined by the MODE settings. See
            Volume II: RISC-V Privileged Architectures V1.10 Page 57
        """
        # Internal
        self.state = 0

        # L1 Cache Modules
        L1_size = 8  # XXX overridden incoming argument?
        self.cam_L1 = Cam(vma_size, L1_size)
        self.mem_L1 = Memory(asid_size + pte_size, L1_size)

        # Permission Validator
        self.perm_validator = PermissionValidator(asid_size, pte_size)

        # Inputs
        self.supermode = Signal(1)     # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # 00=None, 01=Search, 10=Write L1, 11=Write L2
        self.command = Signal(2)
        self.xwr = Signal(3)           # Execute, Write, Read
        self.mode = Signal(4)          # 4 bits for access to Sv48 on Rv64
        self.address_L1 = Signal(max=L1_size)
        self.asid = Signal(asid_size)  # Address Space IDentifier (ASID)
        self.vma = Signal(vma_size)    # Virtual Memory Address (VMA)
        self.pte_in = Signal(pte_size)  # To be saved Page Table Entry (PTE)

        # Outputs
        self.hit = Signal(1)  # Denotes if the VMA had a mapped PTE
        self.perm_valid = Signal(1)  # Denotes if the permissions are correct
        self.pte_out = Signal(pte_size)  # PTE that was mapped to by the VMA

    def search(self, m, read_L1, write_L1):
        """ searches the TLB

            Arguments:
            * m: the Module being built
            * read_L1: read port of the L1 memory
            * write_L1: write port of the L1 memory (held disabled here)
        """
        m.d.comb += [
            write_L1.en.eq(0),
            self.cam_L1.write_enable.eq(0),
            self.cam_L1.data_in.eq(self.vma)
        ]

        # Match found in L1 CAM
        match_found = Signal(reset_less=True)
        m.d.comb += match_found.eq(self.cam_L1.single_match
                                   | self.cam_L1.multiple_match)

        with m.If(match_found):
            # Memory shortcut variables
            mem_address = self.cam_L1.match_address

            # Memory Logic
            m.d.comb += read_L1.addr.eq(mem_address)

            # Permission Validator Logic
            m.d.comb += [
                self.hit.eq(1),
                # Set permission validator data to the correct
                # register file data according to CAM match
                # address
                self.perm_validator.data.eq(read_L1.data),
                # Execute, Read, Write
                self.perm_validator.xwr.eq(self.xwr),
                # Supervisor Mode
                self.perm_validator.super_mode.eq(self.supermode),
                # Supervisor Access
                self.perm_validator.super_access.eq(self.super_access),
                # Address Space IDentifier (ASID)
                self.perm_validator.asid.eq(self.asid),
                # Output result of permission validation
                self.perm_valid.eq(self.perm_validator.valid)
            ]

            # Only output PTE if permissions are valid
            with m.If(self.perm_validator.valid):
                # XXX TODO - dummy for now
                reg_data = Signal.like(self.pte_out)
                m.d.comb += [
                    self.pte_out.eq(reg_data)
                ]
            with m.Else():
                m.d.comb += [
                    self.pte_out.eq(0)
                ]

        # Miss Logic
        with m.Else():
            m.d.comb += [
                self.hit.eq(0),
                self.perm_valid.eq(0),
                self.pte_out.eq(0)
            ]

    def write_l1(self, m, read_L1, write_L1):
        """ writes to the L1 cache

            Stores ASID + PTE at address_L1 and the VMA in the CAM.
        """
        # Memory_L1 Logic
        m.d.comb += [
            write_L1.en.eq(1),
            write_L1.addr.eq(self.address_L1),
            # The Cat places arguments from LSB -> MSB
            write_L1.data.eq(Cat(self.pte_in, self.asid))
        ]

        # CAM_L1 Logic
        m.d.comb += [
            self.cam_L1.write_enable.eq(1),
            self.cam_L1.data_in.eq(self.vma),
        ]

    def elaborate(self, platform):
        """ Register submodules and decode `command`; returns the Module. """
        m = Module()

        # Add submodules.  These must be attached through m.submodules
        # (not m.d, which selects clock domains), and the two memory ports
        # need distinct names.
        # Submodules for L1 Cache
        m.submodules.cam_L1 = self.cam_L1
        m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
        m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()

        # Permission Validator Submodule
        m.submodules.perm_validator = self.perm_validator

        # When MODE specifies translation
        # TODO add in different bit length handling ie prefix 0s
        tlb_enable = Signal(reset_less=True)
        m.d.comb += tlb_enable.eq(self.mode != 0)

        with m.If(tlb_enable):
            m.d.comb += [
                self.cam_L1.enable.eq(1)
            ]
            with m.Switch(self.command):
                # Search
                with m.Case("01"):
                    self.search(m, read_L1, write_L1)

                # Write L1
                # Expected that the miss will be handled in software
                with m.Case("10"):
                    self.write_l1(m, read_L1, write_L1)

                # TODO
                # with m.Case("11"):

        # When disabled
        with m.Else():
            m.d.comb += [
                self.cam_L1.enable.eq(0),
                # XXX TODO - self.reg_file.enable.eq(0),
                self.hit.eq(0),
                self.perm_valid.eq(0),  # XXX TODO, check this
                self.pte_out.eq(0)
            ]

        return m
if __name__ == '__main__':
    tlb = TLB(15, 36, 64, 4)
    # TLB inputs, then TLB outputs, then the ports of the L1 CAM
    tlb_inputs = [tlb.supermode, tlb.super_access, tlb.command,
                  tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
                  tlb.vma, tlb.pte_in]
    tlb_outputs = [tlb.hit, tlb.perm_valid, tlb.pte_out]
    main(tlb, ports=tlb_inputs + tlb_outputs + tlb.cam_L1.ports())
--- /dev/null
+#include <cstdint>
+#include <iostream>
+#include <cmath>
+#define NWAY 4
+#define NLINE 256
+#define HIT 0
+#define MISS 1
+#define MS 1000
+Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
+Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ 8-way set associative - 7 = 1+2+4 bits
+16-way set associative - 15 = 1+2+4+8 bits
+32-way set associative - 31 = 1+2+4+8+16 bits
+64-way set associative - 63 = 1+2+4+8+16+32 bits
+using namespace std;
+struct AddressField {
+ uint64_t wd_idx : 2;//Unused
+ uint64_t offset : 4;//Unused
+ uint64_t index : 8;//NLINE = 256 = 2^8
+ uint64_t tag : 50;
+union Address {
+ uint32_t* p;
+ AddressField fields;
+struct Cell {
+ bool v;
+ uint64_t tag;
+ Cell() : v(false), tag(0) {}
+ bool isHit(uint64_t tag) {
+ return v && (tag == this->tag);
+ }
+ void fetch(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ addr.fields.offset = 0;
+ addr.fields.wd_idx = 0;
+ tag = addr.fields.tag;
+ v = true;
+ }
+ostream& operator<<(ostream & out, const Cell& cell) {
+ out << " v:" << cell.v << " tag:" << hex << cell.tag;
+ return out;
+struct Block {
+ Cell cell[NWAY];
+ uint32_t state;
+ uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
+ uint64_t *value;
+ uint64_t *next_value;
+ Block() : state(0) {
+ switch (NWAY) {
+ case 4:
+ mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
+ value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
+ next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
+ break;
+ case 8:
+ mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
+ 0b1010001};
+ value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
+ 0b1010001};
+ next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
+ 0b0000001, 0b0000000};
+ break;
+ //TODO - more NWAY goes here.
+ default:
+ std::cout << "Error definition NWAY = " << NWAY << std::endl;
+ }
+ }
+ uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
+ for (int i = 0; i < NWAY; ++i) {
+ if (cell[i].isHit(tag)) {
+ *pway = i;
+ return pway;
+ }
+ }
+ return NULL;
+ }
+ void setLRU(uint32_t *address) {
+ int way = 0;
+ uint32_t st = state;
+ for (int i = 0; i < NWAY; ++i) {
+ if ((state & mask[i]) == value[i]) {
+ state ^= mask[i];
+ way = i;
+ break;
+ }
+ }
+ cell[way].fetch(address);
+ cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
+ }
+ uint32_t *get(uint32_t *address, uint32_t *pway) {
+ Address addr;
+ addr.p = address;
+ uint32_t *d = getByTag(addr.fields.tag, pway);
+ if (d != NULL) {
+ return &d[addr.fields.offset];
+ }
+ return d;
+ }
+ int set(uint32_t *address) {
+ uint32_t way = 0;
+ uint32_t *p = get(address, &way);
+ if (p != NULL) {
+ printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
+ state &= ~mask[way];
+ printf("%X --> ", state);
+ state |= next_value[way];
+ printf("%X\n", state);
+ // *p = *address; //skip since address is fake.
+ return HIT;
+ } else {
+ setLRU(address);
+ return MISS;
+ }
+ }
+ostream& operator<<(ostream & out, const Block& block) {
+ out << "state:" << block.state << " ";
+ for (int i = 0; i<NWAY; i++) {
+ out << block.cell[i];
+ }
+ return out;
+struct Cache {
+ Block block[NLINE];
+ uint32_t count[2];
+ Cache() { count[HIT] = 0; count[MISS] = 0; }
+ void access(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ Block& b = block[addr.fields.index];
+ ++count[b.set(address)];
+ }
+ostream& operator<<(ostream & out, const Cache& cache) {
+ out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
+ for (int i = 0; i < NLINE; i++) {
+ out << cache.block[i] << endl;
+ }
+ return out;
+Cache cache;
+void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
+ int x, i, j;
+ for (i = 0; i < MS; i++) {
+ for (j = 0; j < MS; j++) {
+ cache.access(res + i*MS +j);
+ for (x = 0; x < MS; x++) {
+ cache.access(m1 + i*MS + x);
+ cache.access(m2 + x*MS + j);
+ cache.access(res + i*MS +j);
+ // res[i][j] += m1[i][x] * m2[x][j];
+ cache.access(res + i*MS +j);
+ }
+ }
+ }
+int main()
+ uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
+ uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
+ uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
+ multiply(m1, m2, res);
+ cout << cache << endl;
+ return 0;
--- /dev/null
+two-way set associative - one bit
+ indicates which line of the two has been referenced more recently
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
+ Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
+note that there is a 6-bit encoding for true LRU for four-way set associative
+ bit 0: bank[1] more recently used than bank[0]
+ bit 1: bank[2] more recently used than bank[0]
+ bit 2: bank[2] more recently used than bank[1]
+ bit 3: bank[3] more recently used than bank[0]
+ bit 4: bank[3] more recently used than bank[1]
+ bit 5: bank[3] more recently used than bank[2]
+ this results in 24 valid bit patterns within the 64 possible bit patterns
+ (4! possible valid traces for bank references)
+ e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
+ you can implement a state machine with a 256x6 ROM (6-bit state encoding
+ appended with a 2-bit bank reference input will yield a new 6-bit state),
+ and you can implement an LRU bank indicator with a 64x2 ROM
--- /dev/null
+from nmigen import Const
# Exception cause codes, each wrapped as a 64-bit nmigen constant.
# NOTE(review): the numbering looks like the RISC-V privileged-spec
# exception codes -- confirm against the spec revision being targeted.
# No constant is defined here for the values 0, 10 and 14.
INSTR_ACCESS_FAULT = Const(1, 64)  # instruction access fault
ILLEGAL_INSTR = Const(2, 64)  # illegal instruction
BREAKPOINT = Const(3, 64)  # breakpoint
LD_ADDR_MISALIGNED = Const(4, 64)  # load address misaligned
LD_ACCESS_FAULT = Const(5, 64)  # load access fault
ST_ADDR_MISALIGNED = Const(6, 64)  # store address misaligned
ST_ACCESS_FAULT = Const(7, 64)  # store access fault
ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
STORE_PAGE_FAULT = Const(15, 64) # Store page fault
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: Florian Zaruba, ETH Zurich
+# Date: 19/04/2017
+# Description: Memory Management Unit for Ariane, contains TLB and
+# address translation unit. SV39 as defined in RISC-V
+# privilege specification 1.11-WIP
+# import ariane_pkg::*;  (SystemVerilog import kept from the original source)
+from nmigen import Const, Signal, Cat, Module, Mux
+from nmigen.cli import verilog, rtlil
+from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
+from tlb import TLB
+from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
+                         LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
# 2-bit privilege-level encodings; the MMU compares priv_lvl_i and
# ld_st_priv_lvl_i against these (M = machine, S = supervisor, U = user).
PRIV_LVL_M = Const(0b11, 2)
PRIV_LVL_S = Const(0b01, 2)
PRIV_LVL_U = Const(0b00, 2)
class RVException:
    """Group of the three signals that describe one exception.

    Iterating an instance yields ``cause``, ``tval`` and ``valid`` in that
    order; ``ports()`` and ``eq()`` are both built on that ordering.
    """

    def __init__(self):
        # One plain attribute assignment per Signal.
        self.cause = Signal(64)  # cause of exception
        self.tval = Signal(64)  # more info on the exception (e.g. the
                                # instruction causing it), or the
                                # address of a LD/ST fault
        self.valid = Signal()

    def __iter__(self):
        yield from (self.cause, self.tval, self.valid)

    def ports(self):
        """Return the member signals as a list, in iteration order."""
        return [*self]

    def eq(self, inp):
        """Return a list assigning each signal of `inp` to its peer here."""
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]
class ICacheReqI:
    """Fetch-side response bundle: valid flag, physical address, exception.

    Iteration yields ``fetch_valid``, ``fetch_paddr`` and then every member
    of ``fetch_exception``.
    """

    def __init__(self):
        self.fetch_valid = Signal()  # address translation valid
        self.fetch_paddr = Signal(64)  # physical address in
        self.fetch_exception = RVException()  # exception occurred during fetch

    def __iter__(self):
        yield from (self.fetch_valid, self.fetch_paddr)
        yield from self.fetch_exception

    def ports(self):
        """Return all member signals as a flat list, in iteration order."""
        return [*self]
class ICacheReqO:
    """Fetch-side request bundle: request strobe plus virtual address."""

    def __init__(self):
        self.fetch_req = Signal()  # address translation request
        self.fetch_vaddr = Signal(64)  # virtual address out

    def __iter__(self):
        yield from (self.fetch_req, self.fetch_vaddr)

    def ports(self):
        """Return ``[fetch_req, fetch_vaddr]``."""
        return [*self]
class MMU:
    """Address-translation glue around an ITLB, a DTLB and a PTW.

    ``elaborate`` instantiates two ``TLB`` submodules and one ``PTW``
    submodule, wires them to the fetch (icache) and load/store (LSU)
    interfaces declared in ``__init__``, and derives the translated
    addresses and exceptions for both interfaces.
    """

    def __init__(self, instr_tlb_entries = 4,
                       data_tlb_entries = 4,
                       asid_width = 1):
        """Declare the MMU interface signals.

        Arguments:
        * instr_tlb_entries: entry count handed to the instruction TLB
        * data_tlb_entries: entry count handed to the data TLB
        * asid_width: bit width of the ASID input, also handed to the
          TLBs, the PTW and the TLB update records
        """
        self.instr_tlb_entries = instr_tlb_entries
        self.data_tlb_entries = data_tlb_entries
        self.asid_width = asid_width

        self.flush_i = Signal()
        self.enable_translation_i = Signal()
        self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
        # IF interface
        self.icache_areq_i = ICacheReqO()
        self.icache_areq_o = ICacheReqI()
        # LSU interface
        # this is a more minimalistic interface because the actual addressing
        # logic is handled in the LSU as we distinguish load and stores,
        # what we do here is simple address translation
        self.misaligned_ex_i = RVException()
        self.lsu_req_i = Signal()   # request address translation
        self.lsu_vaddr_i = Signal(64) # virtual address in
        self.lsu_is_store_i = Signal() # the translation is requested by a store
        # if we need to walk the page table we can't grant in the same cycle

        # Cycle 0
        self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
                                       # if translation hits in the DTLB
        # Cycle 1
        self.lsu_valid_o = Signal()  # translation is valid
        self.lsu_paddr_o = Signal(64) # translated address
        self.lsu_exception_o = RVException() # addr translate threw exception

        # General control signals
        self.priv_lvl_i = Signal(2)
        self.ld_st_priv_lvl_i = Signal(2)
        self.sum_i = Signal()
        self.mxr_i = Signal()
        # input logic flag_mprv_i,
        self.satp_ppn_i = Signal(44)
        self.asid_i = Signal(self.asid_width)
        self.flush_tlb_i = Signal()
        # Performance counters
        self.itlb_miss_o = Signal()
        self.dtlb_miss_o = Signal()
        # PTW memory interface
        self.req_port_i = DCacheReqO()
        self.req_port_o = DCacheReqI()

    def elaborate(self, platform):
        """Build and return the nmigen Module implementing the MMU.

        `platform` is accepted for the elaborate() convention and is not
        used by this method.
        """
        m = Module()

        iaccess_err = Signal()   # insufficient priv to access instr page
        daccess_err = Signal()   # insufficient priv to access data page
        ptw_active = Signal()    # PTW is currently walking a page table
        walking_instr = Signal() # PTW is walking because of an ITLB miss
        ptw_error = Signal()     # PTW threw an exception

        update_vaddr = Signal(39)
        uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
        update_ptw_itlb = TLBUpdate(self.asid_width)
        update_ptw_dtlb = TLBUpdate(self.asid_width)

        itlb_lu_access = Signal()
        itlb_content = PTE()
        itlb_is_2M = Signal()
        itlb_is_1G = Signal()
        itlb_lu_hit = Signal()

        dtlb_lu_access = Signal()
        dtlb_content = PTE()
        # DTLB page-size flags: driven from the DTLB below and consumed by
        # the data-side address mux.  They must not be re-declared later.
        dtlb_is_2M = Signal()
        dtlb_is_1G = Signal()
        dtlb_lu_hit = Signal()

        # Assignments
        m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
                     dtlb_lu_access.eq(self.lsu_req_i)
                    ]

        # ITLB
        m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
                                         self.asid_width)
        m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
                     i_tlb.update_i.eq(update_ptw_itlb),
                     i_tlb.lu_access_i.eq(itlb_lu_access),
                     i_tlb.lu_asid_i.eq(self.asid_i),
                     i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
                     itlb_content.eq(i_tlb.lu_content_o),
                     itlb_is_2M.eq(i_tlb.lu_is_2M_o),
                     itlb_is_1G.eq(i_tlb.lu_is_1G_o),
                     itlb_lu_hit.eq(i_tlb.lu_hit_o),
                    ]

        # DTLB
        m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
                                         self.asid_width)
        m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
                     d_tlb.update_i.eq(update_ptw_dtlb),
                     d_tlb.lu_access_i.eq(dtlb_lu_access),
                     d_tlb.lu_asid_i.eq(self.asid_i),
                     d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
                     dtlb_content.eq(d_tlb.lu_content_o),
                     dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
                     dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
                     dtlb_lu_hit.eq(d_tlb.lu_hit_o),
                    ]

        # PTW
        m.submodules.ptw = ptw = PTW(self.asid_width)
        m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
                     walking_instr.eq(ptw.walking_instr_o),
                     ptw_error.eq(ptw.ptw_error_o),
                     ptw.enable_translation_i.eq(self.enable_translation_i),

                     update_vaddr.eq(ptw.update_vaddr_o),
                     update_ptw_itlb.eq(ptw.itlb_update_o),
                     update_ptw_dtlb.eq(ptw.dtlb_update_o),

                     ptw.itlb_access_i.eq(itlb_lu_access),
                     ptw.itlb_hit_i.eq(itlb_lu_hit),
                     ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),

                     ptw.dtlb_access_i.eq(dtlb_lu_access),
                     ptw.dtlb_hit_i.eq(dtlb_lu_hit),
                     ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),

                     ptw.req_port_i.eq(self.req_port_i),
                     self.req_port_o.eq(ptw.req_port_o),
                    ]

        #-----------------------
        # Instruction Interface
        #-----------------------
        # The instruction interface is a simple request response interface

        # MMU disabled: just pass through
        m.d.comb += [self.icache_areq_o.fetch_valid.eq(
                                self.icache_areq_i.fetch_req),
                     # play through in case we disabled address translation
                     self.icache_areq_o.fetch_paddr.eq(
                                self.icache_areq_i.fetch_vaddr)
                    ]
        # two potential exception sources:
        # 1. HPTW threw an exception -> signal with a page fault exception
        # 2. We got an access error because of insufficient permissions ->
        #    throw an access exception
        m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
        # Check whether we are allowed to access this memory region
        # from a fetch perspective

        # XXX TODO: use PermissionValidator instead [we like modules]
        m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req &
                                   (((self.priv_lvl_i == PRIV_LVL_U) &
                                     ~itlb_content.u) |
                                    ((self.priv_lvl_i == PRIV_LVL_S) &
                                     itlb_content.u)))

        # MMU enabled: address from TLB, request delayed until hit.
        # Error when TLB hit and no access right or TLB hit and
        # translated address not valid (e.g. AXI decode error),
        # or when PTW performs walk due to ITLB miss and raises
        # an error.
        with m.If(self.enable_translation_i):
            # we work with SV39, so if VM is enabled, check that
            # all bits [63:38] are equal.
            # Each comparison is parenthesised explicitly: Python's `|`
            # binds tighter than `==`, so the unparenthesised form
            # compared the OR-ed value against 0 instead.
            with m.If(self.icache_areq_i.fetch_req &
                      ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) |
                        (self.icache_areq_i.fetch_vaddr[38:64] == 0))):
                fe = self.icache_areq_o.fetch_exception
                m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
                             fe.tval.eq(self.icache_areq_i.fetch_vaddr),
                             fe.valid.eq(1)
                            ]

            m.d.comb += self.icache_areq_o.fetch_valid.eq(0)

            # 4K page
            paddr = Signal.like(self.icache_areq_o.fetch_paddr)
            paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
                          itlb_content.ppn)
            m.d.comb += paddr.eq(paddr4k)
            # Mega page
            with m.If(itlb_is_2M):
                m.d.comb += paddr[12:21].eq(
                          self.icache_areq_i.fetch_vaddr[12:21])
            # Giga page
            with m.If(itlb_is_1G):
                m.d.comb += paddr[12:30].eq(
                          self.icache_areq_i.fetch_vaddr[12:30])
            m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)

            # ---------
            # ITLB Hit
            # --------
            # if we hit the ITLB output the request signal immediately
            with m.If(itlb_lu_hit):
                m.d.comb += self.icache_areq_o.fetch_valid.eq(
                                          self.icache_areq_i.fetch_req)
                # we got an access error
                with m.If(iaccess_err):
                    # throw a page fault
                    fe = self.icache_areq_o.fetch_exception
                    m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
                                 fe.tval.eq(self.icache_areq_i.fetch_vaddr),
                                 fe.valid.eq(1)
                                ]
            # ---------
            # ITLB Miss
            # ---------
            # watch out for exceptions happening during walking the page table
            with m.Elif(ptw_active & walking_instr):
                m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
                fe = self.icache_areq_o.fetch_exception
                m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
                             fe.tval.eq(uaddr64),
                             fe.valid.eq(1)
                            ]

        #-----------------------
        # Data Interface
        #-----------------------

        lsu_vaddr = Signal(64)
        dtlb_pte = PTE()
        misaligned_ex = RVException()
        lsu_req = Signal()
        lsu_is_store = Signal()
        dtlb_hit = Signal()

        # check if we need to do translation or if we are always
        # ready (e.g.: we are not translating anything)
        m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
                                               dtlb_lu_hit, 1))

        # The data interface is simpler and only consists of a
        # request/response interface
        m.d.comb += [
            # save request and DTLB response
            lsu_vaddr.eq(self.lsu_vaddr_i),
            lsu_req.eq(self.lsu_req_i),
            misaligned_ex.eq(self.misaligned_ex_i),
            dtlb_pte.eq(dtlb_content),
            dtlb_hit.eq(dtlb_lu_hit),
            lsu_is_store.eq(self.lsu_is_store_i),
        ]
        m.d.sync += [
            self.lsu_paddr_o.eq(lsu_vaddr),
            self.lsu_valid_o.eq(lsu_req),
            self.lsu_exception_o.eq(misaligned_ex),
        ]

        sverr = Signal()
        usrerr = Signal()

        m.d.comb += [
            # mute misaligned exceptions if there is no request
            # otherwise they will throw accidental exceptions
            misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),

            # SUM is not set and we are trying to access a user
            # page in supervisor mode.  The equality is parenthesised
            # because `&` binds tighter than `==` in Python.
            sverr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_S) & ~self.sum_i &
                     dtlb_pte.u),
            # this is not a user page but we are in user mode and
            # trying to access it
            usrerr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_U) & ~dtlb_pte.u),

            # Check if the User flag is set, then we may only
            # access it in supervisor mode if SUM is enabled
            daccess_err.eq(sverr | usrerr),
        ]

        # translation is enabled and no misaligned exception occurred
        with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
            m.d.comb += lsu_req.eq(0)
            # 4K page: the frame number comes from the *data* TLB entry
            paddr = Signal.like(lsu_vaddr)
            paddr4k = Cat(lsu_vaddr[0:12], dtlb_pte.ppn)
            m.d.comb += paddr.eq(paddr4k)
            # Mega page
            with m.If(dtlb_is_2M):
                m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
            # Giga page
            with m.If(dtlb_is_1G):
                m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
            m.d.sync += self.lsu_paddr_o.eq(paddr)

            # ---------
            # DTLB Hit
            # --------
            with m.If(dtlb_hit & lsu_req):
                m.d.comb += lsu_req.eq(1)
                # this is a store
                with m.If(lsu_is_store):
                    # check if the page is write-able and
                    # we are not violating privileges
                    # also check if the dirty flag is set
                    with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
                        le = self.lsu_exception_o
                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
                                     le.tval.eq(lsu_vaddr),
                                     le.valid.eq(1)
                                    ]

                # this is a load, check for sufficient access
                # privileges - throw a page fault if necessary
                with m.Elif(daccess_err):
                    le = self.lsu_exception_o
                    m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
                                 le.tval.eq(lsu_vaddr),
                                 le.valid.eq(1)
                                ]
            # ---------
            # DTLB Miss
            # ---------
            # watch out for exceptions
            with m.Elif(ptw_active & ~walking_instr):
                # page table walker threw an exception
                with m.If(ptw_error):
                    # an error makes the translation valid
                    m.d.comb += lsu_req.eq(1)
                    # the page table walker can only throw page faults
                    le = self.lsu_exception_o
                    with m.If(lsu_is_store):
                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
                                     le.tval.eq(uaddr64),
                                     le.valid.eq(1)
                                    ]
                    with m.Else():
                        m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
                                     le.tval.eq(uaddr64),
                                     le.valid.eq(1)
                                    ]

        return m

    def ports(self):
        """Return the flat list of interface signals used for conversion."""
        return [self.flush_i, self.enable_translation_i,
                self.en_ld_st_translation_i,
                self.lsu_req_i,
                self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
                self.lsu_valid_o, self.lsu_paddr_o,
                self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
                self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
                self.itlb_miss_o, self.dtlb_miss_o] + \
                self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
                self.req_port_i.ports() + self.req_port_o.ports() + \
                self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
if __name__ == '__main__':
    # Convert a default-configured MMU to RTLIL and save it to test_mmu.il.
    dut = MMU()
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mmu.il", "w") as il_file:
        il_file.write(il_text)
--- /dev/null
+from nmigen import Signal, Module, Cat, Const
+from nmigen.hdl.ir import Elaboratable
+from math import log2
+from ptw import TLBUpdate, PTE, ASID_WIDTH
class PLRU(Elaboratable):
    r""" PLRU - Pseudo Least Recently Used Replacement

        PLRU-tree indexing:
        lvl0        0
                   / \
                  /   \
        lvl1     1     2
                / \   / \
        lvl2   3   4 5   6
              / \ /\ /\ / \
             ... ... ... ...

        Inputs:  lu_hit (one bit per entry), lu_access_i, plru_tree
        Outputs: plru_tree_o (tree bits driven on a hit),
                 replace_en_o (one bit per entry)
    """
    def __init__(self, entries):
        """ Arguments:
            * entries: number of entries tracked; int(log2(entries)) tree
              levels are walked
              (NOTE(review): presumably must be a power of two - confirm)
        """
        self.entries = entries
        self.lu_hit = Signal(entries)
        self.replace_en_o = Signal(entries)
        self.lu_access_i = Signal()
        # Tree (bit per entry)
        self.TLBSZ = 2*(self.entries-1)
        self.plru_tree = Signal(self.TLBSZ)
        self.plru_tree_o = Signal(self.TLBSZ)

    def elaborate(self, platform=None):
        """Build the combinatorial update and decode logic of the tree."""
        m = Module()

        # Just predefine which nodes will be set/cleared
        # E.g. for a TLB with 8 entries, the for-loop is semantically
        # equivalent to the following pseudo-code:
        # unique case (1'b1)
        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
        # default: begin /* No hit */ end
        # endcase
        # NOTE(review): the loop below writes the *inverted* index bit into
        # each node, which the table above does not show - confirm which
        # polarity is intended.
        LOG_TLB = int(log2(self.entries))
        for i in range(self.entries):
            # we got a hit so update the pointer as it was least recently used
            hit = Signal(reset_less=True)
            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
            with m.If(hit):
                # Set the nodes to the values we would expect
                for lvl in range(LOG_TLB):
                    idx_base = (1<<lvl)-1
                    # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
                    shift = LOG_TLB - lvl
                    new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
                    plru_idx = idx_base + (i >> shift)
                    m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)

        # Decode tree to write enable signals
        # Next for-loop basically creates the following logic for e.g.
        # an 8 entry TLB (note: pseudo-code obviously):
        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
        # For each entry traverse the tree. If every tree-node matches
        # the corresponding bit of the entry's index, this is
        # the next entry to replace.
        replace = []
        for i in range(self.entries):
            en = []
            for lvl in range(LOG_TLB):
                idx_base = (1<<lvl)-1
                # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
                shift = LOG_TLB - lvl
                new_idx = (i >> (shift-1)) & 1
                plru_idx = idx_base + (i>>shift)
                plru = Signal(reset_less=True,
                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
                m.d.comb += plru.eq(self.plru_tree[plru_idx])
                # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
                if new_idx:
                    en.append(~plru) # yes inverted (using bool())
                else:
                    en.append(plru)  # yes inverted (using bool())
            # boolean logic manipulation:
            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
            replace.append(~Cat(*en).bool())
        m.d.comb += self.replace_en_o.eq(Cat(*replace))

        return m

    def ports(self):
        """Return the interface signals.

        Only Signal objects are listed; ``entries`` is a plain Python int
        configuration value and is therefore not part of the port list.
        """
        return [self.lu_hit, self.replace_en_o,
                self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 24.4.2017
+# Description: Hardware-PTW
+# /* verilator lint_off WIDTH */
+# import ariane_pkg::*;
+# see linux kernel source:
+# * "arch/riscv/include/asm/page.h"
+# * "arch/riscv/include/asm/mmu_context.h"
+# * "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
+from nmigen import Const, Signal, Cat, Module
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.cli import verilog, rtlil
+from math import log2
# NOTE(review): presumably the L1 data-cache size in bytes (32 KiB); it is
# not referenced anywhere in the visible part of this file - confirm use.
CONFIG_L1D_SIZE = 32*1024
class DCacheReqI:
    """Request-side signal bundle of the data-cache port.

    ``ports()`` fixes the member order that ``eq()`` relies on.
    """

    def __init__(self):
        # NOTE(review): DCACHE_INDEX_WIDTH / DCACHE_TAG_WIDTH are expected
        # to be module-level constants defined elsewhere in this file.
        self.address_index = Signal(DCACHE_INDEX_WIDTH)
        self.address_tag = Signal(DCACHE_TAG_WIDTH)
        self.data_wdata = Signal(64)
        self.data_req = Signal()
        self.data_we = Signal()
        self.data_be = Signal(8)
        self.data_size = Signal(2)
        self.kill_req = Signal()
        self.tag_valid = Signal()

    def ports(self):
        """Return every member signal, in declaration order."""
        address = [self.address_index, self.address_tag]
        data = [self.data_wdata, self.data_req,
                self.data_we, self.data_be, self.data_size]
        control = [self.kill_req, self.tag_valid]
        return address + data + control

    def eq(self, inp):
        """Return a list assigning each signal of `inp` to its peer here."""
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]
class DCacheReqO:
    """Response-side signal bundle of the data-cache port."""

    def __init__(self):
        self.data_gnt = Signal()
        self.data_rvalid = Signal()
        self.data_rdata = Signal(64)  # actually in PTE object format

    def ports(self):
        """Return ``[data_gnt, data_rvalid, data_rdata]``."""
        members = (self.data_gnt, self.data_rvalid, self.data_rdata)
        return list(members)

    def eq(self, inp):
        """Return a list assigning each signal of `inp` to its peer here."""
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]
class PTE: #(RecordObject):
    """Page-table entry split into its individual fields.

    The fields add up to 64 bits; iteration order is LSB first, which is
    what ``flatten()`` concatenates.
    """

    def __init__(self):
        # Attribute names double as signal names: eq() looks fields up on
        # an ArrayProxy by ``signal.name``.
        self.v = Signal()
        self.r = Signal()
        self.w = Signal()
        self.x = Signal()
        self.u = Signal()
        self.g = Signal()
        self.a = Signal()
        self.d = Signal()
        self.rsw = Signal(2)
        self.ppn = Signal(44)
        self.reserved = Signal(10)

    def __iter__(self):
        """ order is critical so that flatten creates LSB to MSB
        """
        flags = (self.v, self.r, self.w, self.x,
                 self.u, self.g, self.a, self.d)
        yield from flags
        yield from (self.rsw, self.ppn, self.reserved)

    def ports(self):
        """Return the fields as a list, LSB field first."""
        return [*self]

    def flatten(self):
        """Return all fields concatenated into one value, LSB first."""
        return Cat(*self.ports())

    def eq(self, x):
        """Return the assignment of `x` to the flattened entry.

        An ArrayProxy source is rebuilt field by field (looked up by signal
        name); any other source must offer ``flatten()`` itself.
        """
        if isinstance(x, ArrayProxy):
            source = Cat(*[getattr(x, field.name) for field in self.ports()])
        else:
            source = x.flatten()
        return self.flatten().eq(source)
class TLBUpdate:
    """Record describing one TLB update: flags, VPN, ASID and the PTE."""

    def __init__(self, asid_width):
        self.valid = Signal()  # valid flag
        self.is_2M = Signal()
        self.is_1G = Signal()
        self.vpn = Signal(27)
        self.asid = Signal(asid_width)
        self.content = PTE()

    def ports(self):
        """Return the header signals followed by the PTE's fields."""
        header = [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid]
        return header + self.content.ports()

    def flatten(self):
        """Return every port concatenated into a single value."""
        return Cat(*self.ports())

    def eq(self, x):
        """Return the assignment of `x`'s flattened value to this record."""
        target = self.flatten()
        return target.eq(x.flatten())
# SV39 defines three levels of page tables
# These 2-bit codes are compared against, and written to, the walker's
# ptw_lvl register.
LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
LVL2 = Const(1, 2)
LVL3 = Const(2, 2)
class PTW:
    """Hardware page-table walker (SV39, three levels).

    ``elaborate`` builds a state machine with the states IDLE, WAIT_GRANT,
    PTE_LOOKUP, PROPAGATE_ERROR and WAIT_RVALID.  On an ITLB or DTLB miss it
    requests page-table entries through ``req_port_o`` and either raises
    ``itlb_update_o.valid`` / ``dtlb_update_o.valid`` or reports a failure
    through ``ptw_error_o``.
    """

    def __init__(self, asid_width=8):
        """ Arguments:
            * asid_width: bit width of the ASID input and of the ASID field
              in both TLB update records
        """
        self.asid_width = asid_width

        self.flush_i = Signal() # flush everything, we need to do this because
        # actually everything we do is speculative at this stage
        # e.g.: there could be a CSR instruction that changes everything
        # NOTE(review): ptw_active_o resets to 1 and is only ever cleared
        # (in idle()); nothing visible sets it again - confirm intent.
        self.ptw_active_o = Signal(reset=1)    # active if not IDLE
        self.walking_instr_o = Signal()        # set when walking for TLB
        self.ptw_error_o = Signal()            # set when an error occurred
        self.enable_translation_i = Signal()   # CSRs indicate to enable SV39
        self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st

        self.lsu_is_store_i = Signal()       # translation triggered by store
        # PTW memory interface
        self.req_port_i = DCacheReqO()
        self.req_port_o = DCacheReqI()

        # to TLBs, update logic
        self.itlb_update_o = TLBUpdate(asid_width)
        self.dtlb_update_o = TLBUpdate(asid_width)

        self.update_vaddr_o = Signal(39)

        self.asid_i = Signal(self.asid_width)
        # from TLBs
        # did we miss?
        self.itlb_access_i = Signal()
        self.itlb_hit_i = Signal()
        self.itlb_vaddr_i = Signal(64)

        self.dtlb_access_i = Signal()
        self.dtlb_hit_i = Signal()
        self.dtlb_vaddr_i = Signal(64)
        # from CSR file
        self.satp_ppn_i = Signal(44) # ppn from satp
        self.mxr_i = Signal()
        # Performance counters
        self.itlb_miss_o = Signal()
        self.dtlb_miss_o = Signal()

    def ports(self):
        """Return the three status outputs exported as ports.

        Only ptw_active_o, walking_instr_o and ptw_error_o are returned,
        exactly as before.  A second, unreachable ``return`` that listed the
        remaining interface members (including the non-Signal req_port_i /
        req_port_o bundles) has been removed.
        """
        return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o]

    def elaborate(self, platform):
        """Build and return the nmigen Module implementing the walker.

        `platform` is accepted for the elaborate() convention and is not
        used by this method.
        """
        m = Module()

        # input registers
        data_rvalid = Signal()
        data_rdata = Signal(64)

        # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
        # is spec'd in 64-bit binary-format: better to spec as Record?
        pte = PTE()
        m.d.comb += pte.flatten().eq(data_rdata)

        # SV39 defines three levels of page tables
        ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
        ptw_lvl1 = Signal()
        ptw_lvl2 = Signal()
        ptw_lvl3 = Signal()
        m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
                     ptw_lvl2.eq(ptw_lvl == LVL2),
                     ptw_lvl3.eq(ptw_lvl == LVL3)]

        # is this an instruction page table walk?
        is_instr_ptw = Signal()
        global_mapping = Signal()
        # latched tag signal
        tag_valid = Signal()
        # register the ASID
        tlb_update_asid = Signal(self.asid_width)
        # register VPN we need to walk, SV39 defines a 39 bit virtual addr
        vaddr = Signal(64)
        # 4 byte aligned physical pointer
        ptw_pptr = Signal(56)

        # Upper bound of the tag slice of ptw_pptr: the tag starts right
        # above the index bits and is as wide as the address_tag signal.
        end = DCACHE_INDEX_WIDTH + len(self.req_port_o.address_tag)

        m.d.sync += [
            # Assignments
            self.update_vaddr_o.eq(vaddr),

            self.walking_instr_o.eq(is_instr_ptw),
            # directly output the correct physical address
            self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
            self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
            # we are never going to kill this request
            self.req_port_o.kill_req.eq(0),              # XXX assign comb?
            # we are never going to write with the HPTW
            self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
            # -----------
            # TLB Update
            # -----------
            self.itlb_update_o.vpn.eq(vaddr[12:39]),
            self.dtlb_update_o.vpn.eq(vaddr[12:39]),
            # update the correct page table level
            self.itlb_update_o.is_2M.eq(ptw_lvl2),
            self.itlb_update_o.is_1G.eq(ptw_lvl1),
            self.dtlb_update_o.is_2M.eq(ptw_lvl2),
            self.dtlb_update_o.is_1G.eq(ptw_lvl1),
            # output the correct ASID
            self.itlb_update_o.asid.eq(tlb_update_asid),
            self.dtlb_update_o.asid.eq(tlb_update_asid),
            # set the global mapping bit
            self.itlb_update_o.content.eq(pte),
            self.itlb_update_o.content.g.eq(global_mapping),
            self.dtlb_update_o.content.eq(pte),
            self.dtlb_update_o.content.g.eq(global_mapping),

            self.req_port_o.tag_valid.eq(tag_valid),
        ]

        #-------------------
        # Page table walker
        #-------------------
        # A virtual address va is translated into a physical address pa as
        # follows:
        # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
        #    PAGESIZE=2^12 and LEVELS=3.)
        # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
        #    (For Sv32, PTESIZE=4.)
        # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
        #    access exception.
        # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
        #    step 5. Otherwise, this PTE is a pointer to the next level of
        #    the page table.
        #    Let i=i-1. If i < 0, stop and raise an access exception.
        #    Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
        # 5. A leaf PTE has been found. Determine if the requested memory
        #    access is allowed by the pte.r, pte.w, and pte.x bits. If not,
        #    stop and raise an access exception. Otherwise, the translation is
        #    successful. Set pte.a to 1, and, if the memory access is a
        #    store, set pte.d to 1.
        #    The translated physical address is given as follows:
        #      - pa.pgoff = va.pgoff.
        #      - If i > 0, then this is a superpage translation and
        #        pa.ppn[i-1:0] = va.vpn[i-1:0].
        #      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
        # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
        #    superpage stop and raise a page-fault exception.

        m.d.sync += tag_valid.eq(0)

        # default assignments
        m.d.comb += [
            # PTW memory interface
            self.req_port_o.data_req.eq(0),
            self.req_port_o.data_be.eq(Const(0xFF, 8)),
            self.req_port_o.data_size.eq(Const(0b11, 2)),
            self.req_port_o.data_we.eq(0),
            self.ptw_error_o.eq(0),
            self.itlb_update_o.valid.eq(0),
            self.dtlb_update_o.valid.eq(0),

            self.itlb_miss_o.eq(0),
            self.dtlb_miss_o.eq(0),
        ]

        # ------------
        # State Machine
        # ------------

        with m.FSM() as fsm:

            with m.State("IDLE"):
                self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
                          ptw_pptr, vaddr, tlb_update_asid)

            with m.State("WAIT_GRANT"):
                self.grant(m, tag_valid, data_rvalid)

            with m.State("PTE_LOOKUP"):
                # we wait for the valid signal
                with m.If(data_rvalid):
                    # the latched vaddr is handed over so that next-level
                    # pointers are indexed with the address being walked
                    self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
                                data_rvalid, global_mapping,
                                is_instr_ptw, ptw_pptr, vaddr)

            # Propagate error to MMU/LSU
            with m.State("PROPAGATE_ERROR"):
                m.next = "IDLE"
                m.d.comb += self.ptw_error_o.eq(1)

            # wait for the rvalid before going back to IDLE
            with m.State("WAIT_RVALID"):
                with m.If(data_rvalid):
                    m.next = "IDLE"

        m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
                     data_rvalid.eq(self.req_port_i.data_rvalid)
                    ]

        return m

    def set_grant_state(self, m):
        """Select the next state after a memory request has been set up."""
        # should we have flushed before we got an rvalid,
        # wait for it until going back to IDLE
        with m.If(self.flush_i):
            with m.If(self.req_port_i.data_gnt):
                m.next = "WAIT_RVALID"
            with m.Else():
                m.next = "IDLE"
        with m.Else():
            m.next = "WAIT_GRANT"

    def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
                      ptw_pptr, vaddr, tlb_update_asid):
        """Body of the IDLE state: detect a TLB miss and start a walk.

        An ITLB miss takes priority over a DTLB miss.  On either miss the
        root page-table pointer, the missing virtual address and the ASID
        are latched before the request states are entered.
        """
        # by default we start with the top-most page table
        m.d.sync += [is_instr_ptw.eq(0),
                     ptw_lvl.eq(LVL1),
                     global_mapping.eq(0),
                     self.ptw_active_o.eq(0), # deactivate (IDLE)
                    ]
        # work out itlb/dtlb miss
        m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i &
                                        self.itlb_access_i &
                                        ~self.itlb_hit_i &
                                        ~self.dtlb_access_i)
        m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i &
                                        self.dtlb_access_i &
                                        ~self.dtlb_hit_i)
        # we got an ITLB miss?
        with m.If(self.itlb_miss_o):
            pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
                       self.satp_ppn_i)
            m.d.sync += [ptw_pptr.eq(pptr),
                         is_instr_ptw.eq(1),
                         vaddr.eq(self.itlb_vaddr_i),
                         tlb_update_asid.eq(self.asid_i),
                        ]
            self.set_grant_state(m)

        # we got a DTLB miss?
        with m.Elif(self.dtlb_miss_o):
            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
                       self.satp_ppn_i)
            m.d.sync += [ptw_pptr.eq(pptr),
                         vaddr.eq(self.dtlb_vaddr_i),
                         tlb_update_asid.eq(self.asid_i),
                        ]
            self.set_grant_state(m)

    def grant(self, m, tag_valid, data_rvalid):
        """Body of the WAIT_GRANT state: issue the request, await the grant."""
        # we've got a data WAIT_GRANT so tell the
        # cache that the tag is valid

        # send a request out
        m.d.comb += self.req_port_o.data_req.eq(1)
        # wait for the WAIT_GRANT
        with m.If(self.req_port_i.data_gnt):
            # send the tag valid signal one cycle later
            m.d.sync += tag_valid.eq(1)
            # should we have flushed before we got an rvalid,
            # wait for it until going back to IDLE
            with m.If(self.flush_i):
                with m.If(~data_rvalid):
                    m.next = "WAIT_RVALID"
                with m.Else():
                    m.next = "IDLE"
            with m.Else():
                m.next = "PTE_LOOKUP"

    def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
                     data_rvalid, global_mapping,
                     is_instr_ptw, ptw_pptr, vaddr=None):
        """Body of the PTE_LOOKUP state: classify the fetched PTE.

        Arguments:
        * m: the Module being built (the FSM state must be open)
        * pte: decoded view of the fetched page-table entry
        * ptw_lvl, ptw_lvl1..3: current level register and its decodes
        * data_rvalid: unused here (the caller already gates on it)
        * global_mapping: register set when a PTE with the g bit is seen
        * is_instr_ptw: high while walking for an instruction fetch
        * ptw_pptr: pointer register loaded with the next-level address
        * vaddr: latched virtual address being walked; when omitted,
          dtlb_vaddr_i is used (the former behaviour)
        """
        # Address whose VPN fields index the next page-table level.
        walk_vaddr = self.dtlb_vaddr_i if vaddr is None else vaddr

        # temporaries
        pte_rx = Signal(reset_less=True)
        pte_exe = Signal(reset_less=True)
        pte_inv = Signal(reset_less=True)
        pte_a = Signal(reset_less=True)
        st_wd = Signal(reset_less=True)
        m.d.comb += [pte_rx.eq(pte.r | pte.x),
                     pte_exe.eq(~pte.x | ~pte.a),
                     pte_inv.eq(~pte.v | (~pte.r & pte.w)),
                     pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
                     st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]

        l1err = Signal(reset_less=True)
        l2err = Signal(reset_less=True)
        # `&` binds tighter than `!=` in Python, so the comparison is
        # parenthesised to AND the level flag with the *result* of the
        # alignment check.
        m.d.comb += [l2err.eq(ptw_lvl2 & (pte.ppn[0:9] != Const(0, 9))),
                     l1err.eq(ptw_lvl1 & (pte.ppn[0:18] != Const(0, 18)))]

        # check if the global mapping bit is set
        with m.If(pte.g):
            m.d.sync += global_mapping.eq(1)

        # default transition; the branches below override it
        m.next = "IDLE"

        # -------------
        # Invalid PTE
        # -------------
        # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
        # stop and raise a page-fault exception.
        with m.If(pte_inv):
            m.next = "PROPAGATE_ERROR"

        # -----------
        # Valid PTE
        # -----------

        # it is a valid PTE
        # if pte.r = 1 or pte.x = 1 it is a valid PTE
        with m.Elif(pte_rx):
            # Valid translation found (either 1G, 2M or 4K)
            with m.If(is_instr_ptw):
                # ------------
                # Update ITLB
                # ------------
                # If page not executable, we can directly raise error.
                # This doesn't put a useless entry into the TLB.
                # The same idea applies to the access flag since we let
                # the access flag be managed by SW.
                with m.If(pte_exe):
                    m.next = "PROPAGATE_ERROR"
                with m.Else():
                    m.d.comb += self.itlb_update_o.valid.eq(1)

            with m.Else():
                # ------------
                # Update DTLB
                # ------------
                # Check if the access flag has been set, otherwise
                # throw page-fault and let software handle those bits.
                # If page not readable (there are no write-only pages)
                # directly raise an error. This doesn't put a useless
                # entry into the TLB.
                with m.If(pte_a):
                    m.d.comb += self.dtlb_update_o.valid.eq(1)
                with m.Else():
                    m.next = "PROPAGATE_ERROR"
                # Request is a store: perform additional checks
                # If the request was a store and the page not
                # write-able, raise an error
                # the same applies if the dirty flag is not set
                with m.If(st_wd):
                    m.d.comb += self.dtlb_update_o.valid.eq(0)
                    m.next = "PROPAGATE_ERROR"

            # check if the ppn is correctly aligned: Case (6)
            with m.If(l1err | l2err):
                m.next = "PROPAGATE_ERROR"
                m.d.comb += [self.dtlb_update_o.valid.eq(0),
                             self.itlb_update_o.valid.eq(0)]

        # this is a pointer to the next TLB level
        with m.Else():
            # pointer to next level of page table
            with m.If(ptw_lvl1):
                # we are in the second level now
                pptr = Cat(Const(0, 3), walk_vaddr[21:30], pte.ppn)
                m.d.sync += [ptw_pptr.eq(pptr),
                             ptw_lvl.eq(LVL2)
                            ]
            with m.If(ptw_lvl2):
                # here we received a pointer to the third level
                pptr = Cat(Const(0, 3), walk_vaddr[12:21], pte.ppn)
                m.d.sync += [ptw_pptr.eq(pptr),
                             ptw_lvl.eq(LVL3)
                            ]
            self.set_grant_state(m)

            with m.If(ptw_lvl3):
                # Should already be the last level
                # page table => Error
                m.d.sync += ptw_lvl.eq(LVL3)
                m.next = "PROPAGATE_ERROR"
+if __name__ == '__main__':
+ # Script entry point: build a PTW, convert it to RTLIL and write the
+ # resulting text to test_ptw.il.
+ ptw = PTW()
+ vl = rtlil.convert(ptw, ports=ptw.ports())
+ with open("test_ptw.il", "w") as f:
+ f.write(vl)
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 21.4.2017
+# Description: Translation Lookaside Buffer, SV39
+# fully set-associative
+Implementation in c++:
+Text description:
+Online simulator:
+from math import log2
+from nmigen import Signal, Module, Cat, Const, Array
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import Encoder
+from ptw import TLBUpdate, PTE, ASID_WIDTH
+from plru import PLRU
+from tlb_content import TLBContent
+class TLB:
+ # Translation lookaside buffer built from `tlb_entries` TLBContent
+ # submodules, an Encoder that picks the hitting entry, and a PLRU block
+ # that drives each entry's replace-enable.
+ def __init__(self, tlb_entries=8, asid_width=8):
+ # tlb_entries: number of TLBContent entries to instantiate
+ # asid_width:  bit width of the ASID signals
+ self.tlb_entries = tlb_entries
+ self.asid_width = asid_width
+ self.flush_i = Signal() # Flush signal
+ # Lookup signals
+ self.lu_access_i = Signal()
+ self.lu_asid_i = Signal(self.asid_width)
+ self.lu_vaddr_i = Signal(64)
+ self.lu_content_o = PTE()
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_hit_o = Signal()
+ # Update TLB
+ # pte_width is the bit length of the flattened PTE record
+ self.pte_width = len(self.lu_content_o.flatten())
+ self.update_i = TLBUpdate(asid_width)
+ def elaborate(self, platform):
+ # Build and return the Module. The sanity-check asserts on
+ # tlb_entries / asid_width run at the end of this method.
+ m = Module()
+ vpn2 = Signal(9)
+ vpn1 = Signal(9)
+ vpn0 = Signal(9)
+ #-------------
+ # Translation
+ #-------------
+ # SV39 defines three levels of page tables
+ m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
+ vpn1.eq(self.lu_vaddr_i[21:30]),
+ vpn2.eq(self.lu_vaddr_i[30:39]),
+ ]
+ tc = []
+ for i in range(self.tlb_entries):
+ tlc = TLBContent(self.pte_width, self.asid_width)
+ setattr(m.submodules, "tc%d" % i, tlc)
+ tc.append(tlc)
+ # connect inputs
+ # update_i is shared by replacing the entry's attribute, not by .eq()
+ tlc.update_i = self.update_i # saves a lot of graphviz links
+ m.d.comb += [tlc.vpn0.eq(vpn0),
+ tlc.vpn1.eq(vpn1),
+ tlc.vpn2.eq(vpn2),
+ tlc.flush_i.eq(self.flush_i),
+ #tlc.update_i.eq(self.update_i),
+ tlc.lu_asid_i.eq(self.lu_asid_i)]
+ tc = Array(tc)
+ #--------------
+ # Select hit
+ #--------------
+ # use Encoder to select hit index
+ # XXX TODO: assert that there's only one valid entry (one lu_hit)
+ hitsel = Encoder(self.tlb_entries)
+ m.submodules.hitsel = hitsel
+ hits = []
+ for i in range(self.tlb_entries):
+ hits.append(tc[i].lu_hit_o)
+ m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
+ idx = hitsel.o
+ # active is the inverse of the Encoder's `n` output
+ active = Signal(reset_less=True)
+ m.d.comb += active.eq(~hitsel.n)
+ with m.If(active):
+ # active hit, send selected as output
+ m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
+ self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
+ self.lu_hit_o.eq(1),
+ self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
+ ]
+ #--------------
+ # PLRU.
+ #--------------
+ p = PLRU(self.tlb_entries)
+ plru_tree = Signal(p.TLBSZ)
+ m.submodules.plru = p
+ # connect PLRU inputs/outputs
+ # XXX TODO: assert that there's only one valid entry (one replace_en)
+ en = []
+ for i in range(self.tlb_entries):
+ en.append(tc[i].replace_en_i)
+ m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
+ p.lu_hit.eq(hitsel.i),
+ p.lu_access_i.eq(self.lu_access_i),
+ p.plru_tree.eq(plru_tree)]
+ # plru_tree is registered: loaded from the PLRU output in the sync domain
+ m.d.sync += plru_tree.eq(p.plru_tree_o)
+ #--------------
+ # Sanity checks
+ #--------------
+ assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
+ "TLB size must be a multiple of 2 and greater than 1"
+ assert (self.asid_width >= 1), \
+ "ASID width must be at least 1"
+ return m
+ # The string below follows the return statement and is never
+ # evaluated; it appears to hold SystemVerilog assertions kept
+ # for reference.
+ """
+ # Just for checking
+ function int countSetBits(logic[self.tlb_entries-1:0] vector);
+ automatic int count = 0;
+ foreach (vector[idx]) begin
+ count += vector[idx];
+ end
+ return count;
+ endfunction
+ assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+ else $error("More then one hit in TLB!"); $stop(); end
+ assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+ else $error("More then one TLB entry selected for next replace!");
+ """
+ def ports(self):
+ # Return the list of top-level signals: the lookup/flush signals
+ # followed by the ports of lu_content_o and update_i.
+ return [self.flush_i, self.lu_access_i,
+ self.lu_asid_i, self.lu_vaddr_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+ ] + self.lu_content_o.ports() + self.update_i.ports()
+if __name__ == '__main__':
+ # Script entry point: build a TLB with default parameters, convert it
+ # to RTLIL and write the resulting text to test_tlb.il.
+ tlb = TLB()
+ vl = rtlil.convert(tlb, ports=tlb.ports())
+ with open("test_tlb.il", "w") as f:
+ f.write(vl)
--- /dev/null
+from nmigen import Signal, Module, Cat, Const
+from ptw import TLBUpdate, PTE
+class TLBEntry:
+ # Tag record for one TLB entry: ASID, the three 9-bit VPN fields,
+ # the 2M / 1G page-size flags and a valid bit.
+ def __init__(self, asid_width):
+ # asid_width: bit width of the asid field
+ self.asid = Signal(asid_width)
+ # SV39 defines three levels of page tables
+ self.vpn0 = Signal(9)
+ self.vpn1 = Signal(9)
+ self.vpn2 = Signal(9)
+ self.is_2M = Signal()
+ self.is_1G = Signal()
+ self.valid = Signal()
+ def flatten(self):
+ # Concatenate all fields, in ports() order, into one value.
+ return Cat(*self.ports())
+ def eq(self, x):
+ # Assign from another object exposing flatten() (field-for-field
+ # via the flattened concatenation).
+ return self.flatten().eq(x.flatten())
+ def ports(self):
+ # Field list; the order here defines the layout used by flatten().
+ return [self.asid, self.vpn0, self.vpn1, self.vpn2,
+ self.is_2M, self.is_1G, self.valid]
+class TLBContent:
+ def __init__(self, pte_width, asid_width):
+ self.asid_width = asid_width
+ self.pte_width = pte_width
+ self.flush_i = Signal() # Flush signal
+ # Update TLB
+ self.update_i = TLBUpdate(asid_width)
+ self.vpn2 = Signal(9)
+ self.vpn1 = Signal(9)
+ self.vpn0 = Signal(9)
+ self.replace_en_i = Signal() # replace the following entry,
+ # set by replacement strategy
+ # Lookup signals
+ self.lu_asid_i = Signal(asid_width)
+ self.lu_content_o = Signal(pte_width)
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_hit_o = Signal()
+ def elaborate(self, platform):
+ m = Module()
+ tags = TLBEntry(self.asid_width)
+ content = Signal(self.pte_width)
+ m.d.comb += [self.lu_hit_o.eq(0),
+ self.lu_is_2M_o.eq(0),
+ self.lu_is_1G_o.eq(0)]
+ # temporaries for 1st level match
+ asid_ok = Signal(reset_less=True)
+ vpn2_ok = Signal(reset_less=True)
+ tags_ok = Signal(reset_less=True)
+ vpn2_hit = Signal(reset_less=True)
+ m.d.comb += [tags_ok.eq(tags.valid),
+ asid_ok.eq(tags.asid == self.lu_asid_i),
+ vpn2_ok.eq(tags.vpn2 == self.vpn2),
+ vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
+ # temporaries for 2nd level match
+ vpn1_ok = Signal(reset_less=True)
+ tags_2M = Signal(reset_less=True)
+ vpn0_ok = Signal(reset_less=True)
+ vpn0_or_2M = Signal(reset_less=True)
+ m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
+ tags_2M.eq(tags.is_2M),
+ vpn0_ok.eq(self.vpn0 == tags.vpn0),
+ vpn0_or_2M.eq(tags_2M | vpn0_ok)]
+ # first level match, this may be a giga page,
+ # check the ASID flags as well
+ with m.If(vpn2_hit):
+ # second level
+ with m.If (tags.is_1G):
+ m.d.comb += [ self.lu_content_o.eq(content),
+ self.lu_is_1G_o.eq(1),
+ self.lu_hit_o.eq(1),
+ ]
+ # not a giga page hit so check further
+ with m.Elif(vpn1_ok):
+ # this could be a 2 mega page hit or a 4 kB hit
+ # output accordingly
+ with m.If(vpn0_or_2M):
+ m.d.comb += [ self.lu_content_o.eq(content),
+ self.lu_is_2M_o.eq(tags.is_2M),
+ self.lu_hit_o.eq(1),
+ ]
+ # ------------------
+ # Update or Flush
+ # ------------------
+ # temporaries
+ replace_valid = Signal(reset_less=True)
+ m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
+ # flush
+ with m.If (self.flush_i):
+ # invalidate (flush) conditions: all if zero or just this ASID
+ with m.If (self.lu_asid_i == Const(0, self.asid_width) |
+ (self.lu_asid_i == tags.asid)):
+ m.d.sync += tags.valid.eq(0)
+ # normal replacement
+ with m.Elif(replace_valid):
+ m.d.sync += [ # update tag array
+ tags.asid.eq(self.update_i.asid),
+ tags.vpn2.eq(self.update_i.vpn[18:27]),
+ tags.vpn1.eq(self.update_i.vpn[9:18]),
+ tags.vpn0.eq(self.update_i.vpn[0:9]),
+ tags.is_1G.eq(self.update_i.is_1G),
+ tags.is_2M.eq(self.update_i.is_2M),
+ tags.valid.eq(1),
+ # and content as well
+ content.eq(self.update_i.content.flatten())
+ ]
+ return m
+ def ports(self):
+ return [self.flush_i,
+ self.lu_asid_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+ ] + self.update_i.content.ports() + self.update_i.ports()
--- /dev/null
+import sys
+from plru import PLRU
+from nmigen.compat.sim import run_simulation
+def testbench(dut):
+ # Minimal stimulus: advance one simulation step and make no checks.
+ yield
+if __name__ == "__main__":
+ # Run the testbench on PLRU(4), dumping waveforms to test_plru.vcd.
+ dut = PLRU(4)
+ run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
+ print("PLRU Unit Test Success")
\ No newline at end of file
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from ptw import PTW, PTE
+def testbench(dut):
+ # Stimulus-only testbench for the PTW: drives data-side (DTLB) miss
+ # requests, then a lookup intended to walk levels 1-2-3, then
+ # instruction-side (ITLB) requests. It contains no assertions, so
+ # results have to be inspected in the generated waveform.
+ # Each bare `yield` advances the simulation by one step.
+ # NOTE(review): this first `addr` value is overwritten before use.
+ addr = 0x8000000
+ #pte = PTE()
+ #yield pte.v.eq(1)
+ #yield pte.r.eq(1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
+ # data lookup
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000000)
+ yield
+ yield
+ yield
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x200000)
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ # data lookup, PTW levels 1-2-3
+ addr = 0x4000000
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield dut.mxr_i.eq(0x1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+ yield
+ # instruction lookup
+ yield dut.en_ld_st_translation_i.eq(0)
+ yield dut.enable_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800000)
+ yield
+ yield
+ yield
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x200000)
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+if __name__ == "__main__":
+ # Run the testbench on a default PTW, dumping waveforms to test_ptw.vcd.
+ dut = PTW()
+ run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
+ print("PTW Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from tlb import TLB
+def set_vaddr(addr):
+ # Drive the lookup address and the matching update VPN (addr >> 12).
+ # NOTE(review): `dut` is not a parameter; this reads the module-level
+ # `dut` that is only assigned in the `__main__` section, so the helper
+ # fails with NameError if the module is imported rather than run.
+ yield dut.lu_vaddr_i.eq(addr)
+ yield dut.update_i.vpn.eq(addr>>12)
+def testbench(dut):
+ # Stimulus-only testbench for the TLB: enables lookups and updates with
+ # ASID 1, then steps through a series of virtual addresses, switching
+ # the update to 1G pages for the last two. No assertions are made.
+ yield dut.lu_access_i.eq(1)
+ yield dut.lu_asid_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_1G.eq(0)
+ yield dut.update_i.is_2M.eq(0)
+ yield dut.update_i.asid.eq(1)
+ yield dut.update_i.content.ppn.eq(0)
+ yield dut.update_i.content.rsw.eq(0)
+ yield dut.update_i.content.r.eq(1)
+ yield
+ addr = 0x80000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x90001
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x28000000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x1000040000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x1000040001
+ yield from set_vaddr(addr)
+ yield
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040000
+ yield from set_vaddr(addr)
+ yield
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040001
+ yield from set_vaddr(addr)
+ yield
+ yield
+if __name__ == "__main__":
+ # Run the testbench on a default TLB, dumping waveforms to test_tlb.vcd.
+ # `dut` is deliberately module-level: set_vaddr() reads it as a global.
+ dut = TLB()
+ run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+import sys
+from LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
+from nmigen.back.pysim import Simulator, Delay, Tick
+import unittest
+class TestLFSR(unittest.TestCase):
+ # Unit tests for LFSRPolynomial formatting and for the state sequence
+ # of an LFSR built from LFSR_POLY_3.
+ def test_poly(self):
+ # Check repr()/str() of polynomials built from various exponent
+ # lists; the expected values show exponent 0 always being present
+ # and exponents listed in descending order.
+ v = LFSRPolynomial()
+ self.assertEqual(repr(v), "LFSRPolynomial([0])")
+ self.assertEqual(str(v), "1")
+ v = LFSRPolynomial([1])
+ self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
+ self.assertEqual(str(v), "x + 1")
+ v = LFSRPolynomial([0, 1])
+ self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
+ self.assertEqual(str(v), "x + 1")
+ v = LFSRPolynomial([1, 2])
+ self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])")
+ self.assertEqual(str(v), "x^2 + x + 1")
+ v = LFSRPolynomial([2])
+ self.assertEqual(repr(v), "LFSRPolynomial([2, 0])")
+ self.assertEqual(str(v), "x^2 + 1")
+ self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")
+ def test_lfsr_3(self):
+ # Simulate the 3-bit LFSR: state stays at 0x1 while disabled, then
+ # steps through 2, 5, 3, 7, 6, 4 and back to 1 once enabled.
+ # NOTE(review): the two files passed to Simulator are opened inline
+ # and never explicitly closed here; the Waveforms/ directory must
+ # already exist.
+ module = LFSR(LFSR_POLY_3)
+ traces = [module.state, module.enable]
+ with Simulator(module,
+ vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
+ gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
+ traces=traces) as sim:
+ sim.add_clock(1e-6, 0.25e-6)
+ delay = Delay(1e-7)
+ def async_process():
+ yield module.enable.eq(0)
+ yield Tick()
+ self.assertEqual((yield module.state), 0x1)
+ yield Tick()
+ self.assertEqual((yield module.state), 0x1)
+ yield module.enable.eq(1)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x2)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x5)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x3)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x7)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x6)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x4)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x1)
+ yield Tick()
+ sim.add_process(async_process)
+ sim.run()
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from AddressEncoder import AddressEncoder
+from test_helper import assert_eq, assert_ne, assert_op
+# This function allows for the easy setting of values to the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# i (Input): The array of single bits to be written
+def set_encoder(dut, i):
+ # Drive dut.i with `i`, then advance the simulation one step.
+ yield dut.i.eq(i)
+ yield
+# Checks the single match of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# sm (Single Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+def check_single_match(dut, sm, op):
+ # Read dut.single_match and compare it with `sm` via assert_op.
+ out_sm = yield dut.single_match
+ assert_op("Single Match", out_sm, sm, op)
+# Checks the multiple match of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# mm (Multiple Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+def check_multiple_match(dut, mm, op):
+ # Read dut.multiple_match and compare it with `mm` via assert_op.
+ out_mm = yield dut.multiple_match
+ assert_op("Multiple Match", out_mm, mm, op)
+# Checks the output of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# o (Output): The expected output
+# op (Operation): (0 => ==), (1 => !=)
+def check_output(dut, o, op):
+ # Read dut.o and compare it with `o` via assert_op.
+ out_o = yield dut.o
+ assert_op("Output", out_o, o, op)
+# Checks the state of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# sm (Single Match): The expected match result
+# mm (Multiple Match): The expected match result
+# o (Output): The expected output
+# sm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
+ # Run the single-match, multiple-match and output checks in turn,
+ # each with its own expected value and comparison operation.
+ yield from check_single_match(dut, sm, sm_op)
+ yield from check_multiple_match(dut, mm, mm_op)
+ yield from check_output(dut, o, o_op)
+def testbench(dut):
+ # Exercise the AddressEncoder with five input patterns (no bits, two
+ # single-bit cases, two multi-bit cases), checking single_match,
+ # multiple_match and the output address with equality (op 0) each time.
+ # Check invalid input
+ in_val = 0b000
+ single_match = 0
+ multiple_match = 0
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+ # Check single bit
+ in_val = 0b001
+ single_match = 1
+ multiple_match = 0
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+ # Check another single bit
+ in_val = 0b100
+ single_match = 1
+ multiple_match = 0
+ output = 2
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+ # Check multiple match
+ # We expected the lowest bit to be returned which is address 0
+ in_val = 0b101
+ single_match = 0
+ multiple_match = 1
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+ # Check another multiple match
+ # We expected the lowest bit to be returned which is address 1
+ in_val = 0b110
+ single_match = 0
+ multiple_match = 1
+ output = 1
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+if __name__ == "__main__":
+ # Run the testbench on AddressEncoder(4), dumping waveforms under
+ # Waveforms/.
+ dut = AddressEncoder(4)
+ run_simulation(dut, testbench(dut),
+ vcd_name="Waveforms/test_address_encoder.vcd")
+ print("AddressEncoder Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from Cam import Cam
+from test_helper import assert_eq, assert_ne, assert_op
+# This function allows for the easy setting of values to the Cam
+# Arguments:
+# dut: The Cam being tested
+# e (Enable): Whether the block is going to be enabled
+# we (Write Enable): Whether the Cam will write on the next cycle
+# a (Address): Where the data will be written if write enable is high
+# d (Data): Either what we are looking for or will write to the address
+def set_cam(dut, e, we, a, d):
+ # Drive enable, write_enable, address_in and data_in, then advance the
+ # simulation one step.
+ yield dut.enable.eq(e)
+ yield dut.write_enable.eq(we)
+ yield dut.address_in.eq(a)
+ yield dut.data_in.eq(d)
+ yield
+# Checks the multiple match of the Cam
+# Arguments:
+# dut: The Cam being tested
+# mm (Multiple Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+def check_multiple_match(dut, mm, op):
+ # Read dut.multiple_match and compare it with `mm` via assert_op.
+ out_mm = yield dut.multiple_match
+ assert_op("Multiple Match", out_mm, mm, op)
+# Checks the single match of the Cam
+# Arguments:
+# dut: The Cam being tested
+# sm (Single Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+def check_single_match(dut, sm, op):
+ # Read dut.single_match and compare it with `sm` via assert_op.
+ out_sm = yield dut.single_match
+ assert_op("Single Match", out_sm, sm, op)
+# Checks the address output of the Cam
+# Arguments:
+# dut: The Cam being tested
+# ma (Match Address): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+def check_match_address(dut, ma, op):
+ # Read dut.match_address and compare it with `ma` via assert_op.
+ out_ma = yield dut.match_address
+ assert_op("Match Address", out_ma, ma, op)
+# Checks the state of the Cam
+# Arguments:
+# dut: The Cam being tested
+# sm (Single Match): The expected match result
+# mm (Multiple Match): The expected match result
+# ma: (Match Address): The expected address output
+# sm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=)
+def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
+ # Run the multiple-match, single-match and match-address checks in turn.
+ # Note the parameter order: the multiple-match value and operation come
+ # before the single-match ones.
+ yield from check_multiple_match(dut, mm, mm_op)
+ yield from check_single_match(dut, sm, sm_op)
+ yield from check_match_address(dut, ma, ma_op)
+def testbench(dut):
+ # Exercise the Cam: disabled access, reads against the initial contents,
+ # write then read-back of entry 0, a search miss, two entries holding
+ # the same data (multiple match), and a write immediately followed by a
+ # read. Each step sets inputs with set_cam, waits one extra step, then
+ # checks the outputs with equality (op 0).
+ # Several steps assign `multiple_match` / `single_match` without
+ # checking both; only the values passed to a check_* call are verified.
+ # NA
+ enable = 0
+ write_enable = 0
+ address = 0
+ data = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Read Miss Multiple
+ # Note that the default starting entry data bits are all 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 0
+ multiple_match = 1
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_multiple_match(dut, multiple_match, 0)
+ # Read Miss
+ # Note that the default starting entry data bits are all 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 1
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Write Entry 0
+ enable = 1
+ write_enable = 1
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Read Hit Entry 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
+ # Search Hit
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
+ # Search Miss
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 5
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Multiple Match test
+ # Write Entry 1
+ enable = 1
+ write_enable = 1
+ address = 1
+ data = 5
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Write Entry 2
+ # Same data as Entry 1
+ enable = 1
+ write_enable = 1
+ address = 2
+ data = 5
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ # Read Hit Data 5
+ enable = 1
+ write_enable = 0
+ address = 1
+ data = 5
+ multiple_match = 1
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address,0,0,0)
+ # Verify read_warning is not caused
+ # Write Entry 0
+ enable = 1
+ write_enable = 1
+ address = 0
+ data = 7
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ # Note there is no yield we immediately attempt to read in the next cycle
+ # Read Hit Data 7
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 7
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+ yield
+if __name__ == "__main__":
+ # Run the testbench on Cam(4, 4), dumping waveforms under Waveforms/.
+ dut = Cam(4, 4)
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam.vcd")
+ print("Cam Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from test_helper import assert_eq, assert_ne, assert_op
+from CamEntry import CamEntry
+# This function allows for the easy setting of values to the Cam Entry
+# Arguments:
+# dut: The CamEntry being tested
+# c (command): NA (0), Read (1), Write (2), Reserve (3)
+# d (data): The data to be set
+def set_cam_entry(dut, c, d):
+ # Apply command `c` and data `d` for one step, then clear both inputs
+ # to 0 and advance one more step.
+ # Write desired values
+ yield dut.command.eq(c)
+ yield dut.data_in.eq(d)
+ yield
+ # Reset all lines
+ yield dut.command.eq(0)
+ yield dut.data_in.eq(0)
+ yield
+# Checks the data state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# d (Data): The expected data
+# op (Operation): (0 => ==), (1 => !=)
+def check_data(dut, d, op):
+ # Read dut.data and compare it with `d` via assert_op.
+ out_d = yield dut.data
+ assert_op("Data", out_d, d, op)
+# Checks the match state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# m (Match): The expected match
+# op (Operation): (0 => ==), (1 => !=)
+def check_match(dut, m, op):
+ # Read dut.match and compare it with `m` via assert_op.
+ out_m = yield dut.match
+ assert_op("Match", out_m, m, op)
+# Checks the state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# d (data): The expected data
+# m (match): The expected match
+# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=)
+# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+def check_all(dut, d, m, d_op, m_op):
+ # Run the data check and then the match check, each with its own
+ # expected value and comparison operation.
+ yield from check_data(dut, d, d_op)
+ yield from check_match(dut, m, m_op)
+# This testbench goes through the paces of testing the CamEntry module
+# It is done by writing and then reading various combinations of key/data pairs
+# and reading the results with varying keys to verify the resulting stored
+# data is correct.
+def testbench(dut):
+ # Drive the CamEntry through write, read-miss, read-hit, overwrite,
+ # read-hit and reset commands, checking data and match after each.
+ # In the read-miss step the data check uses op 1 (!=), i.e. the stored
+ # data is expected to differ from the searched value.
+ # Check write
+ command = 2
+ data = 1
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+ # Check read miss
+ command = 1
+ data = 2
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 1, 0)
+ # Check read hit
+ command = 1
+ data = 1
+ match = 1
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+ # Check overwrite
+ command = 2
+ data = 5
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield
+ yield from check_all(dut, data, match, 0, 0)
+ # Check read hit
+ command = 1
+ data = 5
+ match = 1
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+ # Check reset
+ command = 3
+ data = 0
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+ # Extra clock cycle for waveform
+ yield
+if __name__ == "__main__":
+ # Run the testbench on CamEntry(4), dumping waveforms under Waveforms/.
+ dut = CamEntry(4)
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam_entry.vcd")
+ print("CamEntry Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from LFSR import LFSR
+from test_helper import assert_eq, assert_ne, assert_op
+def testbench(dut):
+ # Stimulus-only testbench: set enable, write 9 to dut.o, then advance
+ # twelve simulation steps. No assertions are made.
+ yield dut.enable.eq(1)
+ yield dut.o.eq(9)
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+if __name__ == "__main__":
+ # Run the testbench on a default LFSR, dumping waveforms under Waveforms/.
+ dut = LFSR()
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_lfsr.vcd")
+ print("LFSR Unit Test Success")
\ No newline at end of file
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from PermissionValidator import PermissionValidator
+from test_helper import assert_op
+def set_validator(dut, d, xwr, sm, sa, asid):
+ # Drive the validator inputs (data, xwr, super_mode, super_access,
+ # asid), then advance the simulation one step.
+ yield dut.data.eq(d)
+ yield dut.xwr.eq(xwr)
+ yield dut.super_mode.eq(sm)
+ yield dut.super_access.eq(sa)
+ yield dut.asid.eq(asid)
+ yield
+def check_valid(dut, v, op):
+ # Read dut.valid and compare it with `v` via assert_op.
+ out_v = yield dut.valid
+ assert_op("Valid", out_v, v, op)
+def testbench(dut):
+ # Exercise the PermissionValidator with nine combinations of entry
+ # data, lookup ASID, super_mode and super_access (xwr is always 0),
+ # checking dut.valid with equality (op 0) after each.
+ # 80 bits represented. Ignore the MSB as it will be truncated
+ # ASID is bits first 4 hex values (bits 64 - 78)
+ # Test user mode entry valid
+ # Global Bit matching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000031
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test user mode entry valid
+ # Global Bit nonmatching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000031
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test user mode entry invalid
+ # Global Bit nonmatching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000021
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test user mode entry valid
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test user mode entry invalid
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test supervisor mode entry valid
+ # The entry is NOT in user mode
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000001
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test supervisor mode entry invalid
+ # The entry is in user mode
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test supervisor mode entry valid
+ # The entry is NOT in user mode with access
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000001
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 1
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+ # Test supervisor mode entry valid
+ # The entry is in user mode with access
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 1
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+if __name__ == "__main__":
+ dut = PermissionValidator(15, 64);
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_permission_validator.vcd")
+ print("PermissionValidator Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from PteEntry import PteEntry
+from test_helper import assert_op
+def set_entry(dut, i):
+ yield dut.i.eq(i)
+ yield
+def check_dirty(dut, d, op):
+ out_d = yield dut.d
+ assert_op("Dirty", out_d, d, op)
+def check_accessed(dut, a, op):
+ out_a = yield dut.a
+ assert_op("Accessed", out_a, a, op)
+def check_global(dut, o, op):
+ out = yield dut.g
+ assert_op("Global", out, o, op)
+def check_user(dut, o, op):
+ out = yield dut.u
+ assert_op("User Mode", out, o, op)
+def check_xwr(dut, o, op):
+ out = yield dut.xwr
+ assert_op("XWR", out, o, op)
+def check_asid(dut, o, op):
+ out = yield dut.asid
+ assert_op("ASID", out, o, op)
+def check_pte(dut, o, op):
+ out = yield dut.pte
+ assert_op("ASID", out, o, op)
+def check_valid(dut, v, op):
+ out_v = yield dut.v
+ assert_op("Valid", out_v, v, op)
+def check_all(dut, d, a, g, u, xwr, v, asid, pte):
+ yield from check_dirty(dut, d, 0)
+ yield from check_accessed(dut, a, 0)
+ yield from check_global(dut, g, 0)
+ yield from check_user(dut, u, 0)
+ yield from check_xwr(dut, xwr, 0)
+ yield from check_asid(dut, asid, 0)
+ yield from check_pte(dut, pte, 0)
+ yield from check_valid(dut, v, 0)
+def testbench(dut):
+ # 80 bits represented. Ignore the MSB as it will be truncated
+ # ASID is bits first 4 hex values (bits 64 - 78)
+ i = 0x7FFF0000000000000031
+ dirty = 0
+ access = 0
+ glob = 1
+ user = 1
+ xwr = 0
+ valid = 1
+ asid = 0x7FFF
+ pte = 0x0000000000000031
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+ i = 0x0FFF00000000000000FF
+ dirty = 1
+ access = 1
+ glob = 1
+ user = 1
+ xwr = 7
+ valid = 1
+ asid = 0x0FFF
+ pte = 0x00000000000000FF
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+ i = 0x0721000000001100001F
+ dirty = 0
+ access = 0
+ glob = 0
+ user = 1
+ xwr = 7
+ valid = 1
+ asid = 0x0721
+ pte = 0x000000001100001F
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+ yield
+if __name__ == "__main__":
+ dut = PteEntry(15, 64);
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_pte_entry.vcd")
+ print("PteEntry Unit Test Success")
\ No newline at end of file
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from SetAssociativeCache import SetAssociativeCache
+from test_helper import assert_eq, assert_ne, assert_op
+def set_sac(dut, e, c, s, t, d):
+ yield dut.enable.eq(e)
+ yield dut.command.eq(c)
+ yield dut.cset.eq(s)
+ yield dut.tag.eq(t)
+ yield dut.data_i.eq(d)
+ yield
+def testbench(dut):
+ enable = 1
+ command = 2
+ cset = 1
+ tag = 2
+ data = 3
+ yield from set_sac(dut, enable, command, cset, tag, data)
+ yield
+ enable = 1
+ command = 2
+ cset = 1
+ tag = 5
+ data = 8
+ yield from set_sac(dut, enable, command, cset, tag, data)
+ yield
+if __name__ == "__main__":
+ dut = SetAssociativeCache(4, 4, 4, 4)
+ run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_set_associative_cache.vcd")
+ print("Set Associative Cache Unit Test Success")
--- /dev/null
+# Verifies the given values given the particular operand
+# Arguments:
+# p (Prefix): Appended to the front of the assert statement
+# e (Expected): The expected value
+# o (Output): The output result
+# op (Operation): (0 => ==), (1 => !=)
+def assert_op(pre, o, e, op):
+ if op == 0:
+ assert_eq(pre, o, e)
+ else:
+ assert_ne(pre, o, e)
+# Verifies the given values are equal
+# Arguments:
+# p (Prefix): Appended to the front of the assert statement
+# e (Expected): The expected value
+# o (Output): The output result
+def assert_eq(p, o, e):
+ assert o == e, p + " Output " + str(o) + " Expected " + str(e)
+# Verifies the given values are not equal
+# Arguments:
+# p (Prefix): Appended to the front of the assert statement
+# e (Expected): The expected value
+# o (Output): The output result
+def assert_ne(p, o, e):
+ assert o != e, p + " Output " + str(o) + " Not Expecting " + str(e)
\ No newline at end of file
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.latch import SRLatch
+class DependenceCell(Elaboratable):
+ """ implements 11.4.7 mitch alsup dependence cell, p27
+ """
+ # Summary of what elaborate() builds: three SRLatch submodules (dest, src1,
+ # src2). Each is set by issue_i ANDed with its own *_i input; dest is reset
+ # by go_write_i, src1/src2 by go_read_i. All six outputs are derived from
+ # the latches' qn outputs.
+ def __init__(self):
+ # inputs
+ self.dest_i = Signal(reset_less=True) # Dest in (top)
+ self.src1_i = Signal(reset_less=True) # oper1 in (top)
+ self.src2_i = Signal(reset_less=True) # oper2 in (top)
+ self.issue_i = Signal(reset_less=True) # Issue in (top)
+ self.go_write_i = Signal(reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(reset_less=True) # Go Read in (left)
+ # for Register File Select Lines (vertical)
+ self.dest_rsel_o = Signal(reset_less=True) # dest reg sel (bottom)
+ self.src1_rsel_o = Signal(reset_less=True) # src1 reg sel (bottom)
+ self.src2_rsel_o = Signal(reset_less=True) # src2 reg sel (bottom)
+ # for Function Unit "forward progress" (horizontal)
+ self.dest_fwd_o = Signal(reset_less=True) # dest FU fw (right)
+ self.src1_fwd_o = Signal(reset_less=True) # src1 FU fw (right)
+ self.src2_fwd_o = Signal(reset_less=True) # src2 FU fw (right)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ m.submodules.dest_l = dest_l = SRLatch()
+ m.submodules.src1_l = src1_l = SRLatch()
+ m.submodules.src2_l = src2_l = SRLatch()
+ # destination latch: reset on go_write HI, set on dest and issue
+ m.d.comb += dest_l.s.eq(self.issue_i & self.dest_i)
+ m.d.comb += dest_l.r.eq(self.go_write_i)
+ # src1 latch: reset on go_read HI, set on src1_i and issue
+ m.d.comb += src1_l.s.eq(self.issue_i & self.src1_i)
+ m.d.comb += src1_l.r.eq(self.go_read_i)
+ # src2 latch: reset on go_read HI, set on src2_i and issue
+ m.d.comb += src2_l.s.eq(self.issue_i & self.src2_i)
+ m.d.comb += src2_l.r.eq(self.go_read_i)
+ # FU "Forward Progress" (read out horizontally)
+ # each fwd output is the latch's qn gated by the matching *_i input
+ m.d.comb += self.dest_fwd_o.eq(dest_l.qn & self.dest_i)
+ m.d.comb += self.src1_fwd_o.eq(src1_l.qn & self.src1_i)
+ m.d.comb += self.src2_fwd_o.eq(src2_l.qn & self.src2_i)
+ # Register File Select (read out vertically)
+ # each rsel output is the latch's qn gated by the matching go_* strobe
+ m.d.comb += self.dest_rsel_o.eq(dest_l.qn & self.go_write_i)
+ m.d.comb += self.src1_rsel_o.eq(src1_l.qn & self.go_read_i)
+ m.d.comb += self.src2_rsel_o.eq(src2_l.qn & self.go_read_i)
+ return m
+ def __iter__(self):
+ # Yields every port: the six inputs first, then the six outputs.
+ yield self.dest_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.dest_rsel_o
+ yield self.src1_rsel_o
+ yield self.src2_rsel_o
+ yield self.dest_fwd_o
+ yield self.src1_fwd_o
+ yield self.src2_fwd_o
+ def ports(self):
+ # List form of __iter__, as passed to rtlil.convert(ports=...).
+ return list(self)
+def dcell_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_dcell():
+ dut = DependenceCell()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_dcell.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, dcell_sim(dut), vcd_name='test_dcell.vcd')
+# Run the dependence-cell test when this file is executed as a script.
+if __name__ == '__main__':
+ test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+from shadow_fn import ShadowFn
+class FnUnit(Elaboratable):
+ """ implements 11.4.8 function unit, p31
+ also implements optional shadowing 11.5.1, p55
+ shadowing can be used for branches as well as exceptions (interrupts),
+ load/store hold (exceptions again), and vector-element predication
+ (once the predicate is known, which it may not be at instruction issue)
+ Inputs
+ * :wid: register file width
+ * :shadow_wid: number of shadow/fail/good/go_die sets
+ * :n_dests: number of destination regfile(s) (index: rfile_sel_i)
+ * :wr_pend: if true, writable observes the g_wr_pend_i vector
+ otherwise observes g_rd_pend_i
+ notes:
+ * dest_i / src1_i / src2_i are in *binary*, whereas...
+ * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY
+ * req_rel_i (request release) is the direct equivalent of pipeline
+ "output valid" (valid_o)
+ * recover is a local python variable (actually go_die_o)
+ * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
+ * wr_pend is set False for the majority of uses: however for
+ use in a STORE Function Unit it is set to True
+ """
+ # NOTE(review): the docstring names g_rd_pend_i / wr_pend_o, but the
+ # attributes created below are the arrays g_xx_pend_i / xx_pend_o.
+ def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False):
+ self.reg_width = wid
+ self.n_dests = n_dests
+ self.shadow_wid = shadow_wid
+ # NOTE(review): wr_pend is stored here but elaborate() never reads it.
+ self.wr_pend = wr_pend
+ # inputs
+ if n_dests > 1:
+ self.rfile_sel_i = Signal(max=n_dests, reset_less=True)
+ else:
+ self.rfile_sel_i = Const(0) # no selection. gets Array[0]
+ self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in (top)
+ self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in (top)
+ self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in (top)
+ self.issue_i = Signal(reset_less=True) # Issue in (top)
+ self.go_write_i = Signal(reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(reset_less=True) # Go Read in (left)
+ self.req_rel_i = Signal(reset_less=True) # request release (left)
+ self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i") \
+ for i in range(n_dests)) # global rd (right)
+ self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right)
+ # shadow ports only exist when shadowing is enabled (shadow_wid > 0)
+ if shadow_wid:
+ self.shadow_i = Signal(shadow_wid, reset_less=True)
+ self.s_fail_i = Signal(shadow_wid, reset_less=True)
+ self.s_good_i = Signal(shadow_wid, reset_less=True)
+ self.go_die_o = Signal(reset_less=True)
+ # outputs
+ self.readable_o = Signal(reset_less=True) # Readable out (right)
+ self.writable_o = Array(Signal(reset_less=True, name="writable_o") \
+ for i in range(n_dests)) # writable out (right)
+ self.busy_o = Signal(reset_less=True) # busy out (left)
+ self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right)
+ self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o") \
+ for i in range(n_dests))# wr pending (right)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ m.submodules.rd_l = rd_l = SRLatch(sync=False)
+ m.submodules.wr_l = wr_l = SRLatch(sync=False)
+ m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+ m.submodules.src1_d = src1_d = Decoder(self.reg_width)
+ m.submodules.src2_d = src2_d = Decoder(self.reg_width)
+ # one ShadowFn submodule per shadow bit, registered as shadow0, shadow1...
+ s_latches = []
+ for i in range(self.shadow_wid):
+ sh = ShadowFn()
+ setattr(m.submodules, "shadow%d" % i, sh)
+ s_latches.append(sh)
+ # shadow / recover (optional: shadow_wid > 0)
+ if self.shadow_wid:
+ recover = self.go_die_o
+ shadown = Signal(reset_less=True)
+ i_l = []
+ fail_l = []
+ good_l = []
+ shi_l = []
+ sho_l = []
+ rec_l = []
+ # get list of latch signals. really must be a better way to do this
+ for l in s_latches:
+ i_l.append(l.issue_i)
+ shi_l.append(l.shadow_i)
+ fail_l.append(l.s_fail_i)
+ good_l.append(l.s_good_i)
+ sho_l.append(l.shadow_o)
+ rec_l.append(l.recover_o)
+ # NOTE(review): issue_i is 1 bit wide but is assigned to a Cat of
+ # shadow_wid issue inputs - confirm every shadow latch sees the issue.
+ m.d.comb += Cat(*i_l).eq(self.issue_i)
+ m.d.comb += Cat(*fail_l).eq(self.s_fail_i)
+ m.d.comb += Cat(*good_l).eq(self.s_good_i)
+ m.d.comb += Cat(*shi_l).eq(self.shadow_i)
+ # shadown is high when no shadow_o is set; recover when any recover_o is
+ m.d.comb += shadown.eq(~(Cat(*sho_l).bool()))
+ m.d.comb += recover.eq(Cat(*rec_l).bool())
+ else:
+ shadown = Const(1)
+ recover = Const(0)
+ # selector
+ xx_pend_o = self.xx_pend_o[self.rfile_sel_i]
+ writable_o = self.writable_o[self.rfile_sel_i]
+ g_pend_i = self.g_xx_pend_i[self.rfile_sel_i]
+ # presumably these zero defaults are overridden by the later assignments
+ # to the selected entry - relies on comb statement order, confirm
+ for i in range(self.n_dests):
+ m.d.comb += self.xx_pend_o[i].eq(0) # initialise all array
+ m.d.comb += self.writable_o[i].eq(0) # to zero
+ # go_write latch: reset on go_write HI, set on issue
+ m.d.comb += wr_l.s.eq(self.issue_i)
+ m.d.comb += wr_l.r.eq(self.go_write_i | recover)
+ # src1 latch: reset on go_read HI, set on issue
+ m.d.comb += rd_l.s.eq(self.issue_i)
+ m.d.comb += rd_l.r.eq(self.go_read_i | recover)
+ # dest decoder: write-pending out
+ m.d.comb += dest_d.i.eq(self.dest_i)
+ m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted
+ m.d.comb += self.busy_o.eq(wr_l.q) # busy if set
+ m.d.comb += xx_pend_o.eq(dest_d.o)
+ # src1/src2 decoder: read-pending out
+ m.d.comb += src1_d.i.eq(self.src1_i)
+ m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted
+ m.d.comb += src2_d.i.eq(self.src2_i)
+ m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted
+ m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o)
+ # readable output signal
+ # NOTE(review): readable_o is high when g_wr_pend_i and rd_pend_o overlap,
+ # while writable below uses the inverse (~) of its overlap - confirm the
+ # polarity here is intended.
+ g_rd = Signal(self.reg_width, reset_less=True)
+ m.d.comb += g_rd.eq(self.g_wr_pend_i & self.rd_pend_o)
+ m.d.comb += self.readable_o.eq(g_rd.bool())
+ # writable output signal
+ g_wr_v = Signal(self.reg_width, reset_less=True)
+ g_wr = Signal(reset_less=True)
+ wo = Signal(reset_less=True)
+ m.d.comb += g_wr_v.eq(g_pend_i & xx_pend_o)
+ m.d.comb += g_wr.eq(~g_wr_v.bool())
+ # writable needs: no overlap, rd_l.q set, release requested, not shadowed
+ m.d.comb += wo.eq(g_wr & rd_l.q & self.req_rel_i & shadown)
+ m.d.comb += writable_o.eq(wo)
+ return m
+ def __iter__(self):
+ # NOTE(review): rfile_sel_i, busy_o and the shadow ports (shadow_i,
+ # s_fail_i, s_good_i, go_die_o) are not yielded here.
+ yield self.dest_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.req_rel_i
+ yield from self.g_xx_pend_i
+ yield self.g_wr_pend_i
+ yield self.readable_o
+ yield from self.writable_o
+ yield self.rd_pend_o
+ yield from self.xx_pend_o
+ def ports(self):
+ # List form of __iter__, as passed to rtlil.convert(ports=...).
+ return list(self)
+############# ###############
+# --- --- #
+# --- renamed / redirected from base class --- #
+# --- --- #
+# --- below are convenience classes which match the names --- #
+# --- of the various mitch alsup book chapter gate diagrams --- #
+# --- --- #
+############# ###############
+class IntFnUnit(FnUnit):
+ def __init__(self, wid, shadow_wid=0):
+ FnUnit.__init__(self, wid, shadow_wid)
+ self.int_rd_pend_o = self.rd_pend_o
+ self.int_wr_pend_o = self.xx_pend_o[0]
+ self.g_int_wr_pend_i = self.g_wr_pend_i
+ self.g_int_rd_pend_i = self.g_xx_pend_i[0]
+ self.int_readable_o = self.readable_o
+ self.int_writable_o = self.writable_o[0]
+ self.int_rd_pend_o.name = "int_rd_pend_o"
+ self.int_wr_pend_o.name = "int_wr_pend_o"
+ self.g_int_rd_pend_i.name = "g_int_rd_pend_i"
+ self.g_int_wr_pend_i.name = "g_int_wr_pend_i"
+ self.int_readable_o.name = "int_readable_o"
+ self.int_writable_o.name = "int_writable_o"
+class FPFnUnit(FnUnit):
+ def __init__(self, wid, shadow_wid=0):
+ FnUnit.__init__(self, wid, shadow_wid)
+ self.fp_rd_pend_o = self.rd_pend_o
+ self.fp_wr_pend_o = self.xx_pend_o[0]
+ self.g_fp_wr_pend_i = self.g_wr_pend_i
+ self.g_fp_rd_pend_i = self.g_xx_pend_i[0]
+ self.fp_writable_o = self.writable_o[0]
+ self.fp_readable_o = self.readable_o
+ self.fp_rd_pend_o.name = "fp_rd_pend_o"
+ self.fp_wr_pend_o.name = "fp_wr_pend_o"
+ self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i"
+ self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i"
+ self.fp_writable_o.name = "fp_writable_o"
+ self.fp_readable_o.name = "fp_readable_o"
+class LDFnUnit(FnUnit):
+ """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
+ * when rfile_sel_i == 0, int_wr_pend_o is set
+ * when rfile_sel_i == 1, fp_wr_pend_o is set
+ """
+ def __init__(self, wid, shadow_wid=0):
+ FnUnit.__init__(self, wid, shadow_wid, n_dests=2)
+ self.int_rd_pend_o = self.rd_pend_o
+ self.int_wr_pend_o = self.xx_pend_o[0]
+ self.fp_wr_pend_o = self.xx_pend_o[1]
+ self.g_int_wr_pend_i = self.g_wr_pend_i
+ self.g_int_rd_pend_i = self.g_xx_pend_i[0]
+ self.g_fp_rd_pend_i = self.g_xx_pend_i[1]
+ self.int_readable_o = self.readable_o
+ self.int_writable_o = self.writable_o[0]
+ self.fp_writable_o = self.writable_o[1]
+ self.int_rd_pend_o.name = "int_rd_pend_o"
+ self.int_wr_pend_o.name = "int_wr_pend_o"
+ self.fp_wr_pend_o.name = "fp_wr_pend_o"
+ self.g_int_wr_pend_i.name = "g_int_wr_pend_i"
+ self.g_int_rd_pend_i.name = "g_int_rd_pend_i"
+ self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i"
+ self.int_readable_o.name = "int_readable_o"
+ self.int_writable_o.name = "int_writable_o"
+ self.fp_writable_o.name = "fp_writable_o"
+class STFnUnit(FnUnit):
+ """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
+ * wr_pend=False indicates to observe global fp write pending
+ * when rfile_sel_i == 0, int_wr_pend_o is set
+ * when rfile_sel_i == 1, fp_wr_pend_o is set
+ *
+ """
+ def __init__(self, wid, shadow_wid=0):
+ FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True)
+ self.int_rd_pend_o = self.rd_pend_o # 1st int read-pending vector
+ self.int2_rd_pend_o = self.xx_pend_o[0] # 2nd int read-pending vector
+ self.fp_rd_pend_o = self.xx_pend_o[1] # 1x FP read-pending vector
+ # yes overwrite FnUnit base class g_wr_pend_i vector
+ self.g_int_wr_pend_i = self.g_wr_pend_i = self.g_xx_pend_i[0]
+ self.g_fp_wr_pend_i = self.g_xx_pend_i[1]
+ self.int_readable_o = self.readable_o
+ self.int_writable_o = self.writable_o[0]
+ self.fp_writable_o = self.writable_o[1]
+ self.int_rd_pend_o.name = "int_rd_pend_o"
+ self.int2_rd_pend_o.name = "int2_rd_pend_o"
+ self.fp_rd_pend_o.name = "fp_rd_pend_o"
+ self.g_int_wr_pend_i.name = "g_int_wr_pend_i"
+ self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i"
+ self.int_readable_o.name = "int_readable_o"
+ self.int_writable_o.name = "int_writable_o"
+ self.fp_writable_o.name = "fp_writable_o"
+def int_fn_unit_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_int_fn_unit():
+ dut = FnUnit(32, 2, 2)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_fn_unit.il", "w") as f:
+ f.write(vl)
+ dut = LDFnUnit(32, 2)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_ld_fn_unit.il", "w") as f:
+ f.write(vl)
+ dut = STFnUnit(32, 0)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_st_fn_unit.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd')
+# Run the function-unit test when this file is executed as a script.
+if __name__ == '__main__':
+ test_int_fn_unit()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.latch import SRLatch
+class FUDependenceCell(Elaboratable):
+ """ implements 11.4.7 mitch alsup dependence cell, p27
+ """
+ # Summary of what elaborate() builds: two SRLatch submodules. The write
+ # latch is set by issue_i & wr_pend_i and reset by go_write_i; the read
+ # latch is set by issue_i & rd_pend_i and reset by go_read_i. The outputs
+ # are the latches' qn outputs.
+ def __init__(self):
+ # inputs
+ self.rd_pend_i = Signal(reset_less=True) # read pending in (left)
+ self.wr_pend_i = Signal(reset_less=True) # write pending in (left)
+ self.issue_i = Signal(reset_less=True) # Issue in (top)
+ self.go_write_i = Signal(reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(reset_less=True) # Go Read in (left)
+ # outputs (latched rd/wr pend)
+ self.rd_pend_o = Signal(reset_less=True) # read pending out (right)
+ self.wr_pend_o = Signal(reset_less=True) # write pending out (right)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ m.submodules.rd_l = rd_l = SRLatch()
+ m.submodules.wr_l = wr_l = SRLatch()
+ # write latch: reset on go_write HI, set on write pending and issue
+ m.d.comb += wr_l.s.eq(self.issue_i & self.wr_pend_i)
+ m.d.comb += wr_l.r.eq(self.go_write_i)
+ # read latch: reset on go_read HI, set on read pending and issue
+ m.d.comb += rd_l.s.eq(self.issue_i & self.rd_pend_i)
+ m.d.comb += rd_l.r.eq(self.go_read_i)
+ # Read/Write Pending Latches (read out horizontally)
+ m.d.comb += self.wr_pend_o.eq(wr_l.qn)
+ m.d.comb += self.rd_pend_o.eq(rd_l.qn)
+ return m
+ def __iter__(self):
+ # Yields every port: the five inputs first, then the two outputs.
+ yield self.rd_pend_i
+ yield self.wr_pend_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.rd_pend_o
+ yield self.wr_pend_o
+ def ports(self):
+ # List form of __iter__, as passed to rtlil.convert(ports=...).
+ return list(self)
+def dcell_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_dcell():
+ dut = FUDependenceCell()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_fu_dcell.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd')
+# Run the FU dependence-cell test when this file is executed as a script.
+if __name__ == '__main__':
+ test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat
+#from nmutil.latch import SRLatch
+from fu_dep_cell import FUDependenceCell
+from fu_picker_vec import FU_Pick_Vec
+"""
+ 6600 Function Unit Dependency Table Matrix inputs / outputs
+ -----------------------------------------------------------
+"""
+class FUFUDepMatrix(Elaboratable):
+ """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26
+ """
+ # NOTE(review): the docstring says FU-to-Reg, but this class builds an
+ # n_fu_col x n_fu_row grid of FUDependenceCell plus one FU_Pick_Vec per
+ # column (i.e. FU-to-FU) - confirm the book reference.
+ def __init__(self, n_fu_row, n_fu_col):
+ self.n_fu_row = n_fu_row # Y (FU row#) ^v
+ self.n_fu_col = n_fu_col # X (FU col #) <>
+ self.rd_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left)
+ self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left)
+ self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top)
+ self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
+ # for Function Unit Readable/Writable (horizontal)
+ self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot)
+ self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ # ---
+ # matrix of dependency cells
+ # ---
+ # indexed dm[x][y]: outer index is the column, inner index is the row
+ dm = Array(Array(FUDependenceCell() for r in range(self.n_fu_row)) \
+ for f in range(self.n_fu_col))
+ for x in range(self.n_fu_col):
+ for y in range(self.n_fu_row):
+ setattr(m.submodules, "dm_fx%d_fy%d" % (x, y), dm[x][y])
+ # ---
+ # array of Function Unit Readable/Writable: row-length, horizontal
+ # ---
+ fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col))
+ for x in range(self.n_fu_col):
+ setattr(m.submodules, "fur_x%d" % (x), fur[x])
+ # ---
+ # connect FU Readable/Writable vector
+ # ---
+ readable = []
+ writable = []
+ for x in range(self.n_fu_col):
+ fu = fur[x]
+ rd_pend_o = []
+ wr_pend_o = []
+ for y in range(self.n_fu_row):
+ dc = dm[x][y]
+ # accumulate cell outputs rd/wr-pending
+ rd_pend_o.append(dc.rd_pend_o)
+ wr_pend_o.append(dc.wr_pend_o)
+ # connect the column's cell rd/wr-pending outputs to its pick vector
+ m.d.comb += [fu.rd_pend_i.eq(Cat(*rd_pend_o)),
+ fu.wr_pend_i.eq(Cat(*wr_pend_o)),
+ ]
+ # accumulate Readable/Writable Vector outputs
+ readable.append(fu.readable_o)
+ writable.append(fu.writable_o)
+ # ... and output them from this module (one bit per column)
+ m.d.comb += self.readable_o.eq(Cat(*readable))
+ m.d.comb += self.writable_o.eq(Cat(*writable))
+ # ---
+ # connect module issue_i to the issue input of every cell
+ # ---
+ for y in range(self.n_fu_row):
+ issue_i = []
+ for x in range(self.n_fu_col):
+ dc = dm[x][y]
+ # accumulate cell inputs issue
+ issue_i.append(dc.issue_i)
+ # wire up inputs from module to row cell inputs (Cat is gooood)
+ m.d.comb += Cat(*issue_i).eq(self.issue_i)
+ # ---
+ # connect module rd_pend/wr_pend/go_read/go_write to the cell inputs
+ # ---
+ for x in range(self.n_fu_col):
+ go_read_i = []
+ go_write_i = []
+ rd_pend_i = []
+ wr_pend_i = []
+ for y in range(self.n_fu_row):
+ dc = dm[x][y]
+ # accumulate cell rd_pend/wr_pend/go_read/go_write
+ rd_pend_i.append(dc.rd_pend_i)
+ wr_pend_i.append(dc.wr_pend_i)
+ go_read_i.append(dc.go_read_i)
+ go_write_i.append(dc.go_write_i)
+ # wire up inputs from module to row cell inputs (Cat is gooood)
+ m.d.comb += [Cat(*go_read_i).eq(self.go_read_i),
+ Cat(*go_write_i).eq(self.go_write_i),
+ Cat(*rd_pend_i).eq(self.rd_pend_i),
+ Cat(*wr_pend_i).eq(self.wr_pend_i),
+ ]
+ return m
+ def __iter__(self):
+ # Yields every port: the five inputs first, then the two outputs.
+ yield self.rd_pend_i
+ yield self.wr_pend_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.readable_o
+ yield self.writable_o
+ def ports(self):
+ # List form of __iter__, as passed to rtlil.convert(ports=...).
+ return list(self)
+def d_matrix_sim(dut):
+ """ XXX TODO
+ """
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_fu_fu_matrix():
+ dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_fu_fu_matrix.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd')
+# Run the FU-FU matrix test when this file is executed as a script.
+if __name__ == '__main__':
+ test_fu_fu_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Cat
+class FU_Pick_Vec(Elaboratable):
+ """ these are allocated per-FU (horizontally),
+ and are of length fu_row_n
+ """
+ def __init__(self, fu_row_n):
+ self.fu_row_n = fu_row_n
+ # inputs: one bit per row
+ self.rd_pend_i = Signal(fu_row_n, reset_less=True)
+ self.wr_pend_i = Signal(fu_row_n, reset_less=True)
+ # outputs: single-bit reductions of the inputs
+ self.readable_o = Signal(reset_less=True)
+ self.writable_o = Signal(reset_less=True)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ # each output is high when any bit of the matching input vector is set
+ m.d.comb += self.readable_o.eq(self.rd_pend_i.bool())
+ m.d.comb += self.writable_o.eq(self.wr_pend_i.bool())
+ return m
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat
+#from nmutil.latch import SRLatch
+from dependence_cell import DependenceCell
+from fu_wr_pending import FU_RW_Pend
+from reg_select import Reg_Rsv
+"""
+ 6600 Dependency Table Matrix inputs / outputs
+ ---------------------------------------------
+ d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i
+ | | | | | | | | | | | | | | | |
+ v v v v v v v v v v v v v v v v
+ go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend
+ go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend
+ go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend
+ | | | | | | | | | | | |
+ v v v v v v v v v v v v
+ d s1 s2 d s1 s2 d s1 s2 d s1 s2
+ reg sel reg sel reg sel reg sel
+"""
+class FURegDepMatrix(Elaboratable):
+ """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26
+ """
+ # Summary of what elaborate() builds: an n_reg_col x n_fu_row grid of
+ # DependenceCell, one FU_RW_Pend per FU row and one Reg_Rsv per register
+ # column.
+ def __init__(self, n_fu_row, n_reg_col):
+ self.n_fu_row = n_fu_row # Y (FUs) ^v
+ self.n_reg_col = n_reg_col # X (Regs) <>
+ self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top)
+ self.src1_i = Signal(n_reg_col, reset_less=True) # oper1 in (top)
+ self.src2_i = Signal(n_reg_col, reset_less=True) # oper2 in (top)
+ self.issue_i = Signal(n_reg_col, reset_less=True) # Issue in (top)
+ self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
+ self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
+ # for Register File Select Lines (horizontal), per-reg
+ self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot)
+ self.src1_rsel_o = Signal(n_reg_col, reset_less=True) # src1 reg (bot)
+ self.src2_rsel_o = Signal(n_reg_col, reset_less=True) # src2 reg (bot)
+ # for Function Unit "forward progress" (vertical), per-FU
+ self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right)
+ self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right)
+ def elaborate(self, platform):
+ # Build and return the Module; `platform` is not used.
+ m = Module()
+ # ---
+ # matrix of dependency cells
+ # ---
+ # indexed dm[rn][fu]: outer index is the register, inner index is the FU
+ dm = Array(Array(DependenceCell() for r in range(self.n_fu_row)) \
+ for f in range(self.n_reg_col))
+ for rn in range(self.n_reg_col):
+ for fu in range(self.n_fu_row):
+ setattr(m.submodules, "dm_r%d_fu%d" % (rn, fu), dm[rn][fu])
+ # ---
+ # array of Function Unit Pending vectors
+ # ---
+ fupend = Array(FU_RW_Pend(self.n_reg_col) for f in range(self.n_fu_row))
+ for fu in range(self.n_fu_row):
+ setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])
+ # ---
+ # array of Register Reservation vectors
+ # ---
+ regrsv = Array(Reg_Rsv(self.n_fu_row) for r in range(self.n_reg_col))
+ for rn in range(self.n_reg_col):
+ setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])
+ # ---
+ # connect Function Unit vector
+ # ---
+ wr_pend = []
+ rd_pend = []
+ for fu in range(self.n_fu_row):
+ fup = fupend[fu]
+ dest_fwd_o = []
+ src1_fwd_o = []
+ src2_fwd_o = []
+ for rn in range(self.n_reg_col):
+ dc = dm[rn][fu]
+ # accumulate cell fwd outputs for dest/src1/src2
+ dest_fwd_o.append(dc.dest_fwd_o)
+ src1_fwd_o.append(dc.src1_fwd_o)
+ src2_fwd_o.append(dc.src2_fwd_o)
+ # connect cell fwd outputs to FU Vector in [Cat is gooood]
+ m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)),
+ fup.src1_fwd_i.eq(Cat(*src1_fwd_o)),
+ fup.src2_fwd_i.eq(Cat(*src2_fwd_o))
+ ]
+ # accumulate FU Vector outputs
+ wr_pend.append(fup.reg_wr_pend_o)
+ rd_pend.append(fup.reg_rd_pend_o)
+ # ... and output them from this module (vertical, width=FUs)
+ m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend))
+ m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend))
+ # ---
+ # connect Reg Selection vector
+ # ---
+ dest_rsel = []
+ src1_rsel = []
+ src2_rsel = []
+ for rn in range(self.n_reg_col):
+ rsv = regrsv[rn]
+ dest_rsel_o = []
+ src1_rsel_o = []
+ src2_rsel_o = []
+ for fu in range(self.n_fu_row):
+ dc = dm[rn][fu]
+ # accumulate cell reg-select outputs dest/src1/src2
+ dest_rsel_o.append(dc.dest_rsel_o)
+ src1_rsel_o.append(dc.src1_rsel_o)
+ src2_rsel_o.append(dc.src2_rsel_o)
+ # connect cell reg-select outputs to Reg Vector In
+ m.d.comb += [rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)),
+ rsv.src1_rsel_i.eq(Cat(*src1_rsel_o)),
+ rsv.src2_rsel_i.eq(Cat(*src2_rsel_o)),
+ ]
+ # accumulate Reg-Sel Vector outputs
+ dest_rsel.append(rsv.dest_rsel_o)
+ src1_rsel.append(rsv.src1_rsel_o)
+ src2_rsel.append(rsv.src2_rsel_o)
+ # ... and output them from this module (horizontal, width=REGs)
+ m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel))
+ m.d.comb += self.src1_rsel_o.eq(Cat(*src1_rsel))
+ m.d.comb += self.src2_rsel_o.eq(Cat(*src2_rsel))
+ # ---
+ # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
+ # ---
+ # NOTE(review): each list below holds one entry per FU (n_fu_row), but the
+ # module inputs it is assigned from are n_reg_col wide - confirm the
+ # intended fan-out (per-register bit vs per-FU bit).
+ for rn in range(self.n_reg_col):
+ dest_i = []
+ src1_i = []
+ src2_i = []
+ issue_i = []
+ for fu in range(self.n_fu_row):
+ dc = dm[rn][fu]
+ # accumulate cell inputs dest/src1/src2
+ dest_i.append(dc.dest_i)
+ src1_i.append(dc.src1_i)
+ src2_i.append(dc.src2_i)
+ issue_i.append(dc.issue_i)
+ # wire up inputs from module to row cell inputs (Cat is gooood)
+ m.d.comb += [Cat(*dest_i).eq(self.dest_i),
+ Cat(*src1_i).eq(self.src1_i),
+ Cat(*src2_i).eq(self.src2_i),
+ Cat(*issue_i).eq(self.issue_i),
+ ]
+ # ---
+ # connect Dependency Matrix go_read_i/go_write_i to module go_rd/go_wr
+ # ---
+ # NOTE(review): each list below holds one entry per register (n_reg_col),
+ # but go_read_i / go_write_i are n_fu_row wide - confirm as above.
+ for fu in range(self.n_fu_row):
+ go_read_i = []
+ go_write_i = []
+ for rn in range(self.n_reg_col):
+ dc = dm[rn][fu]
+ # accumulate cell go_read/go_write inputs
+ go_read_i.append(dc.go_read_i)
+ go_write_i.append(dc.go_write_i)
+ # wire up inputs from module to row cell inputs (Cat is gooood)
+ m.d.comb += [Cat(*go_read_i).eq(self.go_read_i),
+ Cat(*go_write_i).eq(self.go_write_i),
+ ]
+ return m
+ def __iter__(self):
+ # Yields every port: the six inputs first, then the five outputs.
+ yield self.dest_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.issue_i
+ yield self.go_write_i
+ yield self.go_read_i
+ yield self.dest_rsel_o
+ yield self.src1_rsel_o
+ yield self.src2_rsel_o
+ yield self.wr_pend_o
+ yield self.rd_pend_o
+ def ports(self):
+ # List form of __iter__, as passed to rtlil.convert(ports=...).
+ return list(self)
+def d_matrix_sim(dut):
+ """ XXX TODO
+ """
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_d_matrix():
+ dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_fu_reg_matrix.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd')
+if __name__ == '__main__':
+ test_d_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Cat
+class FU_RW_Pend(Elaboratable):
+ """ these are allocated per-FU (horizontally),
+ and are of length reg_count
+ """
+ def __init__(self, reg_count):
+ self.reg_count = reg_count
+ self.dest_fwd_i = Signal(reg_count, reset_less=True)
+ self.src1_fwd_i = Signal(reg_count, reset_less=True)
+ self.src2_fwd_i = Signal(reg_count, reset_less=True)
+ self.reg_wr_pend_o = Signal(reset_less=True)
+ self.reg_rd_pend_o = Signal(reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ srces = Cat(self.src1_fwd_i, self.src2_fwd_i)
+ m.d.comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool())
+ m.d.comb += self.reg_rd_pend_o.eq(srces.bool())
+ return m
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+class GlobalPending(Elaboratable):
+ """ implements Global Pending Vector, basically ORs all incoming Function
+ Unit vectors together. Can be used for creating Read or Write Global
+ Pending. Can be used for INT or FP Global Pending.
+ Inputs:
+ * :wid: register file width
+ * :fu_vecs: a python list of function unit "pending" vectors, each
+ vector being a Signal of width equal to the reg file.
+ Notes:
+ * the regfile may be Int or FP, this code doesn't care which.
+ obviously do not try to put in a mixture of regfiles into fu_vecs.
+ * this code also doesn't care if it's used for Read Pending or Write
+ pending, it can be used for both: again, obviously, do not try to
+ put in a mixture of read *and* write pending vectors in.
+ * if some Function Units happen not to be uniform (don't operate
+ on a particular register (extremely unusual), they must set a Const
+ zero bit in the vector.
+ """
+ def __init__(self, wid, fu_vecs):
+ self.reg_width = wid
+ # inputs
+ self.fu_vecs = fu_vecs
+ for v in fu_vecs:
+ assert len(v) == wid, "FU Vector must be same width as regfile"
+ self.g_pend_o = Signal(wid, reset_less=True) # global pending vector
+ def elaborate(self, platform):
+ m = Module()
+ pend_l = []
+ for i in range(self.reg_width): # per-register
+ vec_bit_l = []
+ for v in self.fu_vecs:
+ vec_bit_l.append(v[i]) # fu bit for same register
+ pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg
+ m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
+ return m
+ def __iter__(self):
+ yield from self.fu_vecs
+ yield self.g_pend_o
+ def ports(self):
+ return list(self)
+def g_vec_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_g_vec():
+ vecs = []
+ for i in range(3):
+ vecs.append(Signal(32, name="fu%d" % i))
+ dut = GlobalPending(32, vecs)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_global_pending.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
+if __name__ == '__main__':
+ test_g_vec()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+class PriorityPicker(Elaboratable):
+ """ implements a priority-picker. input: N bits, output: N bits
+ """
+ def __init__(self, wid):
+ self.wid = wid
+ # inputs
+ self.i = Signal(wid, reset_less=True)
+ self.o = Signal(wid, reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ res = []
+ for i in range(0, self.wid):
+ tmp = Signal(reset_less = True)
+ if i == 0:
+ m.d.comb += tmp.eq(self.i[0])
+ else:
+ m.d.comb += tmp.eq((~tmp) & self.i[i])
+ res.append(tmp)
+ # we like Cat(*xxx). turn lists into concatenated bits
+ m.d.comb += self.o.eq(Cat(*res))
+ return m
+ def __iter__(self):
+ yield self.i
+ yield self.o
+ def ports(self):
+ return list(self)
+class GroupPicker(Elaboratable):
+ """ implements 10.5 mitch alsup group picker, p27
+ """
+ def __init__(self, wid):
+ self.gp_wid = wid
+ # inputs
+ self.readable_i = Signal(wid, reset_less=True) # readable in (top)
+ self.writable_i = Signal(wid, reset_less=True) # writable in (top)
+ self.rel_req_i = Signal(wid, reset_less=True) # release request in (top)
+ # outputs
+ self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom)
+ self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.rpick = rpick = PriorityPicker(self.gp_wid)
+ m.submodules.wpick = wpick = PriorityPicker(self.gp_wid)
+ # combine release (output ready signal) with writeable
+ m.d.comb += wpick.i.eq(self.writable_i & self.rel_req_i)
+ m.d.comb += self.go_wr_o.eq(wpick.o)
+ m.d.comb += rpick.i.eq(self.readable_i)
+ m.d.comb += self.go_rd_o.eq(rpick.o)
+ return m
+ def __iter__(self):
+ yield self.readable_i
+ yield self.writable_i
+ yield self.rel_req_i
+ yield self.go_rd_o
+ yield self.go_wr_o
+ def ports(self):
+ return list(self)
+def grp_pick_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_grp_pick():
+ dut = GroupPicker(4)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_grp_pick.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd')
+if __name__ == '__main__':
+ test_grp_pick()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+from shadow_fn import ShadowFn
+class IssueUnit(Elaboratable):
+ """ implements 11.4.14 issue unit, p50
+ Inputs
+ * :wid: register file width
+ * :n_insns: number of instructions in this issue unit.
+ """
+ def __init__(self, wid, n_insns):
+ self.reg_width = wid
+ self.n_insns = n_insns
+ # inputs
+ self.store_i = Signal(reset_less=True) # instruction is a store
+ self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in
+ self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in
+ self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in
+ self.g_wr_pend_i = Signal(wid, reset_less=True) # write pending vector
+ self.insn_i = Array(Signal(reset_less=True, name="insn_i") \
+ for i in range(n_insns))
+ self.busy_i = Array(Signal(reset_less=True, name="busy_i") \
+ for i in range(n_insns))
+ # outputs
+ self.fn_issue_o = Array(Signal(reset_less=True, name="fn_issue_o") \
+ for i in range(n_insns))
+ self.g_issue_o = Signal(reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+ # temporaries
+ waw_stall = Signal(reset_less=True)
+ fu_stall = Signal(reset_less=True)
+ pend = Signal(self.reg_width, reset_less=True)
+ # dest decoder: write-pending
+ m.d.comb += dest_d.i.eq(self.dest_i)
+ m.d.comb += dest_d.n.eq(~self.store_i) # decode is inverted
+ m.d.comb += pend.eq(dest_d.o & self.g_wr_pend_i)
+ m.d.comb += waw_stall.eq(pend.bool())
+ ib_l = []
+ for i in range(self.n_insns):
+ ib_l.append(self.insn_i[i] & self.busy_i[i])
+ m.d.comb += fu_stall.eq(Cat(*ib_l).bool())
+ m.d.comb += self.g_issue_o.eq(~(waw_stall | fu_stall))
+ for i in range(self.n_insns):
+ m.d.comb += self.fn_issue_o[i].eq(self.g_issue_o & self.insn_i[i])
+ return m
+ def __iter__(self):
+ yield self.store_i
+ yield self.dest_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.g_wr_pend_i
+ yield from self.insn_i
+ yield from self.busy_i
+ yield from self.fn_issue_o
+ yield self.g_issue_o
+ def ports(self):
+ return list(self)
+class IntFPIssueUnit(Elaboratable):
+ def __init__(self, wid, n_int_insns, n_fp_insns):
+ self.i = IssueUnit(wid, n_int_insns)
+ self.f = IssueUnit(wid, n_fp_insns)
+ self.issue_o = Signal(reset_less=True)
+ # some renames
+ self.int_write_pending_i = self.i.g_wr_pend_i
+ self.fp_write_pending_i = self.f.g_wr_pend_i
+ self.int_write_pending_i.name = 'int_write_pending_i'
+ self.fp_write_pending_i.name = 'fp_write_pending_i'
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.intissue = self.i
+ m.submodules.fpissue = self.f
+ m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o)
+ return m
+ def ports(self):
+ yield self.issue_o
+ yield from self.i
+ yield from self.f
+def issue_unit_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_issue_unit():
+ dut = IssueUnit(32, 3)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_issue_unit.il", "w") as f:
+ f.write(vl)
+ dut = IntFPIssueUnit(32, 3, 3)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_intfp_issue_unit.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')
+if __name__ == '__main__':
+ test_issue_unit()
--- /dev/null
+""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+"""
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.latch import SRLatch
+class LDSTDepCell(Elaboratable):
+ """ implements 11.4.12 mitch alsup load/store dependence cell, p45
+ """
+ def __init__(self):
+ # inputs
+ self.load_i = Signal(reset_less=True) # load pending in (top)
+ self.stor_i = Signal(reset_less=True) # store pending in (top)
+ self.issue_i = Signal(reset_less=True) # Issue in (top)
+ self.load_hit_i = Signal(reset_less=True) # load hit in (right)
+ self.stwd_hit_i = Signal(reset_less=True) # store w/ data hit in (right)
+ # outputs (latched rd/wr pend)
+ self.ld_hold_st_o = Signal(reset_less=True) # load holds st out (left)
+ self.st_hold_ld_o = Signal(reset_less=True) # st holds load out (left)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.war_l = war_l = SRLatch(sync=False) # WriteAfterRead Latch
+ m.submodules.raw_l = raw_l = SRLatch(sync=False) # ReadAfterWrite Latch
+ # issue & store & load - used for both WAR and RAW Setting
+ i_s_l = Signal(reset_less=True)
+ m.d.comb += i_s_l.eq(self.issue_i & self.stor_i & self.load_i)
+ # write after read latch: loads block stores
+ m.d.comb += war_l.s.eq(i_s_l)
+ m.d.comb += war_l.r.eq(self.load_i) # reset on LD
+ # read after write latch: stores block loads
+ m.d.comb += raw_l.s.eq(i_s_l)
+ m.d.comb += raw_l.r.eq(self.stor_i) # reset on ST
+ # Hold results (read out horizontally, accumulate in OR fashion)
+ m.d.comb += self.ld_hold_st_o.eq(war_l.qn & self.load_hit_i)
+ m.d.comb += self.st_hold_ld_o.eq(raw_l.qn & self.stwd_hit_i)
+ return m
+ def __iter__(self):
+ yield self.load_i
+ yield self.stor_i
+ yield self.issue_i
+ yield self.load_hit_i
+ yield self.stwd_hit_i
+ yield self.ld_hold_st_o
+ yield self.st_hold_ld_o
+ def ports(self):
+ return list(self)
+def dcell_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_dcell():
+ dut = LDSTDepCell()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_ldst_dcell.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd')
+if __name__ == '__main__':
+ test_dcell()
--- /dev/null
+""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
+6600 LD/ST Dependency Table Matrix inputs / outputs
+Relevant comments (p45-46):
+* If there are no WAR dependencies on a Load instruction with a computed
+ address it can assert Bank_Addressable and Translate_Addressable.
+* If there are no RAW dependencies on a Store instruction with both a
+ write permission and store data present it can assert Bank_Addressable.
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+"""
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+from ldst_dep_cell import LDSTDepCell
+class LDSTDepMatrix(Elaboratable):
+ """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
+ actually a sparse matrix along the diagonal.
+ load-hold-store and store-hold-load accumulate in a priority-picking
+ fashion, ORing together. the OR gate from the dependency cell is
+ here.
+ """
+ def __init__(self, n_ldst):
+ self.n_ldst = n_ldst # X and Y (FUs)
+ self.load_i = Signal(n_ldst, reset_less=True) # load pending in
+ self.stor_i = Signal(n_ldst, reset_less=True) # store pending in
+ self.issue_i = Signal(n_ldst, reset_less=True) # Issue in
+ self.load_hit_i = Signal(n_ldst, reset_less=True) # load hit in
+ self.stwd_hit_i = Signal(n_ldst, reset_less=True) # store w/data hit in
+ # outputs
+ self.ld_hold_st_o = Signal(reset_less=True) # load holds st out
+ self.st_hold_ld_o = Signal(reset_less=True) # st holds load out
+ def elaborate(self, platform):
+ m = Module()
+ # ---
+ # matrix of dependency cells
+ # ---
+ dm = Array(LDSTDepCell() for f in range(self.n_ldst))
+ for fu in range(self.n_ldst):
+ setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])
+ # ---
+ # connect Function Unit vector
+ # ---
+ lhs_l = []
+ shl_l = []
+ load_l = []
+ stor_l = []
+ issue_l = []
+ lh_l = []
+ sh_l = []
+ for fu in range(self.n_ldst):
+ dc = dm[fu]
+ # accumulate load-hold-store / store-hold-load bits
+ lhs_l.append(dc.ld_hold_st_o)
+ shl_l.append(dc.st_hold_ld_o)
+ # accumulate inputs (for Cat'ing later) - TODO: must be a better way
+ load_l.append(dc.load_i)
+ stor_l.append(dc.stor_i)
+ issue_l.append(dc.issue_i)
+ lh_l.append(dc.load_hit_i)
+ sh_l.append(dc.stwd_hit_i)
+ # connect cell inputs using Cat(*list_of_stuff)
+ m.d.comb += [Cat(*load_l).eq(self.load_i),
+ Cat(*stor_l).eq(self.stor_i),
+ Cat(*issue_l).eq(self.issue_i),
+ Cat(*lh_l).eq(self.load_hit_i),
+ Cat(*sh_l).eq(self.stwd_hit_i),
+ ]
+ # set the load-hold-store / store-hold-load OR-accumulated outputs
+ m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l).bool())
+ m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l).bool())
+ return m
+ def __iter__(self):
+ yield self.load_i
+ yield self.stor_i
+ yield self.issue_i
+ yield self.load_hit_i
+ yield self.stwd_hit_i
+ yield self.ld_hold_st_o
+ yield self.st_hold_ld_o
+ def ports(self):
+ return list(self)
+def d_matrix_sim(dut):
+ """ XXX TODO
+ """
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_d_matrix():
+ dut = LDSTDepMatrix(n_ldst=4)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_ld_st_matrix.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd')
+if __name__ == '__main__':
+ test_d_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal
+class Reg_Rsv(Elaboratable):
+ """ these are allocated per-Register (vertically),
+ and are each of length fu_count
+ """
+ def __init__(self, fu_count):
+ self.fu_count = fu_count
+ self.dest_rsel_i = Signal(fu_count, reset_less=True)
+ self.src1_rsel_i = Signal(fu_count, reset_less=True)
+ self.src2_rsel_i = Signal(fu_count, reset_less=True)
+ self.dest_rsel_o = Signal(reset_less=True)
+ self.src1_rsel_o = Signal(reset_less=True)
+ self.src2_rsel_o = Signal(reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool())
+ m.d.comb += self.src1_rsel_o.eq(self.src1_rsel_i.bool())
+ m.d.comb += self.src2_rsel_o.eq(self.src2_rsel_i.bool())
+ return m
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+class ShadowFn(Elaboratable):
+ """ implements shadowing 11.5.1, p55, just the individual shadow function
+ """
+ def __init__(self):
+ # inputs
+ self.issue_i = Signal(reset_less=True)
+ self.shadow_i = Signal(reset_less=True)
+ self.s_fail_i = Signal(reset_less=True)
+ self.s_good_i = Signal(reset_less=True)
+ # outputs
+ self.shadow_o = Signal(reset_less=True)
+ self.recover_o = Signal(reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.sl = sl = SRLatch(sync=False)
+ m.d.comb += sl.s.eq(self.shadow_i & self.issue_i)
+ m.d.comb += sl.r.eq(self.s_good_i)
+ m.d.comb += self.recover_o.eq(sl.q & self.s_fail_i)
+ m.d.comb += self.shadow_o.eq(sl.q)
+ return m
+ def __iter__(self):
+ yield self.issue_i
+ yield self.shadow_i
+ yield self.s_fail_i
+ yield self.s_good_i
+ yield self.shadow_o
+ yield self.recover_o
+ def ports(self):
+ return list(self)
+def shadow_fn_unit_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_shadow_fn_unit():
+ dut = ShadowFn()
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_shadow_fn_unit.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, shadow_fn_unit_sim(dut),
+ vcd_name='test_shadow_fn_unit.vcd')
+if __name__ == '__main__':
+ test_shadow_fn_unit()